• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  unorm_it.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003jan21
14 *   created by: Markus W. Scherer
15 */
16 
17 #ifndef __UNORM_IT_H__
18 #define __UNORM_IT_H__
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
23 
24 #include "unicode/uiter.h"
25 #include "unicode/unorm.h"
26 
27 /**
28  * Normalizing UCharIterator wrapper.
29  * This internal API basically duplicates the functionality of the C++ Normalizer
30  * but
31  * - it actually implements a character iterator (UCharIterator)
32  *   with few restrictions (see unorm_setIter())
33  * - it supports UCharIterator getState()/setState()
34  * - it uses lower-level APIs and buffers more text and states,
35  *   hopefully resulting in higher performance
36  *
37  * Usage example:
38  * \code
39  * function(UCharIterator *srcIter) {
40  *     UNormIterator *uni;
41  *     UCharIterator *iter;
42  *     UErrorCode errorCode;
43  *
44  *     errorCode=U_ZERO_ERROR;
45  *     uni=unorm_openIter(NULL, 0, &errorCode);
46  *     if(U_FAILURE(errorCode)) {
47  *         // report error
48  *         return;
49  *     }
50  *
51  *     iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
52  *     if(U_FAILURE(errorCode)) {
53  *         // report error
54  *     } else {
55  *         // use iter to iterate over the canonically ordered
56  *         // version of srcIter's text
57  *         uint32_t state;
58  *
59  *         ...
60  *
61  *         state=uiter_getState(iter);
62  *         if(state!=UITER_NO_STATE) {
63  *             // use valid state, store it, use iter some more
64  *             ...
65  *
66  *             // later restore iter to the saved state:
67  *             uiter_setState(iter, state, &errorCode);
68  *
69  *             ...
70  *         }
71  *
72  *         ...
73  *     }
74  *     unorm_closeIter(uni);
75  * }
76  * \endcode
77  *
78  * See also the ICU test suites.
79  *
80  * @internal
81  */
82 struct UNormIterator;
83 typedef struct UNormIterator UNormIterator;
84 
85 /**
86  * Size in bytes of a stack buffer to hold a UNormIterator, see the stackMem
87  * and stackMemSize parameters of unorm_openIter().
88  *
89  * @internal
90  */
91 #define UNORM_ITER_SIZE 1024
92 
93 /**
94  * Open a normalizing iterator. Must be closed later with unorm_closeIter().
95  * Use unorm_setIter() to set the source iterator and the normalization mode.
96  *
97  * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold
98  *                 the UNormIterator if possible; can be NULL.
99  * @param stackMemSize Number of bytes at stackMem; can be 0,
100  *                     or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
101  * @param pErrorCode ICU error code
102  * @return an allocated and pre-initialized UNormIterator
103  * @internal
104  */
105 U_CAPI UNormIterator * U_EXPORT2
106 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);
107 
108 /**
109  * Close a normalizing iterator that was opened with unorm_openIter().
110  *
111  * @param uni UNormIterator from unorm_openIter()
112  * @internal
113  */
114 U_CAPI void U_EXPORT2
115 unorm_closeIter(UNormIterator *uni);
116 
117 /**
118  * Set a UCharIterator and a normalization mode for the normalizing iterator
119  * to wrap. The normalizing iterator will read from the character iterator,
120  * normalize the text, and in turn deliver it with its own wrapper UCharIterator
121  * interface which it returns.
122  *
123  * The source iterator remains at its current position through the unorm_setIter()
124  * call but will be used and moved as soon as the
125  * returned normalizing iterator is.
126  *
127  * The returned interface pointer is valid for as long as the normalizing iterator
128  * is open and until another unorm_setIter() call is made on it.
129  *
130  * The normalizing iterator's UCharIterator interface has the following properties:
131  * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
132  * - getState() will return UITER_NO_STATE for unknown states for positions
133  *              that are not at normalization boundaries
134  *
135  * @param uni UNormIterator from unorm_openIter()
136  * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator.
137  *             Must support getState() and setState().
138  * @param mode The normalization mode.
139  * @param pErrorCode ICU error code
140  * @return an alias to the normalizing iterator's UCharIterator interface
141  * @internal
142  */
143 U_CAPI UCharIterator * U_EXPORT2
144 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);
145 
146 #endif /* uconfig.h switches */
147 
148 #endif
149