• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *   Copyright (C) 1997-2005, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  ******************************************************************************
6  */
7 
8 /**
9 * File coleitr.h
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 *  Date       Name        Description
18 *
19 *  8/18/97    helena      Added internal API documentation.
20 * 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
21 * 12/10/99    aliu        Ported Thai collation support from Java.
22 * 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01    swquek      Removed CollationElementsIterator() since it is
24 *                         private constructor and no calls are made to it
25 */
26 
27 #ifndef COLEITR_H
28 #define COLEITR_H
29 
30 #include "unicode/utypes.h"
31 
32 /**
33  * \file
34  * \brief C++ API: Collation Element Iterator.
35  */
36 
37 #if !UCONFIG_NO_COLLATION
38 
39 #include "unicode/uobject.h"
40 #include "unicode/tblcoll.h"
41 #include "unicode/ucoleitr.h"
42 
43 /**
44  * The UCollationElements struct.
45  * For usage in C programs.
46  * @stable ICU 2.0
47  */
48 typedef struct UCollationElements UCollationElements;
49 
50 U_NAMESPACE_BEGIN
51 
52 /**
53 * The CollationElementIterator class is used as an iterator to walk through
54 * each character of an international string. Use the iterator to return the
55 * ordering priority of the positioned character. The ordering priority of a
56 * character, which we refer to as a key, defines how a character is collated in
57 * the given collation object.
58 * For example, consider the following in Spanish:
59 * <pre>
60 *        "ca" -> the first key is key('c') and second key is key('a').
61 *        "cha" -> the first key is key('ch') and second key is key('a').</pre>
62 * And in German,
63 * <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
64 *        the third key is key('b'). \endhtmlonly </pre>
65 * The key of a character, is an integer composed of primary order(short),
66 * secondary order(char), and tertiary order(char). Java strictly defines the
67 * size and signedness of its primitive data types. Therefore, the static
68 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
69 * int32_t to ensure the correctness of the key value.
70 * <p>Example of the iterator usage: (without error checking)
71 * <pre>
72 * \code
73 *   void CollationElementIterator_Example()
74 *   {
75 *       UnicodeString str = "This is a test";
76 *       UErrorCode success = U_ZERO_ERROR;
77 *       RuleBasedCollator* rbc =
78 *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
79 *       CollationElementIterator* c =
80 *           rbc->createCollationElementIterator( str );
81 *       int32_t order = c->next(success);
82 *       c->reset();
83 *       order = c->previous(success);
84 *       delete c;
85 *       delete rbc;
86 *   }
87 * \endcode
88 * </pre>
89 * <p>
90 * CollationElementIterator::next returns the collation order of the next
91 * character based on the comparison level of the collator.
92 * CollationElementIterator::previous returns the collation order of the
93 * previous character based on the comparison level of the collator.
94 * The Collation Element Iterator moves only in one direction between calls to
95 * CollationElementIterator::reset. That is, CollationElementIterator::next()
96 * and CollationElementIterator::previous can not be inter-used. Whenever
97 * CollationElementIterator::previous is to be called after
98 * CollationElementIterator::next() or vice versa,
99 * CollationElementIterator::reset has to be called first to reset the status,
100 * shifting pointers to either the end or the start of the string. Hence at the
101 * next call of CollationElementIterator::previous or
102 * CollationElementIterator::next(), the first or last collation order will be
103 * returned.
104 * If a change of direction is done without a CollationElementIterator::reset(),
105 * the result is undefined.
106 * The result of a forward iterate (CollationElementIterator::next) and
107 * reversed result of the backward iterate (CollationElementIterator::previous)
108 * on the same string are equivalent, if collation orders with the value
109 * UCOL_IGNORABLE are ignored.
110 * Character based on the comparison level of the collator.  A collation order
111 * consists of primary order, secondary order and tertiary order.  The data
112 * type of the collation order is <strong>t_int32</strong>.
113 *
114 * Note, CollationElementIterator should not be subclassed.
115 * @see     Collator
116 * @see     RuleBasedCollator
117 * @version 1.8 Jan 16 2001
118 */
119 class U_I18N_API CollationElementIterator : public UObject {
120 public:
121 
122     // CollationElementIterator public data member ------------------------------
123 
124     enum {
125         /**
126          * NULLORDER indicates that an error has occured while processing
127          * @stable ICU 2.0
128          */
129         NULLORDER = (int32_t)0xffffffff
130     };
131 
132     // CollationElementIterator public constructor/destructor -------------------
133 
134     /**
135     * Copy constructor.
136     *
137     * @param other    the object to be copied from
138     * @stable ICU 2.0
139     */
140     CollationElementIterator(const CollationElementIterator& other);
141 
142     /**
143     * Destructor
144     * @stable ICU 2.0
145     */
146     virtual ~CollationElementIterator();
147 
148     // CollationElementIterator public methods ----------------------------------
149 
150     /**
151     * Returns true if "other" is the same as "this"
152     *
153     * @param other    the object to be compared
154     * @return         true if "other" is the same as "this"
155     * @stable ICU 2.0
156     */
157     UBool operator==(const CollationElementIterator& other) const;
158 
159     /**
160     * Returns true if "other" is not the same as "this".
161     *
162     * @param other    the object to be compared
163     * @return         true if "other" is not the same as "this"
164     * @stable ICU 2.0
165     */
166     UBool operator!=(const CollationElementIterator& other) const;
167 
168     /**
169     * Resets the cursor to the beginning of the string.
170     * @stable ICU 2.0
171     */
172     void reset(void);
173 
174     /**
175     * Gets the ordering priority of the next character in the string.
176     * @param status the error code status.
177     * @return the next character's ordering. otherwise returns NULLORDER if an
178     *         error has occured or if the end of string has been reached
179     * @stable ICU 2.0
180     */
181     int32_t next(UErrorCode& status);
182 
183     /**
184     * Get the ordering priority of the previous collation element in the string.
185     * @param status the error code status.
186     * @return the previous element's ordering. otherwise returns NULLORDER if an
187     *         error has occured or if the start of string has been reached
188     * @stable ICU 2.0
189     */
190     int32_t previous(UErrorCode& status);
191 
192     /**
193     * Gets the primary order of a collation order.
194     * @param order the collation order
195     * @return the primary order of a collation order.
196     * @stable ICU 2.0
197     */
198     static inline int32_t primaryOrder(int32_t order);
199 
200     /**
201     * Gets the secondary order of a collation order.
202     * @param order the collation order
203     * @return the secondary order of a collation order.
204     * @stable ICU 2.0
205     */
206     static inline int32_t secondaryOrder(int32_t order);
207 
208     /**
209     * Gets the tertiary order of a collation order.
210     * @param order the collation order
211     * @return the tertiary order of a collation order.
212     * @stable ICU 2.0
213     */
214     static inline int32_t tertiaryOrder(int32_t order);
215 
216     /**
217     * Return the maximum length of any expansion sequences that end with the
218     * specified comparison order.
219     * @param order a collation order returned by previous or next.
220     * @return maximum size of the expansion sequences ending with the collation
221     *         element or 1 if collation element does not occur at the end of any
222     *         expansion sequence
223     * @stable ICU 2.0
224     */
225     int32_t getMaxExpansion(int32_t order) const;
226 
227     /**
228     * Gets the comparison order in the desired strength. Ignore the other
229     * differences.
230     * @param order The order value
231     * @stable ICU 2.0
232     */
233     int32_t strengthOrder(int32_t order) const;
234 
235     /**
236     * Sets the source string.
237     * @param str the source string.
238     * @param status the error code status.
239     * @stable ICU 2.0
240     */
241     void setText(const UnicodeString& str, UErrorCode& status);
242 
243     /**
244     * Sets the source string.
245     * @param str the source character iterator.
246     * @param status the error code status.
247     * @stable ICU 2.0
248     */
249     void setText(CharacterIterator& str, UErrorCode& status);
250 
251     /**
252     * Checks if a comparison order is ignorable.
253     * @param order the collation order.
254     * @return TRUE if a character is ignorable, FALSE otherwise.
255     * @stable ICU 2.0
256     */
257     static inline UBool isIgnorable(int32_t order);
258 
259     /**
260     * Gets the offset of the currently processed character in the source string.
261     * @return the offset of the character.
262     * @stable ICU 2.0
263     */
264     int32_t getOffset(void) const;
265 
266     /**
267     * Sets the offset of the currently processed character in the source string.
268     * @param newOffset the new offset.
269     * @param status the error code status.
270     * @return the offset of the character.
271     * @stable ICU 2.0
272     */
273     void setOffset(int32_t newOffset, UErrorCode& status);
274 
275     /**
276     * ICU "poor man's RTTI", returns a UClassID for the actual class.
277     *
278     * @stable ICU 2.2
279     */
280     virtual UClassID getDynamicClassID() const;
281 
282     /**
283     * ICU "poor man's RTTI", returns a UClassID for this class.
284     *
285     * @stable ICU 2.2
286     */
287     static UClassID U_EXPORT2 getStaticClassID();
288 
289 protected:
290 
291     // CollationElementIterator protected constructors --------------------------
292     /**
293     * @stable ICU 2.0
294     */
295     friend class RuleBasedCollator;
296 
297     /**
298     * CollationElementIterator constructor. This takes the source string and the
299     * collation object. The cursor will walk thru the source string based on the
300     * predefined collation rules. If the source string is empty, NULLORDER will
301     * be returned on the calls to next().
302     * @param sourceText    the source string.
303     * @param order         the collation object.
304     * @param status        the error code status.
305     * @stable ICU 2.0
306     */
307     CollationElementIterator(const UnicodeString& sourceText,
308         const RuleBasedCollator* order, UErrorCode& status);
309 
310     /**
311     * CollationElementIterator constructor. This takes the source string and the
312     * collation object.  The cursor will walk thru the source string based on the
313     * predefined collation rules.  If the source string is empty, NULLORDER will
314     * be returned on the calls to next().
315     * @param sourceText    the source string.
316     * @param order         the collation object.
317     * @param status        the error code status.
318     * @stable ICU 2.0
319     */
320     CollationElementIterator(const CharacterIterator& sourceText,
321         const RuleBasedCollator* order, UErrorCode& status);
322 
323     // CollationElementIterator protected methods -------------------------------
324 
325     /**
326     * Assignment operator
327     *
328     * @param other    the object to be copied
329     * @stable ICU 2.0
330     */
331     const CollationElementIterator&
332         operator=(const CollationElementIterator& other);
333 
334 private:
335     CollationElementIterator(); // default constructor not implemented
336 
337     // CollationElementIterator private data members ----------------------------
338 
339     /**
340     * Data wrapper for collation elements
341     */
342     UCollationElements *m_data_;
343 
344     /**
345     * Indicates if m_data_ belongs to this object.
346     */
347     UBool isDataOwned_;
348 
349 };
350 
// CollationElementIterator inline method definitions -------------------------
352 
353 /**
354 * Get the primary order of a collation order.
355 * @param order the collation order
356 * @return the primary order of a collation order.
357 */
primaryOrder(int32_t order)358 inline int32_t CollationElementIterator::primaryOrder(int32_t order)
359 {
360     order &= RuleBasedCollator::PRIMARYORDERMASK;
361     return (order >> RuleBasedCollator::PRIMARYORDERSHIFT);
362 }
363 
364 /**
365 * Get the secondary order of a collation order.
366 * @param order the collation order
367 * @return the secondary order of a collation order.
368 */
secondaryOrder(int32_t order)369 inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
370 {
371     order = order & RuleBasedCollator::SECONDARYORDERMASK;
372     return (order >> RuleBasedCollator::SECONDARYORDERSHIFT);
373 }
374 
375 /**
376 * Get the tertiary order of a collation order.
377 * @param order the collation order
378 * @return the tertiary order of a collation order.
379 */
tertiaryOrder(int32_t order)380 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
381 {
382     return (order &= RuleBasedCollator::TERTIARYORDERMASK);
383 }
384 
getMaxExpansion(int32_t order)385 inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
386 {
387     return ucol_getMaxExpansion(m_data_, (uint32_t)order);
388 }
389 
isIgnorable(int32_t order)390 inline UBool CollationElementIterator::isIgnorable(int32_t order)
391 {
392     return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE);
393 }
394 
395 U_NAMESPACE_END
396 
397 #endif /* #if !UCONFIG_NO_COLLATION */
398 
399 #endif
400