1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2012-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * uitercollationiterator.h 9 * 10 * created on: 2012sep23 (from utf16collationiterator.h) 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __UITERCOLLATIONITERATOR_H__ 15 #define __UITERCOLLATIONITERATOR_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/uiter.h" 22 #include "cmemory.h" 23 #include "collation.h" 24 #include "collationdata.h" 25 #include "collationiterator.h" 26 #include "normalizer2impl.h" 27 28 U_NAMESPACE_BEGIN 29 30 /** 31 * UCharIterator-based collation element and character iterator. 32 * Handles normalized text inline, with length or NUL-terminated. 33 * Unnormalized text is handled by a subclass. 34 */ 35 class U_I18N_API UIterCollationIterator : public CollationIterator { 36 public: UIterCollationIterator(const CollationData * d,UBool numeric,UCharIterator & ui)37 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui) 38 : CollationIterator(d, numeric), iter(ui) {} 39 40 virtual ~UIterCollationIterator(); 41 42 virtual void resetToOffset(int32_t newOffset); 43 44 virtual int32_t getOffset() const; 45 46 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 47 48 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 49 50 protected: 51 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 52 53 virtual UChar handleGetTrailSurrogate(); 54 55 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 56 57 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 58 59 UCharIterator &iter; 60 }; 61 62 /** 63 * Incrementally checks the input text for FCD and normalizes where necessary. 64 */ 65 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator { 66 public: FCDUIterCollationIterator(const CollationData * data,UBool numeric,UCharIterator & ui,int32_t startIndex)67 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex) 68 : UIterCollationIterator(data, numeric, ui), 69 state(ITER_CHECK_FWD), start(startIndex), 70 nfcImpl(data->nfcImpl) {} 71 72 virtual ~FCDUIterCollationIterator(); 73 74 virtual void resetToOffset(int32_t newOffset); 75 76 virtual int32_t getOffset() const; 77 78 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 79 80 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 81 82 protected: 83 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 84 85 virtual UChar handleGetTrailSurrogate(); 86 87 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 88 89 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 90 91 private: 92 /** 93 * Switches to forward checking if possible. 94 */ 95 void switchToForward(); 96 97 /** 98 * Extends the FCD text segment forward or normalizes around pos. 99 * @return true if success 100 */ 101 UBool nextSegment(UErrorCode &errorCode); 102 103 /** 104 * Switches to backward checking. 105 */ 106 void switchToBackward(); 107 108 /** 109 * Extends the FCD text segment backward or normalizes around pos. 110 * @return true if success 111 */ 112 UBool previousSegment(UErrorCode &errorCode); 113 114 UBool normalize(const UnicodeString &s, UErrorCode &errorCode); 115 116 enum State { 117 /** 118 * The input text [start..(iter index)[ passes the FCD check. 119 * Moving forward checks incrementally. 120 * pos & limit are undefined. 121 */ 122 ITER_CHECK_FWD, 123 /** 124 * The input text [(iter index)..limit[ passes the FCD check. 125 * Moving backward checks incrementally. 126 * start & pos are undefined. 127 */ 128 ITER_CHECK_BWD, 129 /** 130 * The input text [start..limit[ passes the FCD check. 131 * pos tracks the current text index. 132 */ 133 ITER_IN_FCD_SEGMENT, 134 /** 135 * The input text [start..limit[ failed the FCD check and was normalized. 136 * pos tracks the current index in the normalized string. 137 * The text iterator is at the limit index. 138 */ 139 IN_NORM_ITER_AT_LIMIT, 140 /** 141 * The input text [start..limit[ failed the FCD check and was normalized. 142 * pos tracks the current index in the normalized string. 143 * The text iterator is at the start index. 144 */ 145 IN_NORM_ITER_AT_START 146 }; 147 148 State state; 149 150 int32_t start; 151 int32_t pos; 152 int32_t limit; 153 154 const Normalizer2Impl &nfcImpl; 155 UnicodeString normalized; 156 }; 157 158 U_NAMESPACE_END 159 160 #endif // !UCONFIG_NO_COLLATION 161 #endif // __UITERCOLLATIONITERATOR_H__ 162