// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* utf16collationiterator.h
*
* created on: 2010oct27
* created by: Markus W. Scherer
*/

#ifndef __UTF16COLLATIONITERATOR_H__
#define __UTF16COLLATIONITERATOR_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationiterator.h"
#include "normalizer2impl.h"

U_NAMESPACE_BEGIN

/**
 * UTF-16 collation element and character iterator.
 * Handles normalized UTF-16 text inline, with length or NUL-terminated.
 * Unnormalized text is handled by a subclass.
33 */ 34 class U_I18N_API UTF16CollationIterator : public CollationIterator { 35 public: UTF16CollationIterator(const CollationData * d,UBool numeric,const char16_t * s,const char16_t * p,const char16_t * lim)36 UTF16CollationIterator(const CollationData *d, UBool numeric, 37 const char16_t *s, const char16_t *p, const char16_t *lim) 38 : CollationIterator(d, numeric), 39 start(s), pos(p), limit(lim) {} 40 41 UTF16CollationIterator(const UTF16CollationIterator &other, const char16_t *newText); 42 43 virtual ~UTF16CollationIterator(); 44 45 virtual bool operator==(const CollationIterator &other) const override; 46 47 virtual void resetToOffset(int32_t newOffset) override; 48 49 virtual int32_t getOffset() const override; 50 setText(const char16_t * s,const char16_t * lim)51 void setText(const char16_t *s, const char16_t *lim) { 52 reset(); 53 start = pos = s; 54 limit = lim; 55 } 56 57 virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 58 59 virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 60 61 protected: 62 // Copy constructor only for subclasses which set the pointers. UTF16CollationIterator(const UTF16CollationIterator & other)63 UTF16CollationIterator(const UTF16CollationIterator &other) 64 : CollationIterator(other), 65 start(nullptr), pos(nullptr), limit(nullptr) {} 66 67 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 68 69 virtual char16_t handleGetTrailSurrogate() override; 70 71 virtual UBool foundNULTerminator() override; 72 73 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 74 75 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 76 77 // UTF-16 string pointers. 78 // limit can be nullptr for NUL-terminated strings. 79 const char16_t *start, *pos, *limit; 80 }; 81 82 /** 83 * Incrementally checks the input text for FCD and normalizes where necessary. 
84 */ 85 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator { 86 public: FCDUTF16CollationIterator(const CollationData * data,UBool numeric,const char16_t * s,const char16_t * p,const char16_t * lim)87 FCDUTF16CollationIterator(const CollationData *data, UBool numeric, 88 const char16_t *s, const char16_t *p, const char16_t *lim) 89 : UTF16CollationIterator(data, numeric, s, p, lim), 90 rawStart(s), segmentStart(p), segmentLimit(nullptr), rawLimit(lim), 91 nfcImpl(data->nfcImpl), 92 checkDir(1) {} 93 94 FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const char16_t *newText); 95 96 virtual ~FCDUTF16CollationIterator(); 97 98 virtual bool operator==(const CollationIterator &other) const override; 99 100 virtual void resetToOffset(int32_t newOffset) override; 101 102 virtual int32_t getOffset() const override; 103 104 virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 105 106 virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 107 108 protected: 109 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 110 111 virtual UBool foundNULTerminator() override; 112 113 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 114 115 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 116 117 private: 118 /** 119 * Switches to forward checking if possible. 120 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 121 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 122 */ 123 void switchToForward(); 124 125 /** 126 * Extend the FCD text segment forward or normalize around pos. 127 * To be called when checkDir > 0 && pos != limit. 128 * @return true if success, checkDir == 0 and pos != limit 129 */ 130 UBool nextSegment(UErrorCode &errorCode); 131 132 /** 133 * Switches to backward checking. 134 * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 
135 * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 136 */ 137 void switchToBackward(); 138 139 /** 140 * Extend the FCD text segment backward or normalize around pos. 141 * To be called when checkDir < 0 && pos != start. 142 * @return true if success, checkDir == 0 and pos != start 143 */ 144 UBool previousSegment(UErrorCode &errorCode); 145 146 UBool normalize(const char16_t *from, const char16_t *to, UErrorCode &errorCode); 147 148 // Text pointers: The input text is [rawStart, rawLimit[ 149 // where rawLimit can be nullptr for NUL-terminated text. 150 // 151 // checkDir > 0: 152 // 153 // The input text [segmentStart..pos[ passes the FCD check. 154 // Moving forward checks incrementally. 155 // segmentLimit is undefined. limit == rawLimit. 156 // 157 // checkDir < 0: 158 // The input text [pos..segmentLimit[ passes the FCD check. 159 // Moving backward checks incrementally. 160 // segmentStart is undefined, start == rawStart. 161 // 162 // checkDir == 0: 163 // 164 // The input text [segmentStart..segmentLimit[ is being processed. 165 // These pointers are at FCD boundaries. 166 // Either this text segment already passes the FCD check 167 // and segmentStart==start<=pos<=limit==segmentLimit, 168 // or the current segment had to be normalized so that 169 // [segmentStart..segmentLimit[ turned into the normalized string, 170 // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length(). 171 const char16_t *rawStart; 172 const char16_t *segmentStart; 173 const char16_t *segmentLimit; 174 // rawLimit==nullptr for a NUL-terminated string. 175 const char16_t *rawLimit; 176 177 const Normalizer2Impl &nfcImpl; 178 UnicodeString normalized; 179 // Direction of incremental FCD check. See comments before rawStart. 180 int8_t checkDir; 181 }; 182 183 U_NAMESPACE_END 184 185 #endif // !UCONFIG_NO_COLLATION 186 #endif // __UTF16COLLATIONITERATOR_H__ 187