1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_ 16 #define ICING_UTIL_CHARACTER_ITERATOR_H_ 17 18 #include "icing/legacy/core/icing-string-util.h" 19 #include "icing/util/i18n-utils.h" 20 21 namespace icing { 22 namespace lib { 23 24 class CharacterIterator { 25 public: CharacterIterator(std::string_view text)26 explicit CharacterIterator(std::string_view text) 27 : CharacterIterator(text, 0, 0, 0) {} 28 CharacterIterator(std::string_view text,int utf8_index,int utf16_index,int utf32_index)29 CharacterIterator(std::string_view text, int utf8_index, int utf16_index, 30 int utf32_index) 31 : text_(text), 32 cached_current_char_(i18n_utils::kInvalidUChar32), 33 utf8_index_(utf8_index), 34 utf16_index_(utf16_index), 35 utf32_index_(utf32_index) {} 36 37 // Returns the character that the iterator currently points to. 38 // i18n_utils::kInvalidUChar32 if unable to read that character. 39 UChar32 GetCurrentChar(); 40 41 // Moves current position to desired_utf8_index. 42 // REQUIRES: 0 <= desired_utf8_index <= text_.length() 43 bool MoveToUtf8(int desired_utf8_index); 44 45 // Advances from current position to the character that includes the specified 46 // UTF-8 index. 47 // REQUIRES: desired_utf8_index <= text_.length() 48 // desired_utf8_index is allowed to point one index past the end, but no 49 // further. 50 bool AdvanceToUtf8(int desired_utf8_index); 51 52 // Rewinds from current position to the character that includes the specified 53 // UTF-8 index. 54 // REQUIRES: 0 <= desired_utf8_index 55 bool RewindToUtf8(int desired_utf8_index); 56 57 // Moves current position to desired_utf16_index. 58 // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length() 59 bool MoveToUtf16(int desired_utf16_index); 60 61 // Advances current position to desired_utf16_index. 62 // REQUIRES: desired_utf16_index <= text_.utf16_length() 63 // desired_utf16_index is allowed to point one index past the end, but no 64 // further. 65 bool AdvanceToUtf16(int desired_utf16_index); 66 67 // Rewinds current position to desired_utf16_index. 68 // REQUIRES: 0 <= desired_utf16_index 69 bool RewindToUtf16(int desired_utf16_index); 70 71 // Moves current position to desired_utf32_index. 72 // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length() 73 bool MoveToUtf32(int desired_utf32_index); 74 75 // Advances current position to desired_utf32_index. 76 // REQUIRES: desired_utf32_index <= text_.utf32_length() 77 // desired_utf32_index is allowed to point one index past the end, but no 78 // further. 79 bool AdvanceToUtf32(int desired_utf32_index); 80 81 // Rewinds current position to desired_utf32_index. 82 // REQUIRES: 0 <= desired_utf32_index 83 bool RewindToUtf32(int desired_utf32_index); 84 utf8_index()85 int utf8_index() const { return utf8_index_; } utf16_index()86 int utf16_index() const { return utf16_index_; } utf32_index()87 int utf32_index() const { return utf32_index_; } 88 89 bool operator==(const CharacterIterator& rhs) const { 90 // cached_current_char_ is just that: a cached value. As such, it's not 91 // considered for equality. 92 return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ && 93 utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_; 94 } 95 DebugString()96 std::string DebugString() const { 97 return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_, 98 utf16_index_, utf32_index_); 99 } 100 101 private: 102 // Resets the character iterator to the start of the text if any of the 103 // indices are negative. 104 void ResetToStartIfNecessary(); 105 106 std::string_view text_; 107 UChar32 cached_current_char_; 108 int utf8_index_; 109 int utf16_index_; 110 int utf32_index_; 111 }; 112 113 } // namespace lib 114 } // namespace icing 115 116 #endif // ICING_UTIL_CHARACTER_ITERATOR_H_ 117