• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_
16 #define ICING_UTIL_CHARACTER_ITERATOR_H_
17 
18 #include "icing/legacy/core/icing-string-util.h"
19 #include "icing/util/i18n-utils.h"
20 
21 namespace icing {
22 namespace lib {
23 
24 class CharacterIterator {
25  public:
CharacterIterator(std::string_view text)26   explicit CharacterIterator(std::string_view text)
27       : CharacterIterator(text, 0, 0, 0) {}
28 
CharacterIterator(std::string_view text,int utf8_index,int utf16_index,int utf32_index)29   CharacterIterator(std::string_view text, int utf8_index, int utf16_index,
30                     int utf32_index)
31       : text_(text),
32         cached_current_char_(i18n_utils::kInvalidUChar32),
33         utf8_index_(utf8_index),
34         utf16_index_(utf16_index),
35         utf32_index_(utf32_index) {}
36 
37   // Returns the character that the iterator currently points to.
38   // i18n_utils::kInvalidUChar32 if unable to read that character.
39   UChar32 GetCurrentChar();
40 
41   // Moves current position to desired_utf8_index.
42   // REQUIRES: 0 <= desired_utf8_index <= text_.length()
43   bool MoveToUtf8(int desired_utf8_index);
44 
45   // Advances from current position to the character that includes the specified
46   // UTF-8 index.
47   // REQUIRES: desired_utf8_index <= text_.length()
48   // desired_utf8_index is allowed to point one index past the end, but no
49   // further.
50   bool AdvanceToUtf8(int desired_utf8_index);
51 
52   // Rewinds from current position to the character that includes the specified
53   // UTF-8 index.
54   // REQUIRES: 0 <= desired_utf8_index
55   bool RewindToUtf8(int desired_utf8_index);
56 
57   // Moves current position to desired_utf16_index.
58   // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length()
59   bool MoveToUtf16(int desired_utf16_index);
60 
61   // Advances current position to desired_utf16_index.
62   // REQUIRES: desired_utf16_index <= text_.utf16_length()
63   // desired_utf16_index is allowed to point one index past the end, but no
64   // further.
65   bool AdvanceToUtf16(int desired_utf16_index);
66 
67   // Rewinds current position to desired_utf16_index.
68   // REQUIRES: 0 <= desired_utf16_index
69   bool RewindToUtf16(int desired_utf16_index);
70 
71   // Moves current position to desired_utf32_index.
72   // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length()
73   bool MoveToUtf32(int desired_utf32_index);
74 
75   // Advances current position to desired_utf32_index.
76   // REQUIRES: desired_utf32_index <= text_.utf32_length()
77   // desired_utf32_index is allowed to point one index past the end, but no
78   // further.
79   bool AdvanceToUtf32(int desired_utf32_index);
80 
81   // Rewinds current position to desired_utf32_index.
82   // REQUIRES: 0 <= desired_utf32_index
83   bool RewindToUtf32(int desired_utf32_index);
84 
utf8_index()85   int utf8_index() const { return utf8_index_; }
utf16_index()86   int utf16_index() const { return utf16_index_; }
utf32_index()87   int utf32_index() const { return utf32_index_; }
88 
89   bool operator==(const CharacterIterator& rhs) const {
90     // cached_current_char_ is just that: a cached value. As such, it's not
91     // considered for equality.
92     return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ &&
93            utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_;
94   }
95 
DebugString()96   std::string DebugString() const {
97     return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_,
98                                          utf16_index_, utf32_index_);
99   }
100 
101  private:
102   // Resets the character iterator to the start of the text if any of the
103   // indices are negative.
104   void ResetToStartIfNecessary();
105 
106   std::string_view text_;
107   UChar32 cached_current_char_;
108   int utf8_index_;
109   int utf16_index_;
110   int utf32_index_;
111 };
112 
113 }  // namespace lib
114 }  // namespace icing
115 
116 #endif  // ICING_UTIL_CHARACTER_ITERATOR_H_
117