• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_
16 #define ICING_UTIL_CHARACTER_ITERATOR_H_
17 
18 #include <string>
19 #include <string_view>
20 
21 #include "icing/legacy/core/icing-string-util.h"
22 #include "icing/util/i18n-utils.h"
23 #include "unicode/utypes.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 class CharacterIterator {
29  public:
CharacterIterator(std::string_view text)30   explicit CharacterIterator(std::string_view text)
31       : text_(text),
32         cached_current_char_(i18n_utils::kInvalidUChar32),
33         utf8_index_(0),
34         utf16_index_(0),
35         utf32_index_(0) {}
36 
CharacterIterator()37   CharacterIterator() : utf8_index_(-1), utf16_index_(-1), utf32_index_(-1) {}
38 
39   // Returns the character that the iterator currently points to.
40   // i18n_utils::kInvalidUChar32 if unable to read that character.
41   //
42   // REQUIRES: the instance is not in an undefined state (i.e. all previous
43   //   calls succeeded).
44   //
45   // RETURNS:
46   //   - Null character if the iterator is at the end of the text.
47   //   - The character that the iterator currently points to, if the iterator is
48   //     within the text.
49   //   - i18n_utils::kInvalidUChar32, if unable to decode the character.
50   UChar32 GetCurrentChar() const;
51 
52   // Moves current position to desired_utf8_index.
53   // REQUIRES: 0 <= desired_utf8_index <= text_.length()
54   bool MoveToUtf8(int desired_utf8_index);
55 
56   // Advances from current position to the character that includes the specified
57   // UTF-8 index.
58   //
59   // desired_utf8_index should be in range [0, text_.length()]. Note that it is
60   // allowed to point one index past the end (i.e. equals text_.length()), but
61   // no further.
62   //
63   // REQUIRES:
64   //   - The instance is not in an undefined state (i.e. all previous calls
65   //     succeeded).
66   //   - The current position is not ahead of desired_utf8_index, i.e.
67   //     utf8_index() <= desired_utf8_index.
68   //
69   // RETURNS:
70   //   - True if successfully advanced.
71   //   - False otherwise. Also the iterator will be in an undefined state.
72   bool AdvanceToUtf8(int desired_utf8_index);
73 
74   // Rewinds from current position to the character that includes the specified
75   // UTF-8 index.
76   // REQUIRES: 0 <= desired_utf8_index
77   bool RewindToUtf8(int desired_utf8_index);
78 
79   // Moves current position to desired_utf16_index.
80   // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length()
81   bool MoveToUtf16(int desired_utf16_index);
82 
83   // Advances current position to desired_utf16_index.
84   // REQUIRES: desired_utf16_index <= text_.utf16_length()
85   // desired_utf16_index is allowed to point one index past the end, but no
86   // further.
87   bool AdvanceToUtf16(int desired_utf16_index);
88 
89   // Rewinds current position to desired_utf16_index.
90   // REQUIRES: 0 <= desired_utf16_index
91   bool RewindToUtf16(int desired_utf16_index);
92 
93   // Moves current position to desired_utf32_index.
94   // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length()
95   bool MoveToUtf32(int desired_utf32_index);
96 
97   // Advances current position to desired_utf32_index.
98   // REQUIRES: desired_utf32_index <= text_.utf32_length()
99   // desired_utf32_index is allowed to point one index past the end, but no
100   // further.
101   bool AdvanceToUtf32(int desired_utf32_index);
102 
103   // Rewinds current position to desired_utf32_index.
104   // REQUIRES: 0 <= desired_utf32_index
105   bool RewindToUtf32(int desired_utf32_index);
106 
is_valid()107   bool is_valid() const {
108     return text_.data() != nullptr && utf8_index_ >= 0 && utf16_index_ >= 0 &&
109            utf32_index_ >= 0;
110   }
111 
text()112   std::string_view text() const { return text_; }
utf8_index()113   int utf8_index() const { return utf8_index_; }
utf16_index()114   int utf16_index() const { return utf16_index_; }
utf32_index()115   int utf32_index() const { return utf32_index_; }
116 
117   bool operator==(const CharacterIterator& rhs) const {
118     // cached_current_char_ is just that: a cached value. As such, it's not
119     // considered for equality.
120     return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ &&
121            utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_;
122   }
123 
DebugString()124   std::string DebugString() const {
125     return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_,
126                                          utf16_index_, utf32_index_);
127   }
128 
129  private:
130   // Resets the character iterator to the start of the text if any of the
131   // indices are negative.
132   void ResetToStartIfNecessary();
133 
134   std::string_view text_;
135   mutable UChar32 cached_current_char_;
136   int utf8_index_;
137   int utf16_index_;
138   int utf32_index_;
139 };
140 
141 }  // namespace lib
142 }  // namespace icing
143 
144 #endif  // ICING_UTIL_CHARACTER_ITERATOR_H_
145