// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef BASE_I18N_CHAR_ITERATOR_H_
6 #define BASE_I18N_CHAR_ITERATOR_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <string>
12 
13 #include "base/i18n/base_i18n_export.h"
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "build/build_config.h"
17 
// The CharIterator classes iterate through the characters in UTF8 and
// UTF16 strings, one character per step regardless of how many bytes or
// codewords that character occupies.  Example usage:
//
//   UTF8CharIterator iter(&str);
//   while (!iter.end()) {
//     VLOG(1) << iter.get();
//     iter.Advance();
//   }
26 
// No Windows-specific definition of uint8_t is provided here: <stdint.h>,
// already included by this header, declares it, so a second global-namespace
// typedef of the same standard name would be redundant.
30 
31 namespace base {
32 namespace i18n {
33 
34 class BASE_I18N_EXPORT UTF8CharIterator {
35  public:
36   // Requires |str| to live as long as the UTF8CharIterator does.
37   explicit UTF8CharIterator(const std::string* str);
38   ~UTF8CharIterator();
39 
40   // Return the starting array index of the current character within the
41   // string.
array_pos()42   int32_t array_pos() const { return array_pos_; }
43 
44   // Return the logical index of the current character, independent of the
45   // number of bytes each character takes.
char_pos()46   int32_t char_pos() const { return char_pos_; }
47 
48   // Return the current char.
get()49   int32_t get() const { return char_; }
50 
51   // Returns true if we're at the end of the string.
end()52   bool end() const { return array_pos_ == len_; }
53 
54   // Advance to the next actual character.  Returns false if we're at the
55   // end of the string.
56   bool Advance();
57 
58  private:
59   // The string we're iterating over.
60   const uint8_t* str_;
61 
62   // The length of the encoded string.
63   int32_t len_;
64 
65   // Array index.
66   int32_t array_pos_;
67 
68   // The next array index.
69   int32_t next_pos_;
70 
71   // Character index.
72   int32_t char_pos_;
73 
74   // The current character.
75   int32_t char_;
76 
77   DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
78 };
79 
80 class BASE_I18N_EXPORT UTF16CharIterator {
81  public:
82   // Requires |str| to live as long as the UTF16CharIterator does.
83   explicit UTF16CharIterator(const string16* str);
84   UTF16CharIterator(const char16* str, size_t str_len);
85   ~UTF16CharIterator();
86 
87   // Return the starting array index of the current character within the
88   // string.
array_pos()89   int32_t array_pos() const { return array_pos_; }
90 
91   // Return the logical index of the current character, independent of the
92   // number of codewords each character takes.
char_pos()93   int32_t char_pos() const { return char_pos_; }
94 
95   // Return the current char.
get()96   int32_t get() const { return char_; }
97 
98   // Returns true if we're at the end of the string.
end()99   bool end() const { return array_pos_ == len_; }
100 
101   // Advance to the next actual character.  Returns false if we're at the
102   // end of the string.
103   bool Advance();
104 
105  private:
106   // Fills in the current character we found and advances to the next
107   // character, updating all flags as necessary.
108   void ReadChar();
109 
110   // The string we're iterating over.
111   const char16* str_;
112 
113   // The length of the encoded string.
114   int32_t len_;
115 
116   // Array index.
117   int32_t array_pos_;
118 
119   // The next array index.
120   int32_t next_pos_;
121 
122   // Character index.
123   int32_t char_pos_;
124 
125   // The current character.
126   int32_t char_;
127 
128   DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
129 };
130 
131 }  // namespace i18n
132 }  // namespace base
133 
134 #endif  // BASE_I18N_CHAR_ITERATOR_H_
135