• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_I18N_CHAR_ITERATOR_H_
6 #define BASE_I18N_CHAR_ITERATOR_H_
7 #pragma once
8 
9 #include <string>
10 
11 #include "base/basictypes.h"
12 #include "base/string16.h"
13 
// The CharIterator classes iterate through the characters in UTF8 and
// UTF16 strings.  Example usage:
//
//   UTF8CharIterator iter(&str);
//   while (!iter.end()) {
//     VLOG(1) << iter.get();
//     iter.Advance();
//   }
22 
// Supplies the uint8_t type (used below for UTF8CharIterator's byte pointer)
// when building for Windows.
// NOTE(review): presumably other platforms obtain uint8_t from a system
// header pulled in elsewhere -- confirm.
#if defined(OS_WIN)
typedef unsigned char uint8_t;
#endif
26 
27 namespace base {
28 namespace i18n {
29 
30 class UTF8CharIterator {
31  public:
32   // Requires |str| to live as long as the UTF8CharIterator does.
33   UTF8CharIterator(const std::string* str);
34   ~UTF8CharIterator();
35 
36   // Return the starting array index of the current character within the
37   // string.
array_pos()38   int32 array_pos() const { return array_pos_; }
39 
40   // Return the logical index of the current character, independent of the
41   // number of bytes each character takes.
char_pos()42   int32 char_pos() const { return char_pos_; }
43 
44   // Return the current char.
get()45   int32 get() const { return char_; }
46 
47   // Returns true if we're at the end of the string.
end()48   bool end() const { return array_pos_ == len_; }
49 
50   // Advance to the next actual character.  Returns false if we're at the
51   // end of the string.
52   bool Advance();
53 
54  private:
55   // The string we're iterating over.
56   const uint8_t* str_;
57 
58   // The length of the encoded string.
59   int32 len_;
60 
61   // Array index.
62   int32 array_pos_;
63 
64   // The next array index.
65   int32 next_pos_;
66 
67   // Character index.
68   int32 char_pos_;
69 
70   // The current character.
71   int32 char_;
72 
73   DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
74 };
75 
76 class UTF16CharIterator {
77  public:
78   // Requires |str| to live as long as the UTF16CharIterator does.
79   UTF16CharIterator(const string16* str);
80   UTF16CharIterator(const char16* str, size_t str_len);
81   ~UTF16CharIterator();
82 
83   // Return the starting array index of the current character within the
84   // string.
array_pos()85   int32 array_pos() const { return array_pos_; }
86 
87   // Return the logical index of the current character, independent of the
88   // number of codewords each character takes.
char_pos()89   int32 char_pos() const { return char_pos_; }
90 
91   // Return the current char.
get()92   int32 get() const { return char_; }
93 
94   // Returns true if we're at the end of the string.
end()95   bool end() const { return array_pos_ == len_; }
96 
97   // Advance to the next actual character.  Returns false if we're at the
98   // end of the string.
99   bool Advance();
100 
101  private:
102   // Fills in the current character we found and advances to the next
103   // character, updating all flags as necessary.
104   void ReadChar();
105 
106   // The string we're iterating over.
107   const char16* str_;
108 
109   // The length of the encoded string.
110   int32 len_;
111 
112   // Array index.
113   int32 array_pos_;
114 
115   // The next array index.
116   int32 next_pos_;
117 
118   // Character index.
119   int32 char_pos_;
120 
121   // The current character.
122   int32 char_;
123 
124   DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
125 };
126 
127 }  // namespace i18n
128 }  // namespace base
129 
130 #endif  // BASE_I18N_CHAR_ITERATOR_H_
131