1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef BASE_I18N_CHAR_ITERATOR_H_ 6 #define BASE_I18N_CHAR_ITERATOR_H_ 7 8 #include <stddef.h> 9 #include <stdint.h> 10 11 #include <string> 12 13 #include "base/i18n/base_i18n_export.h" 14 #include "base/macros.h" 15 #include "base/strings/string16.h" 16 #include "build/build_config.h" 17 18 // The CharIterator classes iterate through the characters in UTF8 and 19 // UTF16 strings. Example usage: 20 // 21 // UTF8CharIterator iter(&str); 22 // while (!iter.end()) { 23 // VLOG(1) << iter.get(); 24 // iter.Advance(); 25 // } 26 27 #if defined(OS_WIN) 28 typedef unsigned char uint8_t; 29 #endif 30 31 namespace base { 32 namespace i18n { 33 34 class BASE_I18N_EXPORT UTF8CharIterator { 35 public: 36 // Requires |str| to live as long as the UTF8CharIterator does. 37 explicit UTF8CharIterator(const std::string* str); 38 ~UTF8CharIterator(); 39 40 // Return the starting array index of the current character within the 41 // string. array_pos()42 int32_t array_pos() const { return array_pos_; } 43 44 // Return the logical index of the current character, independent of the 45 // number of bytes each character takes. char_pos()46 int32_t char_pos() const { return char_pos_; } 47 48 // Return the current char. get()49 int32_t get() const { return char_; } 50 51 // Returns true if we're at the end of the string. end()52 bool end() const { return array_pos_ == len_; } 53 54 // Advance to the next actual character. Returns false if we're at the 55 // end of the string. 56 bool Advance(); 57 58 private: 59 // The string we're iterating over. 60 const uint8_t* str_; 61 62 // The length of the encoded string. 63 int32_t len_; 64 65 // Array index. 66 int32_t array_pos_; 67 68 // The next array index. 69 int32_t next_pos_; 70 71 // Character index. 72 int32_t char_pos_; 73 74 // The current character. 75 int32_t char_; 76 77 DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); 78 }; 79 80 class BASE_I18N_EXPORT UTF16CharIterator { 81 public: 82 // Requires |str| to live as long as the UTF16CharIterator does. 83 explicit UTF16CharIterator(const string16* str); 84 UTF16CharIterator(const char16* str, size_t str_len); 85 ~UTF16CharIterator(); 86 87 // Return the starting array index of the current character within the 88 // string. array_pos()89 int32_t array_pos() const { return array_pos_; } 90 91 // Return the logical index of the current character, independent of the 92 // number of codewords each character takes. char_pos()93 int32_t char_pos() const { return char_pos_; } 94 95 // Return the current char. get()96 int32_t get() const { return char_; } 97 98 // Returns true if we're at the end of the string. end()99 bool end() const { return array_pos_ == len_; } 100 101 // Advance to the next actual character. Returns false if we're at the 102 // end of the string. 103 bool Advance(); 104 105 private: 106 // Fills in the current character we found and advances to the next 107 // character, updating all flags as necessary. 108 void ReadChar(); 109 110 // The string we're iterating over. 111 const char16* str_; 112 113 // The length of the encoded string. 114 int32_t len_; 115 116 // Array index. 117 int32_t array_pos_; 118 119 // The next array index. 120 int32_t next_pos_; 121 122 // Character index. 123 int32_t char_pos_; 124 125 // The current character. 126 int32_t char_; 127 128 DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); 129 }; 130 131 } // namespace i18n 132 } // namespace base 133 134 #endif // BASE_I18N_CHAR_ITERATOR_H_ 135