1 // Copyright (C) 2011 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Author: Philippe Liard 16 17 #ifndef I18N_PHONENUMBERS_UNICODESTRING_H_ 18 #define I18N_PHONENUMBERS_UNICODESTRING_H_ 19 20 #include "phonenumbers/utf/unicodetext.h" 21 22 #include <cstring> 23 #include <limits> 24 25 namespace i18n { 26 namespace phonenumbers { 27 28 // This class supports the minimal subset of icu::UnicodeString needed by 29 // AsYouTypeFormatter in order to let the libphonenumber not depend on ICU 30 // which is not available by default on some systems, such as iOS. 31 class UnicodeString { 32 public: UnicodeString()33 UnicodeString() : cached_index_(-1) {} 34 35 // Constructs a new unicode string copying the provided C string. UnicodeString(const char * utf8)36 explicit UnicodeString(const char* utf8) 37 : text_(UTF8ToUnicodeText(utf8, static_cast<int>(std::strlen(utf8)))), 38 cached_index_(-1) {} 39 40 // Constructs a new unicode string containing the provided codepoint. UnicodeString(char32 codepoint)41 explicit UnicodeString(char32 codepoint) : cached_index_(-1) { 42 append(codepoint); 43 } 44 UnicodeString(const UnicodeString & src)45 UnicodeString(const UnicodeString& src) 46 : text_(src.text_), cached_index_(-1) {} 47 48 UnicodeString& operator=(const UnicodeString& src); 49 50 bool operator==(const UnicodeString& rhs) const; 51 52 void append(const UnicodeString& unicode_string); 53 append(char32 codepoint)54 inline void append(char32 codepoint) { 55 invalidateCachedIndex(); 56 text_.push_back(codepoint); 57 } 58 59 typedef UnicodeText::const_iterator const_iterator; 60 begin()61 inline const_iterator begin() const { 62 return text_.begin(); 63 } 64 end()65 inline const_iterator end() const { 66 return text_.end(); 67 } 68 69 // Returns the index of the provided codepoint or -1 if not found. 70 int indexOf(char32 codepoint) const; 71 72 // Returns the number of codepoints contained in the unicode string. length()73 inline int length() const { 74 return text_.size(); 75 } 76 77 // Clears the unicode string. remove()78 inline void remove() { 79 invalidateCachedIndex(); 80 text_.clear(); 81 } 82 83 // Replaces the substring located at [ start, start + length - 1 ] with the 84 // provided unicode string. 85 void replace(int start, int length, const UnicodeString& src); 86 87 void setCharAt(int pos, char32 c); 88 89 // Copies the provided C string. setTo(const char * s,size_t len)90 inline void setTo(const char* s, size_t len) { 91 invalidateCachedIndex(); 92 text_.CopyUTF8(s, static_cast<int>(len)); 93 } 94 95 // Was this UnicodeString created from valid UTF-8? UTF8WasValid()96 bool UTF8WasValid() const { return text_.UTF8WasValid(); } 97 98 // Returns the substring located at [ start, start + length - 1 ] without 99 // copying the underlying C string. If one of the provided parameters is out 100 // of range, the function returns an empty unicode string. 101 UnicodeString tempSubString( 102 int start, 103 int length = std::numeric_limits<int>::max()) const; 104 toUTF8String(string & out)105 inline void toUTF8String(string& out) const { 106 out = UnicodeTextToUTF8(text_); 107 } 108 109 char32 operator[](int index) const; 110 111 private: 112 UnicodeText text_; 113 114 // As UnicodeText doesn't provide random access, an operator[] implementation 115 // would naively iterate from the beginning of the string to the supplied 116 // index which would be inefficient. 117 // As operator[] is very likely to be called in a loop with consecutive 118 // indexes, we save the corresponding iterator so we can reuse it the next 119 // time it is called. 120 121 // The following function which invalidates the cached index corresponding to 122 // the iterator position must be called every time the unicode string is 123 // modified (i.e. in all the non-const methods). invalidateCachedIndex()124 inline void invalidateCachedIndex() { 125 cached_index_ = -1; 126 } 127 128 // Iterator corresponding to the cached index below, used by operator[]. 129 mutable UnicodeText::const_iterator cached_it_; 130 mutable int cached_index_; 131 }; 132 133 } // namespace phonenumbers 134 } // namespace i18n 135 136 #endif // I18N_PHONENUMBERS_UNICODESTRING_H_ 137