1 // Copyright 2017 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_WIDESTRING_H_ 8 #define CORE_FXCRT_WIDESTRING_H_ 9 10 #include <functional> 11 #include <iterator> 12 #include <ostream> 13 #include <utility> 14 15 #include "core/fxcrt/fx_system.h" 16 #include "core/fxcrt/retain_ptr.h" 17 #include "core/fxcrt/string_data_template.h" 18 #include "core/fxcrt/string_view_template.h" 19 #include "third_party/base/logging.h" 20 #include "third_party/base/optional.h" 21 #include "third_party/base/span.h" 22 23 namespace fxcrt { 24 25 class ByteString; 26 27 // A mutable string with shared buffers using copy-on-write semantics that 28 // avoids the cost of std::string's iterator stability guarantees. 29 class WideString { 30 public: 31 using CharType = wchar_t; 32 using const_iterator = const CharType*; 33 using const_reverse_iterator = std::reverse_iterator<const_iterator>; 34 35 static WideString Format(const wchar_t* pFormat, ...) WARN_UNUSED_RESULT; 36 static WideString FormatV(const wchar_t* lpszFormat, 37 va_list argList) WARN_UNUSED_RESULT; 38 39 WideString(); 40 WideString(const WideString& other); 41 42 // Move-construct a WideString. After construction, |other| is empty. 43 WideString(WideString&& other) noexcept; 44 45 // Deliberately implicit to avoid calling on every string literal. 46 // NOLINTNEXTLINE(runtime/explicit) 47 WideString(wchar_t ch); 48 // NOLINTNEXTLINE(runtime/explicit) 49 WideString(const wchar_t* ptr); 50 51 // No implicit conversions from byte strings. 52 // NOLINTNEXTLINE(runtime/explicit) 53 WideString(char) = delete; 54 55 WideString(const wchar_t* pStr, size_t len); 56 57 explicit WideString(WideStringView str); 58 WideString(WideStringView str1, WideStringView str2); 59 WideString(const std::initializer_list<WideStringView>& list); 60 61 ~WideString(); 62 63 static WideString FromASCII(ByteStringView str) WARN_UNUSED_RESULT; 64 static WideString FromLatin1(ByteStringView str) WARN_UNUSED_RESULT; 65 static WideString FromDefANSI(ByteStringView str) WARN_UNUSED_RESULT; 66 static WideString FromUTF8(ByteStringView str) WARN_UNUSED_RESULT; 67 static WideString FromUTF16LE(const unsigned short* str, 68 size_t len) WARN_UNUSED_RESULT; 69 static WideString FromUTF16BE(const unsigned short* wstr, 70 size_t wlen) WARN_UNUSED_RESULT; 71 72 static size_t WStringLength(const unsigned short* str) WARN_UNUSED_RESULT; 73 74 // Explicit conversion to C-style wide string. 75 // Note: Any subsequent modification of |this| will invalidate the result. c_str()76 const wchar_t* c_str() const { return m_pData ? m_pData->m_String : L""; } 77 78 // Explicit conversion to WideStringView. 79 // Note: Any subsequent modification of |this| will invalidate the result. AsStringView()80 WideStringView AsStringView() const { 81 return WideStringView(c_str(), GetLength()); 82 } 83 84 // Explicit conversion to span. 85 // Note: Any subsequent modification of |this| will invalidate the result. span()86 pdfium::span<const wchar_t> span() const { 87 return pdfium::make_span(m_pData ? m_pData->m_String : nullptr, 88 GetLength()); 89 } 90 91 // Note: Any subsequent modification of |this| will invalidate iterators. begin()92 const_iterator begin() const { return m_pData ? m_pData->m_String : nullptr; } end()93 const_iterator end() const { 94 return m_pData ? m_pData->m_String + m_pData->m_nDataLength : nullptr; 95 } 96 97 // Note: Any subsequent modification of |this| will invalidate iterators. rbegin()98 const_reverse_iterator rbegin() const { 99 return const_reverse_iterator(end()); 100 } rend()101 const_reverse_iterator rend() const { 102 return const_reverse_iterator(begin()); 103 } 104 clear()105 void clear() { m_pData.Reset(); } 106 GetLength()107 size_t GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; } GetStringLength()108 size_t GetStringLength() const { 109 return m_pData ? wcslen(m_pData->m_String) : 0; 110 } IsEmpty()111 bool IsEmpty() const { return !GetLength(); } IsValidIndex(size_t index)112 bool IsValidIndex(size_t index) const { return index < GetLength(); } IsValidLength(size_t length)113 bool IsValidLength(size_t length) const { return length <= GetLength(); } 114 115 WideString& operator=(const wchar_t* str); 116 WideString& operator=(WideStringView str); 117 WideString& operator=(const WideString& that); 118 119 // Move-assign a WideString. After assignment, |that| is empty. 120 WideString& operator=(WideString&& that); 121 122 WideString& operator+=(const wchar_t* str); 123 WideString& operator+=(wchar_t ch); 124 WideString& operator+=(const WideString& str); 125 WideString& operator+=(WideStringView str); 126 127 bool operator==(const wchar_t* ptr) const; 128 bool operator==(WideStringView str) const; 129 bool operator==(const WideString& other) const; 130 131 bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); } 132 bool operator!=(WideStringView str) const { return !(*this == str); } 133 bool operator!=(const WideString& other) const { return !(*this == other); } 134 135 bool operator<(const wchar_t* ptr) const; 136 bool operator<(WideStringView str) const; 137 bool operator<(const WideString& other) const; 138 139 CharType operator[](const size_t index) const { 140 CHECK(IsValidIndex(index)); 141 return m_pData->m_String[index]; 142 } 143 Front()144 CharType Front() const { return GetLength() ? (*this)[0] : 0; } Back()145 CharType Back() const { return GetLength() ? (*this)[GetLength() - 1] : 0; } 146 147 void SetAt(size_t index, wchar_t c); 148 149 int Compare(const wchar_t* str) const; 150 int Compare(const WideString& str) const; 151 int CompareNoCase(const wchar_t* str) const; 152 153 WideString Substr(size_t first, size_t count) const; 154 WideString First(size_t count) const; 155 WideString Last(size_t count) const; 156 157 size_t Insert(size_t index, wchar_t ch); InsertAtFront(wchar_t ch)158 size_t InsertAtFront(wchar_t ch) { return Insert(0, ch); } InsertAtBack(wchar_t ch)159 size_t InsertAtBack(wchar_t ch) { return Insert(GetLength(), ch); } 160 size_t Delete(size_t index, size_t count = 1); 161 162 void MakeLower(); 163 void MakeUpper(); 164 165 void Trim(); 166 void Trim(wchar_t target); 167 void Trim(WideStringView targets); 168 169 void TrimLeft(); 170 void TrimLeft(wchar_t target); 171 void TrimLeft(WideStringView targets); 172 173 void TrimRight(); 174 void TrimRight(wchar_t target); 175 void TrimRight(WideStringView targets); 176 177 void Reserve(size_t len); 178 179 // Note: any modification of the string (including ReleaseBuffer()) may 180 // invalidate the span, which must not outlive its buffer. 181 pdfium::span<wchar_t> GetBuffer(size_t nMinBufLength); 182 void ReleaseBuffer(size_t nNewLength); 183 184 int GetInteger() const; 185 186 Optional<size_t> Find(WideStringView subStr, size_t start = 0) const; 187 Optional<size_t> Find(wchar_t ch, size_t start = 0) const; 188 Optional<size_t> ReverseFind(wchar_t ch) const; 189 190 bool Contains(WideStringView lpszSub, size_t start = 0) const { 191 return Find(lpszSub, start).has_value(); 192 } 193 194 bool Contains(char ch, size_t start = 0) const { 195 return Find(ch, start).has_value(); 196 } 197 198 size_t Replace(WideStringView pOld, WideStringView pNew); 199 size_t Remove(wchar_t ch); 200 IsASCII()201 bool IsASCII() const { return AsStringView().IsASCII(); } EqualsASCII(ByteStringView that)202 bool EqualsASCII(ByteStringView that) const { 203 return AsStringView().EqualsASCII(that); 204 } EqualsASCIINoCase(ByteStringView that)205 bool EqualsASCIINoCase(ByteStringView that) const { 206 return AsStringView().EqualsASCIINoCase(that); 207 } 208 209 ByteString ToASCII() const; 210 ByteString ToLatin1() const; 211 ByteString ToDefANSI() const; 212 ByteString ToUTF8() const; 213 214 // This method will add \0\0 to the end of the string to represent the 215 // wide string terminator. These values are in the string, not just the data, 216 // so GetLength() will include them. 217 ByteString ToUTF16LE() const; 218 219 protected: 220 using StringData = StringDataTemplate<wchar_t>; 221 222 void ReallocBeforeWrite(size_t nNewLength); 223 void AllocBeforeWrite(size_t nNewLength); 224 void AllocCopy(WideString& dest, size_t nCopyLen, size_t nCopyIndex) const; 225 void AssignCopy(const wchar_t* pSrcData, size_t nSrcLen); 226 void Concat(const wchar_t* pSrcData, size_t nSrcLen); 227 intptr_t ReferenceCountForTesting() const; 228 229 RetainPtr<StringData> m_pData; 230 231 friend class WideString_Assign_Test; 232 friend class WideString_ConcatInPlace_Test; 233 friend class WideString_Construct_Test; 234 friend class StringPool_WideString_Test; 235 }; 236 237 inline WideString operator+(WideStringView str1, WideStringView str2) { 238 return WideString(str1, str2); 239 } 240 inline WideString operator+(WideStringView str1, const wchar_t* str2) { 241 return WideString(str1, str2); 242 } 243 inline WideString operator+(const wchar_t* str1, WideStringView str2) { 244 return WideString(str1, str2); 245 } 246 inline WideString operator+(WideStringView str1, wchar_t ch) { 247 return WideString(str1, WideStringView(ch)); 248 } 249 inline WideString operator+(wchar_t ch, WideStringView str2) { 250 return WideString(ch, str2); 251 } 252 inline WideString operator+(const WideString& str1, const WideString& str2) { 253 return WideString(str1.AsStringView(), str2.AsStringView()); 254 } 255 inline WideString operator+(const WideString& str1, wchar_t ch) { 256 return WideString(str1.AsStringView(), WideStringView(ch)); 257 } 258 inline WideString operator+(wchar_t ch, const WideString& str2) { 259 return WideString(ch, str2.AsStringView()); 260 } 261 inline WideString operator+(const WideString& str1, const wchar_t* str2) { 262 return WideString(str1.AsStringView(), str2); 263 } 264 inline WideString operator+(const wchar_t* str1, const WideString& str2) { 265 return WideString(str1, str2.AsStringView()); 266 } 267 inline WideString operator+(const WideString& str1, WideStringView str2) { 268 return WideString(str1.AsStringView(), str2); 269 } 270 inline WideString operator+(WideStringView str1, const WideString& str2) { 271 return WideString(str1, str2.AsStringView()); 272 } 273 inline bool operator==(const wchar_t* lhs, const WideString& rhs) { 274 return rhs == lhs; 275 } 276 inline bool operator==(WideStringView lhs, const WideString& rhs) { 277 return rhs == lhs; 278 } 279 inline bool operator!=(const wchar_t* lhs, const WideString& rhs) { 280 return rhs != lhs; 281 } 282 inline bool operator!=(WideStringView lhs, const WideString& rhs) { 283 return rhs != lhs; 284 } 285 inline bool operator<(const wchar_t* lhs, const WideString& rhs) { 286 return rhs.Compare(lhs) > 0; 287 } 288 289 std::wostream& operator<<(std::wostream& os, const WideString& str); 290 std::ostream& operator<<(std::ostream& os, const WideString& str); 291 std::wostream& operator<<(std::wostream& os, WideStringView str); 292 std::ostream& operator<<(std::ostream& os, WideStringView str); 293 294 } // namespace fxcrt 295 296 using WideString = fxcrt::WideString; 297 298 uint32_t FX_HashCode_GetW(WideStringView str, bool bIgnoreCase); 299 300 namespace std { 301 302 template <> 303 struct hash<WideString> { 304 std::size_t operator()(const WideString& str) const { 305 return FX_HashCode_GetW(str.AsStringView(), false); 306 } 307 }; 308 309 } // namespace std 310 311 extern template struct std::hash<WideString>; 312 313 #endif // CORE_FXCRT_WIDESTRING_H_ 314