// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_WIDESTRING_H_
8 #define CORE_FXCRT_WIDESTRING_H_
9 
10 #include <stdarg.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <wchar.h>
14 
15 #include <functional>
16 #include <iosfwd>
17 #include <iterator>
18 #include <utility>
19 
20 #include "core/fxcrt/retain_ptr.h"
21 #include "core/fxcrt/string_data_template.h"
22 #include "core/fxcrt/string_view_template.h"
23 #include "third_party/abseil-cpp/absl/types/optional.h"
24 #include "third_party/base/check.h"
25 #include "third_party/base/span.h"
26 
27 namespace fxcrt {
28 
29 class ByteString;
30 
31 // A mutable string with shared buffers using copy-on-write semantics that
32 // avoids the cost of std::string's iterator stability guarantees.
33 class WideString {
34  public:
35   using CharType = wchar_t;
36   using const_iterator = const CharType*;
37   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
38 
39   [[nodiscard]] static WideString FormatInteger(int i);
40   [[nodiscard]] static WideString Format(const wchar_t* pFormat, ...);
41   [[nodiscard]] static WideString FormatV(const wchar_t* lpszFormat,
42                                           va_list argList);
43 
44   WideString();
45   WideString(const WideString& other);
46 
47   // Move-construct a WideString. After construction, |other| is empty.
48   WideString(WideString&& other) noexcept;
49 
50   // Make a one-character string from one wide char.
51   explicit WideString(wchar_t ch);
52 
53   // Deliberately implicit to avoid calling on every string literal.
54   // NOLINTNEXTLINE(runtime/explicit)
55   WideString(const wchar_t* ptr);
56 
57   // No implicit conversions from byte strings.
58   // NOLINTNEXTLINE(runtime/explicit)
59   WideString(char) = delete;
60 
61   WideString(const wchar_t* pStr, size_t len);
62 
63   explicit WideString(WideStringView str);
64   WideString(WideStringView str1, WideStringView str2);
65   WideString(const std::initializer_list<WideStringView>& list);
66 
67   ~WideString();
68 
69   [[nodiscard]] static WideString FromASCII(ByteStringView str);
70   [[nodiscard]] static WideString FromLatin1(ByteStringView str);
71   [[nodiscard]] static WideString FromDefANSI(ByteStringView str);
72   [[nodiscard]] static WideString FromUTF8(ByteStringView str);
73   [[nodiscard]] static WideString FromUTF16LE(const unsigned short* str,
74                                               size_t len);
75   [[nodiscard]] static WideString FromUTF16BE(const unsigned short* wstr,
76                                               size_t wlen);
77 
78   [[nodiscard]] static size_t WStringLength(const unsigned short* str);
79 
80   // Explicit conversion to C-style wide string.
81   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()82   const wchar_t* c_str() const { return m_pData ? m_pData->m_String : L""; }
83 
84   // Explicit conversion to WideStringView.
85   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringView()86   WideStringView AsStringView() const {
87     return WideStringView(c_str(), GetLength());
88   }
89 
90   // Explicit conversion to span.
91   // Note: Any subsequent modification of |this| will invalidate the result.
span()92   pdfium::span<const wchar_t> span() const {
93     return pdfium::make_span(m_pData ? m_pData->m_String : nullptr,
94                              GetLength());
95   }
96 
97   // Note: Any subsequent modification of |this| will invalidate iterators.
begin()98   const_iterator begin() const { return m_pData ? m_pData->m_String : nullptr; }
end()99   const_iterator end() const {
100     return m_pData ? m_pData->m_String + m_pData->m_nDataLength : nullptr;
101   }
102 
103   // Note: Any subsequent modification of |this| will invalidate iterators.
rbegin()104   const_reverse_iterator rbegin() const {
105     return const_reverse_iterator(end());
106   }
rend()107   const_reverse_iterator rend() const {
108     return const_reverse_iterator(begin());
109   }
110 
111   // Holds on to buffer if possible for later re-use. Assign WideString()
112   // to force immediate release if desired.
113   void clear();
114 
GetLength()115   size_t GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
GetStringLength()116   size_t GetStringLength() const {
117     return m_pData ? wcslen(m_pData->m_String) : 0;
118   }
IsEmpty()119   bool IsEmpty() const { return !GetLength(); }
IsValidIndex(size_t index)120   bool IsValidIndex(size_t index) const { return index < GetLength(); }
IsValidLength(size_t length)121   bool IsValidLength(size_t length) const { return length <= GetLength(); }
122 
123   WideString& operator=(const wchar_t* str);
124   WideString& operator=(WideStringView str);
125   WideString& operator=(const WideString& that);
126 
127   // Move-assign a WideString. After assignment, |that| is empty.
128   WideString& operator=(WideString&& that) noexcept;
129 
130   WideString& operator+=(const wchar_t* str);
131   WideString& operator+=(wchar_t ch);
132   WideString& operator+=(const WideString& str);
133   WideString& operator+=(WideStringView str);
134 
135   bool operator==(const wchar_t* ptr) const;
136   bool operator==(WideStringView str) const;
137   bool operator==(const WideString& other) const;
138 
139   bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
140   bool operator!=(WideStringView str) const { return !(*this == str); }
141   bool operator!=(const WideString& other) const { return !(*this == other); }
142 
143   bool operator<(const wchar_t* ptr) const;
144   bool operator<(WideStringView str) const;
145   bool operator<(const WideString& other) const;
146 
147   CharType operator[](const size_t index) const {
148     CHECK(IsValidIndex(index));
149     return m_pData->m_String[index];
150   }
151 
Front()152   CharType Front() const { return GetLength() ? (*this)[0] : 0; }
Back()153   CharType Back() const { return GetLength() ? (*this)[GetLength() - 1] : 0; }
154 
155   void SetAt(size_t index, wchar_t c);
156 
157   int Compare(const wchar_t* str) const;
158   int Compare(const WideString& str) const;
159   int CompareNoCase(const wchar_t* str) const;
160 
161   WideString Substr(size_t offset) const;
162   WideString Substr(size_t first, size_t count) const;
163   WideString First(size_t count) const;
164   WideString Last(size_t count) const;
165 
166   size_t Insert(size_t index, wchar_t ch);
InsertAtFront(wchar_t ch)167   size_t InsertAtFront(wchar_t ch) { return Insert(0, ch); }
InsertAtBack(wchar_t ch)168   size_t InsertAtBack(wchar_t ch) { return Insert(GetLength(), ch); }
169   size_t Delete(size_t index, size_t count = 1);
170 
171   void MakeLower();
172   void MakeUpper();
173 
174   void Trim();
175   void Trim(wchar_t target);
176   void Trim(WideStringView targets);
177 
178   void TrimLeft();
179   void TrimLeft(wchar_t target);
180   void TrimLeft(WideStringView targets);
181 
182   void TrimRight();
183   void TrimRight(wchar_t target);
184   void TrimRight(WideStringView targets);
185 
186   void Reserve(size_t len);
187 
188   // Note: any modification of the string (including ReleaseBuffer()) may
189   // invalidate the span, which must not outlive its buffer.
190   pdfium::span<wchar_t> GetBuffer(size_t nMinBufLength);
191   void ReleaseBuffer(size_t nNewLength);
192 
193   int GetInteger() const;
194 
195   absl::optional<size_t> Find(WideStringView subStr, size_t start = 0) const;
196   absl::optional<size_t> Find(wchar_t ch, size_t start = 0) const;
197   absl::optional<size_t> ReverseFind(wchar_t ch) const;
198 
199   bool Contains(WideStringView lpszSub, size_t start = 0) const {
200     return Find(lpszSub, start).has_value();
201   }
202 
203   bool Contains(char ch, size_t start = 0) const {
204     return Find(ch, start).has_value();
205   }
206 
207   size_t Replace(WideStringView pOld, WideStringView pNew);
208   size_t Remove(wchar_t ch);
209 
IsASCII()210   bool IsASCII() const { return AsStringView().IsASCII(); }
EqualsASCII(ByteStringView that)211   bool EqualsASCII(ByteStringView that) const {
212     return AsStringView().EqualsASCII(that);
213   }
EqualsASCIINoCase(ByteStringView that)214   bool EqualsASCIINoCase(ByteStringView that) const {
215     return AsStringView().EqualsASCIINoCase(that);
216   }
217 
218   ByteString ToASCII() const;
219   ByteString ToLatin1() const;
220   ByteString ToDefANSI() const;
221   ByteString ToUTF8() const;
222 
223   // This method will add \0\0 to the end of the string to represent the
224   // wide string terminator. These values are in the string, not just the data,
225   // so GetLength() will include them.
226   ByteString ToUTF16LE() const;
227 
228   // Replace the characters &<>'" with HTML entities.
229   WideString EncodeEntities() const;
230 
231  protected:
232   using StringData = StringDataTemplate<wchar_t>;
233 
234   void ReallocBeforeWrite(size_t nNewLength);
235   void AllocBeforeWrite(size_t nNewLength);
236   void AllocCopy(WideString& dest, size_t nCopyLen, size_t nCopyIndex) const;
237   void AssignCopy(const wchar_t* pSrcData, size_t nSrcLen);
238   void Concat(const wchar_t* pSrcData, size_t nSrcLen);
239   intptr_t ReferenceCountForTesting() const;
240 
241   RetainPtr<StringData> m_pData;
242 
243   friend class WideString_Assign_Test;
244   friend class WideString_ConcatInPlace_Test;
245   friend class WideString_Construct_Test;
246   friend class StringPool_WideString_Test;
247 };
248 
249 inline WideString operator+(WideStringView str1, WideStringView str2) {
250   return WideString(str1, str2);
251 }
252 inline WideString operator+(WideStringView str1, const wchar_t* str2) {
253   return WideString(str1, str2);
254 }
255 inline WideString operator+(const wchar_t* str1, WideStringView str2) {
256   return WideString(str1, str2);
257 }
258 inline WideString operator+(WideStringView str1, wchar_t ch) {
259   return WideString(str1, WideStringView(ch));
260 }
261 inline WideString operator+(wchar_t ch, WideStringView str2) {
262   return WideString(WideStringView(ch), str2);
263 }
264 inline WideString operator+(const WideString& str1, const WideString& str2) {
265   return WideString(str1.AsStringView(), str2.AsStringView());
266 }
267 inline WideString operator+(const WideString& str1, wchar_t ch) {
268   return WideString(str1.AsStringView(), WideStringView(ch));
269 }
270 inline WideString operator+(wchar_t ch, const WideString& str2) {
271   return WideString(WideStringView(ch), str2.AsStringView());
272 }
273 inline WideString operator+(const WideString& str1, const wchar_t* str2) {
274   return WideString(str1.AsStringView(), str2);
275 }
276 inline WideString operator+(const wchar_t* str1, const WideString& str2) {
277   return WideString(str1, str2.AsStringView());
278 }
279 inline WideString operator+(const WideString& str1, WideStringView str2) {
280   return WideString(str1.AsStringView(), str2);
281 }
282 inline WideString operator+(WideStringView str1, const WideString& str2) {
283   return WideString(str1, str2.AsStringView());
284 }
285 inline bool operator==(const wchar_t* lhs, const WideString& rhs) {
286   return rhs == lhs;
287 }
288 inline bool operator==(WideStringView lhs, const WideString& rhs) {
289   return rhs == lhs;
290 }
291 inline bool operator!=(const wchar_t* lhs, const WideString& rhs) {
292   return rhs != lhs;
293 }
294 inline bool operator!=(WideStringView lhs, const WideString& rhs) {
295   return rhs != lhs;
296 }
297 inline bool operator<(const wchar_t* lhs, const WideString& rhs) {
298   return rhs.Compare(lhs) > 0;
299 }
300 
301 std::wostream& operator<<(std::wostream& os, const WideString& str);
302 std::ostream& operator<<(std::ostream& os, const WideString& str);
303 std::wostream& operator<<(std::wostream& os, WideStringView str);
304 std::ostream& operator<<(std::ostream& os, WideStringView str);
305 
306 }  // namespace fxcrt
307 
308 using WideString = fxcrt::WideString;
309 
310 uint32_t FX_HashCode_GetW(WideStringView str);
311 uint32_t FX_HashCode_GetLoweredW(WideStringView str);
312 
313 namespace std {
314 
315 template <>
316 struct hash<WideString> {
317   size_t operator()(const WideString& str) const {
318     return FX_HashCode_GetW(str.AsStringView());
319   }
320 };
321 
322 }  // namespace std
323 
324 extern template struct std::hash<WideString>;
325 
326 #endif  // CORE_FXCRT_WIDESTRING_H_
327