• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_FX_STRING_H_
8 #define CORE_FXCRT_FX_STRING_H_
9 
10 #include <stdint.h>  // For intptr_t.
11 
12 #include <algorithm>
13 #include <functional>
14 
15 #include "core/fxcrt/cfx_retain_ptr.h"
16 #include "core/fxcrt/cfx_string_c_template.h"
17 #include "core/fxcrt/cfx_string_data_template.h"
18 #include "core/fxcrt/fx_memory.h"
19 #include "core/fxcrt/fx_system.h"
20 
21 class CFX_ByteString;
22 class CFX_WideString;
23 
24 using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>;
25 using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>;
26 
// Packs four byte-sized values into one 32-bit identifier, with |c1| in the
// most significant byte and |c4| in the least significant byte. Every
// argument is converted as a whole (the cast wraps the complete argument
// expression), so operands such as `a | b` or `cond ? x : y` are safe.
#define FXBSTR_ID(c1, c2, c3, c4)                                    \
  ((static_cast<uint32_t>(c1) << 24) | (static_cast<uint32_t>(c2) << 16) | \
   (static_cast<uint32_t>(c3) << 8) | static_cast<uint32_t>(c4))
30 
31 // A mutable string with shared buffers using copy-on-write semantics that
32 // avoids the cost of std::string's iterator stability guarantees.
33 class CFX_ByteString {
34  public:
35   using CharType = FX_CHAR;
36 
37   CFX_ByteString();
38   CFX_ByteString(const CFX_ByteString& other);
39   CFX_ByteString(CFX_ByteString&& other);
40 
41   // Deliberately implicit to avoid calling on every string literal.
42   // NOLINTNEXTLINE(runtime/explicit)
43   CFX_ByteString(char ch);
44   // NOLINTNEXTLINE(runtime/explicit)
45   CFX_ByteString(const FX_CHAR* ptr);
46 
47   CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len);
48   CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len);
49 
50   explicit CFX_ByteString(const CFX_ByteStringC& bstrc);
51   CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2);
52 
53   ~CFX_ByteString();
54 
clear()55   void clear() { m_pData.Reset(); }
56 
57   static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1);
58   static CFX_ByteString FromUnicode(const CFX_WideString& str);
59 
60   // Explicit conversion to C-style string.
61   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()62   const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; }
63 
64   // Explicit conversion to uint8_t*.
65   // Note: Any subsequent modification of |this| will invalidate the result.
raw_str()66   const uint8_t* raw_str() const {
67     return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String)
68                    : nullptr;
69   }
70 
71   // Explicit conversion to CFX_ByteStringC.
72   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()73   CFX_ByteStringC AsStringC() const {
74     return CFX_ByteStringC(raw_str(), GetLength());
75   }
76 
GetLength()77   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()78   bool IsEmpty() const { return !GetLength(); }
79 
80   int Compare(const CFX_ByteStringC& str) const;
81   bool EqualNoCase(const CFX_ByteStringC& str) const;
82 
83   bool operator==(const char* ptr) const;
84   bool operator==(const CFX_ByteStringC& str) const;
85   bool operator==(const CFX_ByteString& other) const;
86 
87   bool operator!=(const char* ptr) const { return !(*this == ptr); }
88   bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); }
89   bool operator!=(const CFX_ByteString& other) const {
90     return !(*this == other);
91   }
92 
93   bool operator<(const CFX_ByteString& str) const;
94 
95   const CFX_ByteString& operator=(const FX_CHAR* str);
96   const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc);
97   const CFX_ByteString& operator=(const CFX_ByteString& stringSrc);
98 
99   const CFX_ByteString& operator+=(FX_CHAR ch);
100   const CFX_ByteString& operator+=(const FX_CHAR* str);
101   const CFX_ByteString& operator+=(const CFX_ByteString& str);
102   const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
103 
GetAt(FX_STRSIZE nIndex)104   uint8_t GetAt(FX_STRSIZE nIndex) const {
105     return m_pData ? m_pData->m_String[nIndex] : 0;
106   }
107 
108   uint8_t operator[](FX_STRSIZE nIndex) const {
109     return m_pData ? m_pData->m_String[nIndex] : 0;
110   }
111 
112   void SetAt(FX_STRSIZE nIndex, FX_CHAR ch);
113   FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch);
114   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
115 
116   void Format(const FX_CHAR* lpszFormat, ...);
117   void FormatV(const FX_CHAR* lpszFormat, va_list argList);
118 
119   void Reserve(FX_STRSIZE len);
120   FX_CHAR* GetBuffer(FX_STRSIZE len);
121   void ReleaseBuffer(FX_STRSIZE len = -1);
122 
123   CFX_ByteString Mid(FX_STRSIZE first) const;
124   CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
125   CFX_ByteString Left(FX_STRSIZE count) const;
126   CFX_ByteString Right(FX_STRSIZE count) const;
127 
128   FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const;
129   FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const;
130   FX_STRSIZE ReverseFind(FX_CHAR ch) const;
131 
132   void MakeLower();
133   void MakeUpper();
134 
135   void TrimRight();
136   void TrimRight(FX_CHAR chTarget);
137   void TrimRight(const CFX_ByteStringC& lpszTargets);
138 
139   void TrimLeft();
140   void TrimLeft(FX_CHAR chTarget);
141   void TrimLeft(const CFX_ByteStringC& lpszTargets);
142 
143   FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld,
144                      const CFX_ByteStringC& lpszNew);
145 
146   FX_STRSIZE Remove(FX_CHAR ch);
147 
148   CFX_WideString UTF8Decode() const;
149 
150   uint32_t GetID(FX_STRSIZE start_pos = 0) const;
151 
152 #define FXFORMAT_SIGNED 1
153 #define FXFORMAT_HEX 2
154 #define FXFORMAT_CAPITAL 4
155 
156   static CFX_ByteString FormatInteger(int i, uint32_t flags = 0);
157   static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0);
158 
159  protected:
160   using StringData = CFX_StringDataTemplate<FX_CHAR>;
161 
162   void ReallocBeforeWrite(FX_STRSIZE nNewLen);
163   void AllocBeforeWrite(FX_STRSIZE nNewLen);
164   void AllocCopy(CFX_ByteString& dest,
165                  FX_STRSIZE nCopyLen,
166                  FX_STRSIZE nCopyIndex) const;
167   void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen);
168   void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
169 
170   CFX_RetainPtr<StringData> m_pData;
171 
172   friend class fxcrt_ByteStringConcat_Test;
173   friend class fxcrt_ByteStringPool_Test;
174 };
175 
176 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
177   return rhs == lhs;
178 }
179 inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
180   return rhs == lhs;
181 }
182 inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) {
183   return rhs != lhs;
184 }
185 inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
186   return rhs != lhs;
187 }
188 
189 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
190                                 const CFX_ByteStringC& str2) {
191   return CFX_ByteString(str1, str2);
192 }
193 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
194                                 const FX_CHAR* str2) {
195   return CFX_ByteString(str1, str2);
196 }
197 inline CFX_ByteString operator+(const FX_CHAR* str1,
198                                 const CFX_ByteStringC& str2) {
199   return CFX_ByteString(str1, str2);
200 }
201 inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) {
202   return CFX_ByteString(str1, CFX_ByteStringC(ch));
203 }
204 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) {
205   return CFX_ByteString(ch, str2);
206 }
207 inline CFX_ByteString operator+(const CFX_ByteString& str1,
208                                 const CFX_ByteString& str2) {
209   return CFX_ByteString(str1.AsStringC(), str2.AsStringC());
210 }
211 inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) {
212   return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch));
213 }
214 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) {
215   return CFX_ByteString(ch, str2.AsStringC());
216 }
217 inline CFX_ByteString operator+(const CFX_ByteString& str1,
218                                 const FX_CHAR* str2) {
219   return CFX_ByteString(str1.AsStringC(), str2);
220 }
221 inline CFX_ByteString operator+(const FX_CHAR* str1,
222                                 const CFX_ByteString& str2) {
223   return CFX_ByteString(str1, str2.AsStringC());
224 }
225 inline CFX_ByteString operator+(const CFX_ByteString& str1,
226                                 const CFX_ByteStringC& str2) {
227   return CFX_ByteString(str1.AsStringC(), str2);
228 }
229 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
230                                 const CFX_ByteString& str2) {
231   return CFX_ByteString(str1, str2.AsStringC());
232 }
233 
234 // A mutable string with shared buffers using copy-on-write semantics that
235 // avoids the cost of std::string's iterator stability guarantees.
236 class CFX_WideString {
237  public:
238   using CharType = FX_WCHAR;
239 
240   CFX_WideString();
241   CFX_WideString(const CFX_WideString& other);
242   CFX_WideString(CFX_WideString&& other);
243 
244   // Deliberately implicit to avoid calling on every string literal.
245   // NOLINTNEXTLINE(runtime/explicit)
246   CFX_WideString(FX_WCHAR ch);
247   // NOLINTNEXTLINE(runtime/explicit)
248   CFX_WideString(const FX_WCHAR* ptr);
249 
250   CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len);
251 
252   explicit CFX_WideString(const CFX_WideStringC& str);
253   CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2);
254 
255   ~CFX_WideString();
256 
257   static CFX_WideString FromLocal(const CFX_ByteStringC& str);
258   static CFX_WideString FromCodePage(const CFX_ByteStringC& str,
259                                      uint16_t codepage);
260 
261   static CFX_WideString FromUTF8(const CFX_ByteStringC& str);
262   static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len);
263 
264   static FX_STRSIZE WStringLength(const unsigned short* str);
265 
266   // Explicit conversion to C-style wide string.
267   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()268   const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; }
269 
270   // Explicit conversion to CFX_WideStringC.
271   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()272   CFX_WideStringC AsStringC() const {
273     return CFX_WideStringC(c_str(), GetLength());
274   }
275 
clear()276   void clear() { m_pData.Reset(); }
277 
GetLength()278   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()279   bool IsEmpty() const { return !GetLength(); }
280 
281   const CFX_WideString& operator=(const FX_WCHAR* str);
282   const CFX_WideString& operator=(const CFX_WideString& stringSrc);
283   const CFX_WideString& operator=(const CFX_WideStringC& stringSrc);
284 
285   const CFX_WideString& operator+=(const FX_WCHAR* str);
286   const CFX_WideString& operator+=(FX_WCHAR ch);
287   const CFX_WideString& operator+=(const CFX_WideString& str);
288   const CFX_WideString& operator+=(const CFX_WideStringC& str);
289 
290   bool operator==(const wchar_t* ptr) const;
291   bool operator==(const CFX_WideStringC& str) const;
292   bool operator==(const CFX_WideString& other) const;
293 
294   bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
295   bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); }
296   bool operator!=(const CFX_WideString& other) const {
297     return !(*this == other);
298   }
299 
300   bool operator<(const CFX_WideString& str) const;
301 
GetAt(FX_STRSIZE nIndex)302   FX_WCHAR GetAt(FX_STRSIZE nIndex) const {
303     return m_pData ? m_pData->m_String[nIndex] : 0;
304   }
305 
306   FX_WCHAR operator[](FX_STRSIZE nIndex) const {
307     return m_pData ? m_pData->m_String[nIndex] : 0;
308   }
309 
310   void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch);
311 
312   int Compare(const FX_WCHAR* str) const;
313   int Compare(const CFX_WideString& str) const;
314   int CompareNoCase(const FX_WCHAR* str) const;
315 
316   CFX_WideString Mid(FX_STRSIZE first) const;
317   CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
318   CFX_WideString Left(FX_STRSIZE count) const;
319   CFX_WideString Right(FX_STRSIZE count) const;
320 
321   FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch);
322   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
323 
324   void Format(const FX_WCHAR* lpszFormat, ...);
325   void FormatV(const FX_WCHAR* lpszFormat, va_list argList);
326 
327   void MakeLower();
328   void MakeUpper();
329 
330   void TrimRight();
331   void TrimRight(FX_WCHAR chTarget);
332   void TrimRight(const CFX_WideStringC& pTargets);
333 
334   void TrimLeft();
335   void TrimLeft(FX_WCHAR chTarget);
336   void TrimLeft(const CFX_WideStringC& pTargets);
337 
338   void Reserve(FX_STRSIZE len);
339   FX_WCHAR* GetBuffer(FX_STRSIZE len);
340   void ReleaseBuffer(FX_STRSIZE len = -1);
341 
342   int GetInteger() const;
343   FX_FLOAT GetFloat() const;
344 
345   FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const;
346   FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const;
347   FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew);
348   FX_STRSIZE Remove(FX_WCHAR ch);
349 
350   CFX_ByteString UTF8Encode() const;
351   CFX_ByteString UTF16LE_Encode() const;
352 
353  protected:
354   using StringData = CFX_StringDataTemplate<FX_WCHAR>;
355 
356   void ReallocBeforeWrite(FX_STRSIZE nLen);
357   void AllocBeforeWrite(FX_STRSIZE nLen);
358   void AllocCopy(CFX_WideString& dest,
359                  FX_STRSIZE nCopyLen,
360                  FX_STRSIZE nCopyIndex) const;
361   void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen);
362   void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
363 
364   CFX_RetainPtr<StringData> m_pData;
365 
366   friend class fxcrt_WideStringConcatInPlace_Test;
367   friend class fxcrt_WideStringPool_Test;
368 };
369 
370 inline CFX_WideString operator+(const CFX_WideStringC& str1,
371                                 const CFX_WideStringC& str2) {
372   return CFX_WideString(str1, str2);
373 }
374 inline CFX_WideString operator+(const CFX_WideStringC& str1,
375                                 const FX_WCHAR* str2) {
376   return CFX_WideString(str1, str2);
377 }
378 inline CFX_WideString operator+(const FX_WCHAR* str1,
379                                 const CFX_WideStringC& str2) {
380   return CFX_WideString(str1, str2);
381 }
382 inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) {
383   return CFX_WideString(str1, CFX_WideStringC(ch));
384 }
385 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) {
386   return CFX_WideString(ch, str2);
387 }
388 inline CFX_WideString operator+(const CFX_WideString& str1,
389                                 const CFX_WideString& str2) {
390   return CFX_WideString(str1.AsStringC(), str2.AsStringC());
391 }
392 inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) {
393   return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch));
394 }
395 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) {
396   return CFX_WideString(ch, str2.AsStringC());
397 }
398 inline CFX_WideString operator+(const CFX_WideString& str1,
399                                 const FX_WCHAR* str2) {
400   return CFX_WideString(str1.AsStringC(), str2);
401 }
402 inline CFX_WideString operator+(const FX_WCHAR* str1,
403                                 const CFX_WideString& str2) {
404   return CFX_WideString(str1, str2.AsStringC());
405 }
406 inline CFX_WideString operator+(const CFX_WideString& str1,
407                                 const CFX_WideStringC& str2) {
408   return CFX_WideString(str1.AsStringC(), str2);
409 }
410 inline CFX_WideString operator+(const CFX_WideStringC& str1,
411                                 const CFX_WideString& str2) {
412   return CFX_WideString(str1, str2.AsStringC());
413 }
414 inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) {
415   return rhs == lhs;
416 }
417 inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
418   return rhs == lhs;
419 }
420 inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) {
421   return rhs != lhs;
422 }
423 inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
424   return rhs != lhs;
425 }
426 
427 CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr);
428 FX_FLOAT FX_atof(const CFX_ByteStringC& str);
FX_atof(const CFX_WideStringC & wsStr)429 inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
430   return FX_atof(FX_UTF8Encode(wsStr).c_str());
431 }
432 bool FX_atonum(const CFX_ByteStringC& str, void* pData);
433 FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
434 
435 uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
436 uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase);
437 
438 namespace std {
439 
440 template <>
441 struct hash<CFX_ByteString> {
442   std::size_t operator()(const CFX_ByteString& str) const {
443     return FX_HashCode_GetA(str.AsStringC(), false);
444   }
445 };
446 
447 template <>
448 struct hash<CFX_WideString> {
449   std::size_t operator()(const CFX_WideString& str) const {
450     return FX_HashCode_GetW(str.AsStringC(), false);
451   }
452 };
453 
454 }  // namespace std
455 
456 extern template struct std::hash<CFX_ByteString>;
457 extern template struct std::hash<CFX_WideString>;
458 
459 #endif  // CORE_FXCRT_FX_STRING_H_
460