• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/widestring.h"
8 
9 #include <stddef.h>
10 #include <string.h>
11 
12 #include <algorithm>
13 #include <sstream>
14 
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_safe_types.h"
18 #include "core/fxcrt/fx_system.h"
19 #include "core/fxcrt/string_pool_template.h"
20 #include "third_party/base/check.h"
21 #include "third_party/base/check_op.h"
22 #include "third_party/base/cxx17_backports.h"
23 #include "third_party/base/numerics/safe_math.h"
24 
25 template class fxcrt::StringDataTemplate<wchar_t>;
26 template class fxcrt::StringViewTemplate<wchar_t>;
27 template class fxcrt::StringPoolTemplate<WideString>;
28 template struct std::hash<WideString>;
29 
// Length-modifier flags used by GuessSizeForVSWPrintf(). They are OR-ed with
// the conversion character before switching on it, so each value is a single
// bit above the range of the conversion characters themselves.
#define FORCE_ANSI 0x10000     // 'h' modifier seen.
#define FORCE_UNICODE 0x20000  // 'l' modifier seen.
#define FORCE_INT64 0x40000    // "I64" modifier seen.
33 
34 namespace {
35 
// Characters stripped by the argument-less Trim(): tab, line feed, vertical
// tab, form feed, carriage return and space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
37 
// Length-bounded substring search. Returns a pointer to the first occurrence
// of the |needle_len| characters at |needle| inside the |haystack_len|
// characters at |haystack|, or nullptr when there is no match. An empty
// needle never matches. Neither buffer needs to be NUL-terminated.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || haystack_len < needle_len)
    return nullptr;

  // Last offset at which a full-length match can still start.
  const size_t last_start = haystack_len - needle_len;
  for (size_t pos = 0; pos <= last_start; ++pos) {
    const wchar_t* candidate = haystack + pos;
    if (std::equal(needle, needle + needle_len, candidate))
      return candidate;
  }
  return nullptr;
}
60 
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)61 absl::optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
62                                              va_list argList) {
63   size_t nMaxLen = 0;
64   for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
65     if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
66       ++nMaxLen;
67       continue;
68     }
69     int iWidth = 0;
70     for (; *pStr != 0; pStr++) {
71       if (*pStr == '#') {
72         nMaxLen += 2;
73       } else if (*pStr == '*') {
74         iWidth = va_arg(argList, int);
75       } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76         break;
77       }
78     }
79     if (iWidth == 0) {
80       iWidth = FXSYS_wtoi(pStr);
81       while (FXSYS_IsDecimalDigit(*pStr))
82         ++pStr;
83     }
84     if (iWidth < 0 || iWidth > 128 * 1024)
85       return absl::nullopt;
86     uint32_t nWidth = static_cast<uint32_t>(iWidth);
87     int iPrecision = 0;
88     if (*pStr == '.') {
89       pStr++;
90       if (*pStr == '*') {
91         iPrecision = va_arg(argList, int);
92         pStr++;
93       } else {
94         iPrecision = FXSYS_wtoi(pStr);
95         while (FXSYS_IsDecimalDigit(*pStr))
96           ++pStr;
97       }
98     }
99     if (iPrecision < 0 || iPrecision > 128 * 1024)
100       return absl::nullopt;
101     uint32_t nPrecision = static_cast<uint32_t>(iPrecision);
102     int nModifier = 0;
103     if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
104       pStr += 3;
105       nModifier = FORCE_INT64;
106     } else {
107       switch (*pStr) {
108         case 'h':
109           nModifier = FORCE_ANSI;
110           pStr++;
111           break;
112         case 'l':
113           nModifier = FORCE_UNICODE;
114           pStr++;
115           break;
116         case 'F':
117         case 'N':
118         case 'L':
119           pStr++;
120           break;
121       }
122     }
123     size_t nItemLen = 0;
124     switch (*pStr | nModifier) {
125       case 'c':
126       case 'C':
127         nItemLen = 2;
128         va_arg(argList, int);
129         break;
130       case 'c' | FORCE_ANSI:
131       case 'C' | FORCE_ANSI:
132         nItemLen = 2;
133         va_arg(argList, int);
134         break;
135       case 'c' | FORCE_UNICODE:
136       case 'C' | FORCE_UNICODE:
137         nItemLen = 2;
138         va_arg(argList, int);
139         break;
140       case 's': {
141         const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
142         if (pstrNextArg) {
143           nItemLen = wcslen(pstrNextArg);
144           if (nItemLen < 1) {
145             nItemLen = 1;
146           }
147         } else {
148           nItemLen = 6;
149         }
150       } break;
151       case 'S': {
152         const char* pstrNextArg = va_arg(argList, const char*);
153         if (pstrNextArg) {
154           nItemLen = strlen(pstrNextArg);
155           if (nItemLen < 1) {
156             nItemLen = 1;
157           }
158         } else {
159           nItemLen = 6;
160         }
161       } break;
162       case 's' | FORCE_ANSI:
163       case 'S' | FORCE_ANSI: {
164         const char* pstrNextArg = va_arg(argList, const char*);
165         if (pstrNextArg) {
166           nItemLen = strlen(pstrNextArg);
167           if (nItemLen < 1) {
168             nItemLen = 1;
169           }
170         } else {
171           nItemLen = 6;
172         }
173       } break;
174       case 's' | FORCE_UNICODE:
175       case 'S' | FORCE_UNICODE: {
176         const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
177         if (pstrNextArg) {
178           nItemLen = wcslen(pstrNextArg);
179           if (nItemLen < 1) {
180             nItemLen = 1;
181           }
182         } else {
183           nItemLen = 6;
184         }
185       } break;
186     }
187     if (nItemLen != 0) {
188       if (nPrecision != 0 && nItemLen > nPrecision) {
189         nItemLen = nPrecision;
190       }
191       if (nItemLen < nWidth) {
192         nItemLen = nWidth;
193       }
194     } else {
195       switch (*pStr) {
196         case 'd':
197         case 'i':
198         case 'u':
199         case 'x':
200         case 'X':
201         case 'o':
202           if (nModifier & FORCE_INT64) {
203             va_arg(argList, int64_t);
204           } else {
205             va_arg(argList, int);
206           }
207           nItemLen = 32;
208           if (nItemLen < nWidth + nPrecision) {
209             nItemLen = nWidth + nPrecision;
210           }
211           break;
212         case 'a':
213         case 'A':
214         case 'e':
215         case 'E':
216         case 'g':
217         case 'G':
218           va_arg(argList, double);
219           nItemLen = 128;
220           if (nItemLen < nWidth + nPrecision) {
221             nItemLen = nWidth + nPrecision;
222           }
223           break;
224         case 'f':
225           if (nWidth + nPrecision > 100) {
226             nItemLen = nPrecision + nWidth + 128;
227           } else {
228             double f;
229             char pszTemp[256];
230             f = va_arg(argList, double);
231             FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
232                            nPrecision + 6, f);
233             nItemLen = strlen(pszTemp);
234           }
235           break;
236         case 'p':
237           va_arg(argList, void*);
238           nItemLen = 32;
239           if (nItemLen < nWidth + nPrecision) {
240             nItemLen = nWidth + nPrecision;
241           }
242           break;
243         case 'n':
244           va_arg(argList, int*);
245           break;
246       }
247     }
248     nMaxLen += nItemLen;
249   }
250   nMaxLen += 32;  // Fudge factor.
251   return nMaxLen;
252 }
253 
254 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)255 absl::optional<WideString> TryVSWPrintf(size_t size,
256                                         const wchar_t* pFormat,
257                                         va_list argList) {
258   if (!size)
259     return absl::nullopt;
260 
261   WideString str;
262   {
263     // Span's lifetime must end before ReleaseBuffer() below.
264     pdfium::span<wchar_t> buffer = str.GetBuffer(size);
265 
266     // In the following two calls, there's always space in the WideString
267     // for a terminating NUL that's not included in the span.
268     // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
269     // -1 return code even though the buffer is written. Probably just as well
270     // not to trust the vendor's implementation to write anything anyways.
271     // See https://crbug.com/705912.
272     memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
273     int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
274 
275     bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
276     if (!bSufficientBuffer)
277       return absl::nullopt;
278   }
279   str.ReleaseBuffer(str.GetStringLength());
280   return str;
281 }
282 
283 }  // namespace
284 
285 namespace fxcrt {
286 
287 static_assert(sizeof(WideString) <= sizeof(wchar_t*),
288               "Strings must not require more space than pointers");
289 
290 // static
FormatInteger(int i)291 WideString WideString::FormatInteger(int i) {
292   wchar_t wbuf[32];
293   swprintf(wbuf, std::size(wbuf), L"%d", i);
294   return WideString(wbuf);
295 }
296 
297 // static
FormatV(const wchar_t * format,va_list argList)298 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
299   va_list argListCopy;
300   va_copy(argListCopy, argList);
301   auto guess = GuessSizeForVSWPrintf(format, argListCopy);
302   va_end(argListCopy);
303 
304   if (!guess.has_value()) {
305     return WideString();
306   }
307   int maxLen = pdfium::base::checked_cast<int>(guess.value());
308 
309   while (maxLen < 32 * 1024) {
310     va_copy(argListCopy, argList);
311     absl::optional<WideString> ret =
312         TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
313     va_end(argListCopy);
314     if (ret.has_value())
315       return ret.value();
316 
317     maxLen *= 2;
318   }
319   return WideString();
320 }
321 
322 // static
Format(const wchar_t * pFormat,...)323 WideString WideString::Format(const wchar_t* pFormat, ...) {
324   va_list argList;
325   va_start(argList, pFormat);
326   WideString ret = FormatV(pFormat, argList);
327   va_end(argList);
328   return ret;
329 }
330 
331 WideString::WideString() = default;
332 
// Copying shares the underlying data block with |other|.
WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}
334 
WideString(WideString && other)335 WideString::WideString(WideString&& other) noexcept {
336   m_pData.Swap(other.m_pData);
337 }
338 
WideString(const wchar_t * pStr,size_t nLen)339 WideString::WideString(const wchar_t* pStr, size_t nLen) {
340   if (nLen)
341     m_pData.Reset(StringData::Create(pStr, nLen));
342 }
343 
WideString(wchar_t ch)344 WideString::WideString(wchar_t ch) {
345   m_pData.Reset(StringData::Create(1));
346   m_pData->m_String[0] = ch;
347 }
348 
WideString(const wchar_t * ptr)349 WideString::WideString(const wchar_t* ptr)
350     : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
351 
WideString(WideStringView stringSrc)352 WideString::WideString(WideStringView stringSrc) {
353   if (!stringSrc.IsEmpty()) {
354     m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
355                                      stringSrc.GetLength()));
356   }
357 }
358 
WideString(WideStringView str1,WideStringView str2)359 WideString::WideString(WideStringView str1, WideStringView str2) {
360   FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
361   nSafeLen += str2.GetLength();
362 
363   size_t nNewLen = nSafeLen.ValueOrDie();
364   if (nNewLen == 0)
365     return;
366 
367   m_pData.Reset(StringData::Create(nNewLen));
368   m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
369   m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
370                           str2.GetLength());
371 }
372 
WideString(const std::initializer_list<WideStringView> & list)373 WideString::WideString(const std::initializer_list<WideStringView>& list) {
374   FX_SAFE_SIZE_T nSafeLen = 0;
375   for (const auto& item : list)
376     nSafeLen += item.GetLength();
377 
378   size_t nNewLen = nSafeLen.ValueOrDie();
379   if (nNewLen == 0)
380     return;
381 
382   m_pData.Reset(StringData::Create(nNewLen));
383 
384   size_t nOffset = 0;
385   for (const auto& item : list) {
386     m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
387                             item.GetLength());
388     nOffset += item.GetLength();
389   }
390 }
391 
392 WideString::~WideString() = default;
393 
clear()394 void WideString::clear() {
395   if (m_pData && m_pData->CanOperateInPlace(0)) {
396     m_pData->m_nDataLength = 0;
397     return;
398   }
399   m_pData.Reset();
400 }
401 
operator =(const wchar_t * str)402 WideString& WideString::operator=(const wchar_t* str) {
403   if (!str || !str[0])
404     clear();
405   else
406     AssignCopy(str, wcslen(str));
407 
408   return *this;
409 }
410 
operator =(WideStringView str)411 WideString& WideString::operator=(WideStringView str) {
412   if (str.IsEmpty())
413     clear();
414   else
415     AssignCopy(str.unterminated_c_str(), str.GetLength());
416 
417   return *this;
418 }
419 
// Copy assignment: shares |that|'s data block. Nothing happens when both
// strings already refer to the same block.
WideString& WideString::operator=(const WideString& that) {
  if (m_pData == that.m_pData)
    return *this;

  m_pData = that.m_pData;
  return *this;
}
426 
operator =(WideString && that)427 WideString& WideString::operator=(WideString&& that) noexcept {
428   if (m_pData != that.m_pData)
429     m_pData = std::move(that.m_pData);
430 
431   return *this;
432 }
433 
operator +=(const wchar_t * str)434 WideString& WideString::operator+=(const wchar_t* str) {
435   if (str)
436     Concat(str, wcslen(str));
437 
438   return *this;
439 }
440 
operator +=(wchar_t ch)441 WideString& WideString::operator+=(wchar_t ch) {
442   Concat(&ch, 1);
443   return *this;
444 }
445 
// Appends the contents of |str|. A string without storage appends nothing.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
452 
operator +=(WideStringView str)453 WideString& WideString::operator+=(WideStringView str) {
454   if (!str.IsEmpty())
455     Concat(str.unterminated_c_str(), str.GetLength());
456 
457   return *this;
458 }
459 
operator ==(const wchar_t * ptr) const460 bool WideString::operator==(const wchar_t* ptr) const {
461   if (!m_pData)
462     return !ptr || !ptr[0];
463 
464   if (!ptr)
465     return m_pData->m_nDataLength == 0;
466 
467   return wcslen(ptr) == m_pData->m_nDataLength &&
468          wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
469 }
470 
operator ==(WideStringView str) const471 bool WideString::operator==(WideStringView str) const {
472   if (!m_pData)
473     return str.IsEmpty();
474 
475   return m_pData->m_nDataLength == str.GetLength() &&
476          wmemcmp(m_pData->m_String, str.unterminated_c_str(),
477                  str.GetLength()) == 0;
478 }
479 
operator ==(const WideString & other) const480 bool WideString::operator==(const WideString& other) const {
481   if (m_pData == other.m_pData)
482     return true;
483 
484   if (IsEmpty())
485     return other.IsEmpty();
486 
487   if (other.IsEmpty())
488     return false;
489 
490   return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
491          wmemcmp(other.m_pData->m_String, m_pData->m_String,
492                  m_pData->m_nDataLength) == 0;
493 }
494 
operator <(const wchar_t * ptr) const495 bool WideString::operator<(const wchar_t* ptr) const {
496   return Compare(ptr) < 0;
497 }
498 
operator <(WideStringView str) const499 bool WideString::operator<(WideStringView str) const {
500   if (!m_pData && !str.unterminated_c_str())
501     return false;
502   if (c_str() == str.unterminated_c_str())
503     return false;
504 
505   size_t len = GetLength();
506   size_t other_len = str.GetLength();
507   int result =
508       wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
509   return result < 0 || (result == 0 && len < other_len);
510 }
511 
operator <(const WideString & other) const512 bool WideString::operator<(const WideString& other) const {
513   return Compare(other) < 0;
514 }
515 
AssignCopy(const wchar_t * pSrcData,size_t nSrcLen)516 void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
517   AllocBeforeWrite(nSrcLen);
518   m_pData->CopyContents(pSrcData, nSrcLen);
519   m_pData->m_nDataLength = nSrcLen;
520 }
521 
ReallocBeforeWrite(size_t nNewLength)522 void WideString::ReallocBeforeWrite(size_t nNewLength) {
523   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
524     return;
525 
526   if (nNewLength == 0) {
527     clear();
528     return;
529   }
530 
531   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
532   if (m_pData) {
533     size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
534     pNewData->CopyContents(m_pData->m_String, nCopyLength);
535     pNewData->m_nDataLength = nCopyLength;
536   } else {
537     pNewData->m_nDataLength = 0;
538   }
539   pNewData->m_String[pNewData->m_nDataLength] = 0;
540   m_pData.Swap(pNewData);
541 }
542 
AllocBeforeWrite(size_t nNewLength)543 void WideString::AllocBeforeWrite(size_t nNewLength) {
544   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
545     return;
546 
547   if (nNewLength == 0) {
548     clear();
549     return;
550   }
551 
552   m_pData.Reset(StringData::Create(nNewLength));
553 }
554 
ReleaseBuffer(size_t nNewLength)555 void WideString::ReleaseBuffer(size_t nNewLength) {
556   if (!m_pData)
557     return;
558 
559   nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
560   if (nNewLength == 0) {
561     clear();
562     return;
563   }
564 
565   DCHECK_EQ(m_pData->m_nRefs, 1);
566   m_pData->m_nDataLength = nNewLength;
567   m_pData->m_String[nNewLength] = 0;
568   if (m_pData->m_nAllocLength - nNewLength >= 32) {
569     // Over arbitrary threshold, so pay the price to relocate.  Force copy to
570     // always occur by holding a second reference to the string.
571     WideString preserve(*this);
572     ReallocBeforeWrite(nNewLength);
573   }
574 }
575 
Reserve(size_t len)576 void WideString::Reserve(size_t len) {
577   GetBuffer(len);
578 }
579 
GetBuffer(size_t nMinBufLength)580 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
581   if (!m_pData) {
582     if (nMinBufLength == 0)
583       return pdfium::span<wchar_t>();
584 
585     m_pData.Reset(StringData::Create(nMinBufLength));
586     m_pData->m_nDataLength = 0;
587     m_pData->m_String[0] = 0;
588     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589   }
590 
591   if (m_pData->CanOperateInPlace(nMinBufLength))
592     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
593 
594   nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
595   if (nMinBufLength == 0)
596     return pdfium::span<wchar_t>();
597 
598   RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
599   pNewData->CopyContents(*m_pData);
600   pNewData->m_nDataLength = m_pData->m_nDataLength;
601   m_pData.Swap(pNewData);
602   return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
603 }
604 
Delete(size_t index,size_t count)605 size_t WideString::Delete(size_t index, size_t count) {
606   if (!m_pData)
607     return 0;
608 
609   size_t old_length = m_pData->m_nDataLength;
610   if (count == 0 || index != pdfium::clamp<size_t>(index, 0, old_length))
611     return old_length;
612 
613   size_t removal_length = index + count;
614   if (removal_length > old_length)
615     return old_length;
616 
617   ReallocBeforeWrite(old_length);
618   size_t chars_to_copy = old_length - removal_length + 1;
619   wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
620            chars_to_copy);
621   m_pData->m_nDataLength = old_length - count;
622   return m_pData->m_nDataLength;
623 }
624 
Concat(const wchar_t * pSrcData,size_t nSrcLen)625 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
626   if (!pSrcData || nSrcLen == 0)
627     return;
628 
629   if (!m_pData) {
630     m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
631     return;
632   }
633 
634   if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
635     m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
636     m_pData->m_nDataLength += nSrcLen;
637     return;
638   }
639 
640   size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
641   RetainPtr<StringData> pNewData(
642       StringData::Create(m_pData->m_nDataLength + nConcatLen));
643   pNewData->CopyContents(*m_pData);
644   pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
645   pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
646   m_pData.Swap(pNewData);
647 }
648 
ReferenceCountForTesting() const649 intptr_t WideString::ReferenceCountForTesting() const {
650   return m_pData ? m_pData->m_nRefs : 0;
651 }
652 
ToASCII() const653 ByteString WideString::ToASCII() const {
654   ByteString result;
655   result.Reserve(GetLength());
656   for (wchar_t wc : *this)
657     result.InsertAtBack(static_cast<char>(wc & 0x7f));
658   return result;
659 }
660 
ToLatin1() const661 ByteString WideString::ToLatin1() const {
662   ByteString result;
663   result.Reserve(GetLength());
664   for (wchar_t wc : *this)
665     result.InsertAtBack(static_cast<char>(wc & 0xff));
666   return result;
667 }
668 
ToDefANSI() const669 ByteString WideString::ToDefANSI() const {
670   size_t dest_len =
671       FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
672   if (!dest_len)
673     return ByteString();
674 
675   ByteString bstr;
676   {
677     // Span's lifetime must end before ReleaseBuffer() below.
678     pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
679     FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), dest_buf);
680   }
681   bstr.ReleaseBuffer(dest_len);
682   return bstr;
683 }
684 
ToUTF8() const685 ByteString WideString::ToUTF8() const {
686   return FX_UTF8Encode(AsStringView());
687 }
688 
ToUTF16LE() const689 ByteString WideString::ToUTF16LE() const {
690   if (!m_pData)
691     return ByteString("\0\0", 2);
692 
693   ByteString result;
694   size_t len = m_pData->m_nDataLength;
695   {
696     // Span's lifetime must end before ReleaseBuffer() below.
697     pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
698     for (size_t i = 0; i < len; i++) {
699       buffer[i * 2] = m_pData->m_String[i] & 0xff;
700       buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
701     }
702     buffer[len * 2] = 0;
703     buffer[len * 2 + 1] = 0;
704   }
705   result.ReleaseBuffer(len * 2 + 2);
706   return result;
707 }
708 
EncodeEntities() const709 WideString WideString::EncodeEntities() const {
710   WideString ret = *this;
711   ret.Replace(L"&", L"&amp;");
712   ret.Replace(L"<", L"&lt;");
713   ret.Replace(L">", L"&gt;");
714   ret.Replace(L"\'", L"&apos;");
715   ret.Replace(L"\"", L"&quot;");
716   return ret;
717 }
718 
Substr(size_t offset) const719 WideString WideString::Substr(size_t offset) const {
720   // Unsigned underflow is well-defined and out-of-range is handled by Substr().
721   return Substr(offset, GetLength() - offset);
722 }
723 
Substr(size_t first,size_t count) const724 WideString WideString::Substr(size_t first, size_t count) const {
725   if (!m_pData)
726     return WideString();
727 
728   if (!IsValidIndex(first))
729     return WideString();
730 
731   if (count == 0 || !IsValidLength(count))
732     return WideString();
733 
734   if (!IsValidIndex(first + count - 1))
735     return WideString();
736 
737   if (first == 0 && count == GetLength())
738     return *this;
739 
740   WideString dest;
741   AllocCopy(dest, count, first);
742   return dest;
743 }
744 
First(size_t count) const745 WideString WideString::First(size_t count) const {
746   return Substr(0, count);
747 }
748 
Last(size_t count) const749 WideString WideString::Last(size_t count) const {
750   // Unsigned underflow is well-defined and out-of-range is handled by Substr().
751   return Substr(GetLength() - count, count);
752 }
753 
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const754 void WideString::AllocCopy(WideString& dest,
755                            size_t nCopyLen,
756                            size_t nCopyIndex) const {
757   if (nCopyLen == 0)
758     return;
759 
760   RetainPtr<StringData> pNewData(
761       StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
762   dest.m_pData.Swap(pNewData);
763 }
764 
Insert(size_t index,wchar_t ch)765 size_t WideString::Insert(size_t index, wchar_t ch) {
766   const size_t cur_length = GetLength();
767   if (!IsValidLength(index))
768     return cur_length;
769 
770   const size_t new_length = cur_length + 1;
771   ReallocBeforeWrite(new_length);
772   wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
773            new_length - index);
774   m_pData->m_String[index] = ch;
775   m_pData->m_nDataLength = new_length;
776   return new_length;
777 }
778 
Find(wchar_t ch,size_t start) const779 absl::optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
780   if (!m_pData)
781     return absl::nullopt;
782 
783   if (!IsValidIndex(start))
784     return absl::nullopt;
785 
786   const wchar_t* pStr =
787       wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
788   return pStr ? absl::optional<size_t>(
789                     static_cast<size_t>(pStr - m_pData->m_String))
790               : absl::nullopt;
791 }
792 
Find(WideStringView subStr,size_t start) const793 absl::optional<size_t> WideString::Find(WideStringView subStr,
794                                         size_t start) const {
795   if (!m_pData)
796     return absl::nullopt;
797 
798   if (!IsValidIndex(start))
799     return absl::nullopt;
800 
801   const wchar_t* pStr =
802       FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
803                 subStr.unterminated_c_str(), subStr.GetLength());
804   return pStr ? absl::optional<size_t>(
805                     static_cast<size_t>(pStr - m_pData->m_String))
806               : absl::nullopt;
807 }
808 
ReverseFind(wchar_t ch) const809 absl::optional<size_t> WideString::ReverseFind(wchar_t ch) const {
810   if (!m_pData)
811     return absl::nullopt;
812 
813   size_t nLength = m_pData->m_nDataLength;
814   while (nLength--) {
815     if (m_pData->m_String[nLength] == ch)
816       return nLength;
817   }
818   return absl::nullopt;
819 }
820 
MakeLower()821 void WideString::MakeLower() {
822   if (IsEmpty())
823     return;
824 
825   ReallocBeforeWrite(m_pData->m_nDataLength);
826   FXSYS_wcslwr(m_pData->m_String);
827 }
828 
MakeUpper()829 void WideString::MakeUpper() {
830   if (IsEmpty())
831     return;
832 
833   ReallocBeforeWrite(m_pData->m_nDataLength);
834   FXSYS_wcsupr(m_pData->m_String);
835 }
836 
Remove(wchar_t chRemove)837 size_t WideString::Remove(wchar_t chRemove) {
838   if (IsEmpty())
839     return 0;
840 
841   wchar_t* pstrSource = m_pData->m_String;
842   wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
843   while (pstrSource < pstrEnd) {
844     if (*pstrSource == chRemove)
845       break;
846     pstrSource++;
847   }
848   if (pstrSource == pstrEnd)
849     return 0;
850 
851   ptrdiff_t copied = pstrSource - m_pData->m_String;
852   ReallocBeforeWrite(m_pData->m_nDataLength);
853   pstrSource = m_pData->m_String + copied;
854   pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
855 
856   wchar_t* pstrDest = pstrSource;
857   while (pstrSource < pstrEnd) {
858     if (*pstrSource != chRemove) {
859       *pstrDest = *pstrSource;
860       pstrDest++;
861     }
862     pstrSource++;
863   }
864 
865   *pstrDest = 0;
866   size_t count = static_cast<size_t>(pstrSource - pstrDest);
867   m_pData->m_nDataLength -= count;
868   return count;
869 }
870 
Replace(WideStringView pOld,WideStringView pNew)871 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
872   if (!m_pData || pOld.IsEmpty())
873     return 0;
874 
875   size_t nSourceLen = pOld.GetLength();
876   size_t nReplacementLen = pNew.GetLength();
877   size_t count = 0;
878   const wchar_t* pStart = m_pData->m_String;
879   wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
880   while (true) {
881     const wchar_t* pTarget =
882         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
883                   pOld.unterminated_c_str(), nSourceLen);
884     if (!pTarget)
885       break;
886 
887     count++;
888     pStart = pTarget + nSourceLen;
889   }
890   if (count == 0)
891     return 0;
892 
893   size_t nNewLength =
894       m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
895 
896   if (nNewLength == 0) {
897     clear();
898     return count;
899   }
900 
901   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
902   pStart = m_pData->m_String;
903   wchar_t* pDest = pNewData->m_String;
904   for (size_t i = 0; i < count; i++) {
905     const wchar_t* pTarget =
906         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
907                   pOld.unterminated_c_str(), nSourceLen);
908     wmemcpy(pDest, pStart, pTarget - pStart);
909     pDest += pTarget - pStart;
910     wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
911     pDest += pNew.GetLength();
912     pStart = pTarget + nSourceLen;
913   }
914   wmemcpy(pDest, pStart, pEnd - pStart);
915   m_pData.Swap(pNewData);
916   return count;
917 }
918 
919 // static
FromASCII(ByteStringView bstr)920 WideString WideString::FromASCII(ByteStringView bstr) {
921   WideString result;
922   result.Reserve(bstr.GetLength());
923   for (char c : bstr)
924     result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
925   return result;
926 }
927 
928 // static
FromLatin1(ByteStringView bstr)929 WideString WideString::FromLatin1(ByteStringView bstr) {
930   WideString result;
931   result.Reserve(bstr.GetLength());
932   for (char c : bstr)
933     result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
934   return result;
935 }
936 
937 // static
FromDefANSI(ByteStringView bstr)938 WideString WideString::FromDefANSI(ByteStringView bstr) {
939   size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
940   if (!dest_len)
941     return WideString();
942 
943   WideString wstr;
944   {
945     // Span's lifetime must end before ReleaseBuffer() below.
946     pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
947     FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, dest_buf);
948   }
949   wstr.ReleaseBuffer(dest_len);
950   return wstr;
951 }
952 
953 // static
FromUTF8(ByteStringView str)954 WideString WideString::FromUTF8(ByteStringView str) {
955   return FX_UTF8Decode(str);
956 }
957 
958 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)959 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
960   if (!wstr || wlen == 0)
961     return WideString();
962 
963   WideString result;
964   {
965     // Span's lifetime must end before ReleaseBuffer() below.
966     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
967     for (size_t i = 0; i < wlen; i++)
968       buf[i] = wstr[i];
969   }
970   result.ReleaseBuffer(wlen);
971   return result;
972 }
973 
FromUTF16BE(const unsigned short * wstr,size_t wlen)974 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
975   if (!wstr || wlen == 0)
976     return WideString();
977 
978   WideString result;
979   {
980     // Span's lifetime must end before ReleaseBuffer() below.
981     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
982     for (size_t i = 0; i < wlen; i++) {
983       auto wch = wstr[i];
984       wch = (wch >> 8) | (wch << 8);
985       buf[i] = wch;
986     }
987   }
988   result.ReleaseBuffer(wlen);
989   return result;
990 }
991 
SetAt(size_t index,wchar_t c)992 void WideString::SetAt(size_t index, wchar_t c) {
993   DCHECK(IsValidIndex(index));
994   ReallocBeforeWrite(m_pData->m_nDataLength);
995   m_pData->m_String[index] = c;
996 }
997 
Compare(const wchar_t * str) const998 int WideString::Compare(const wchar_t* str) const {
999   if (m_pData)
1000     return str ? wcscmp(m_pData->m_String, str) : 1;
1001   return (!str || str[0] == 0) ? 0 : -1;
1002 }
1003 
Compare(const WideString & str) const1004 int WideString::Compare(const WideString& str) const {
1005   if (!m_pData)
1006     return str.m_pData ? -1 : 0;
1007   if (!str.m_pData)
1008     return 1;
1009 
1010   size_t this_len = m_pData->m_nDataLength;
1011   size_t that_len = str.m_pData->m_nDataLength;
1012   size_t min_len = std::min(this_len, that_len);
1013   int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
1014   if (result != 0)
1015     return result;
1016   if (this_len == that_len)
1017     return 0;
1018   return this_len < that_len ? -1 : 1;
1019 }
1020 
CompareNoCase(const wchar_t * str) const1021 int WideString::CompareNoCase(const wchar_t* str) const {
1022   if (m_pData)
1023     return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1024   return (!str || str[0] == 0) ? 0 : -1;
1025 }
1026 
WStringLength(const unsigned short * str)1027 size_t WideString::WStringLength(const unsigned short* str) {
1028   size_t len = 0;
1029   if (str)
1030     while (str[len])
1031       len++;
1032   return len;
1033 }
1034 
Trim()1035 void WideString::Trim() {
1036   TrimRight(kWideTrimChars);
1037   TrimLeft(kWideTrimChars);
1038 }
1039 
Trim(wchar_t target)1040 void WideString::Trim(wchar_t target) {
1041   wchar_t str[2] = {target, 0};
1042   TrimRight(str);
1043   TrimLeft(str);
1044 }
1045 
Trim(WideStringView targets)1046 void WideString::Trim(WideStringView targets) {
1047   TrimRight(targets);
1048   TrimLeft(targets);
1049 }
1050 
TrimLeft()1051 void WideString::TrimLeft() {
1052   TrimLeft(kWideTrimChars);
1053 }
1054 
TrimLeft(wchar_t target)1055 void WideString::TrimLeft(wchar_t target) {
1056   wchar_t str[2] = {target, 0};
1057   TrimLeft(str);
1058 }
1059 
TrimLeft(WideStringView targets)1060 void WideString::TrimLeft(WideStringView targets) {
1061   if (!m_pData || targets.IsEmpty())
1062     return;
1063 
1064   size_t len = GetLength();
1065   if (len == 0)
1066     return;
1067 
1068   size_t pos = 0;
1069   while (pos < len) {
1070     size_t i = 0;
1071     while (i < targets.GetLength() &&
1072            targets.CharAt(i) != m_pData->m_String[pos]) {
1073       i++;
1074     }
1075     if (i == targets.GetLength())
1076       break;
1077     pos++;
1078   }
1079   if (!pos)
1080     return;
1081 
1082   ReallocBeforeWrite(len);
1083   size_t nDataLength = len - pos;
1084   memmove(m_pData->m_String, m_pData->m_String + pos,
1085           (nDataLength + 1) * sizeof(wchar_t));
1086   m_pData->m_nDataLength = nDataLength;
1087 }
1088 
TrimRight()1089 void WideString::TrimRight() {
1090   TrimRight(kWideTrimChars);
1091 }
1092 
TrimRight(wchar_t target)1093 void WideString::TrimRight(wchar_t target) {
1094   wchar_t str[2] = {target, 0};
1095   TrimRight(str);
1096 }
1097 
TrimRight(WideStringView targets)1098 void WideString::TrimRight(WideStringView targets) {
1099   if (IsEmpty() || targets.IsEmpty())
1100     return;
1101 
1102   size_t pos = GetLength();
1103   while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1104     pos--;
1105 
1106   if (pos < m_pData->m_nDataLength) {
1107     ReallocBeforeWrite(m_pData->m_nDataLength);
1108     m_pData->m_String[pos] = 0;
1109     m_pData->m_nDataLength = pos;
1110   }
1111 }
1112 
GetInteger() const1113 int WideString::GetInteger() const {
1114   return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1115 }
1116 
operator <<(std::wostream & os,const WideString & str)1117 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1118   return os.write(str.c_str(), str.GetLength());
1119 }
1120 
operator <<(std::ostream & os,const WideString & str)1121 std::ostream& operator<<(std::ostream& os, const WideString& str) {
1122   os << str.ToUTF8();
1123   return os;
1124 }
1125 
operator <<(std::wostream & os,WideStringView str)1126 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1127   return os.write(str.unterminated_c_str(), str.GetLength());
1128 }
1129 
operator <<(std::ostream & os,WideStringView str)1130 std::ostream& operator<<(std::ostream& os, WideStringView str) {
1131   os << FX_UTF8Encode(str);
1132   return os;
1133 }
1134 
1135 }  // namespace fxcrt
1136 
FX_HashCode_GetW(WideStringView str)1137 uint32_t FX_HashCode_GetW(WideStringView str) {
1138   uint32_t dwHashCode = 0;
1139   for (WideStringView::UnsignedType c : str)
1140     dwHashCode = 1313 * dwHashCode + c;
1141   return dwHashCode;
1142 }
1143 
FX_HashCode_GetLoweredW(WideStringView str)1144 uint32_t FX_HashCode_GetLoweredW(WideStringView str) {
1145   uint32_t dwHashCode = 0;
1146   for (wchar_t c : str)  // match FXSYS_towlower() arg type.
1147     dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1148   return dwHashCode;
1149 }
1150