1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/widestring.h"
8
9 #include <stddef.h>
10 #include <string.h>
11
12 #include <algorithm>
13 #include <sstream>
14
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_safe_types.h"
18 #include "core/fxcrt/fx_system.h"
19 #include "core/fxcrt/string_pool_template.h"
20 #include "third_party/base/check.h"
21 #include "third_party/base/check_op.h"
22 #include "third_party/base/cxx17_backports.h"
23 #include "third_party/base/numerics/safe_math.h"
24
// Explicit instantiations of the shared string templates for the
// wide-character types, plus std::hash for WideString.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;
29
// Length-modifier flags used by GuessSizeForVSWPrintf(). They are OR-ed with
// the conversion character so that the modifier and the conversion can be
// matched together in a single switch statement.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
33
34 namespace {
35
// Characters removed by the argument-less Trim(), TrimLeft() and TrimRight():
// tab, line feed, vertical tab, form feed, carriage return and space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
37
// Finds the first occurrence of the |needle_len| characters at |needle|
// within the |haystack_len| characters at |haystack|. Neither buffer needs to
// be NUL-terminated. Returns a pointer to the start of the match, or nullptr
// when there is no match, the needle is empty, or the needle is longer than
// the haystack.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || needle_len > haystack_len)
    return nullptr;

  const wchar_t* const haystack_end = haystack + haystack_len;
  const wchar_t* const match =
      std::search(haystack, haystack_end, needle, needle + needle_len);
  return match == haystack_end ? nullptr : match;
}
60
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)61 absl::optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
62 va_list argList) {
63 size_t nMaxLen = 0;
64 for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
65 if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
66 ++nMaxLen;
67 continue;
68 }
69 int iWidth = 0;
70 for (; *pStr != 0; pStr++) {
71 if (*pStr == '#') {
72 nMaxLen += 2;
73 } else if (*pStr == '*') {
74 iWidth = va_arg(argList, int);
75 } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76 break;
77 }
78 }
79 if (iWidth == 0) {
80 iWidth = FXSYS_wtoi(pStr);
81 while (FXSYS_IsDecimalDigit(*pStr))
82 ++pStr;
83 }
84 if (iWidth < 0 || iWidth > 128 * 1024)
85 return absl::nullopt;
86 uint32_t nWidth = static_cast<uint32_t>(iWidth);
87 int iPrecision = 0;
88 if (*pStr == '.') {
89 pStr++;
90 if (*pStr == '*') {
91 iPrecision = va_arg(argList, int);
92 pStr++;
93 } else {
94 iPrecision = FXSYS_wtoi(pStr);
95 while (FXSYS_IsDecimalDigit(*pStr))
96 ++pStr;
97 }
98 }
99 if (iPrecision < 0 || iPrecision > 128 * 1024)
100 return absl::nullopt;
101 uint32_t nPrecision = static_cast<uint32_t>(iPrecision);
102 int nModifier = 0;
103 if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
104 pStr += 3;
105 nModifier = FORCE_INT64;
106 } else {
107 switch (*pStr) {
108 case 'h':
109 nModifier = FORCE_ANSI;
110 pStr++;
111 break;
112 case 'l':
113 nModifier = FORCE_UNICODE;
114 pStr++;
115 break;
116 case 'F':
117 case 'N':
118 case 'L':
119 pStr++;
120 break;
121 }
122 }
123 size_t nItemLen = 0;
124 switch (*pStr | nModifier) {
125 case 'c':
126 case 'C':
127 nItemLen = 2;
128 va_arg(argList, int);
129 break;
130 case 'c' | FORCE_ANSI:
131 case 'C' | FORCE_ANSI:
132 nItemLen = 2;
133 va_arg(argList, int);
134 break;
135 case 'c' | FORCE_UNICODE:
136 case 'C' | FORCE_UNICODE:
137 nItemLen = 2;
138 va_arg(argList, int);
139 break;
140 case 's': {
141 const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
142 if (pstrNextArg) {
143 nItemLen = wcslen(pstrNextArg);
144 if (nItemLen < 1) {
145 nItemLen = 1;
146 }
147 } else {
148 nItemLen = 6;
149 }
150 } break;
151 case 'S': {
152 const char* pstrNextArg = va_arg(argList, const char*);
153 if (pstrNextArg) {
154 nItemLen = strlen(pstrNextArg);
155 if (nItemLen < 1) {
156 nItemLen = 1;
157 }
158 } else {
159 nItemLen = 6;
160 }
161 } break;
162 case 's' | FORCE_ANSI:
163 case 'S' | FORCE_ANSI: {
164 const char* pstrNextArg = va_arg(argList, const char*);
165 if (pstrNextArg) {
166 nItemLen = strlen(pstrNextArg);
167 if (nItemLen < 1) {
168 nItemLen = 1;
169 }
170 } else {
171 nItemLen = 6;
172 }
173 } break;
174 case 's' | FORCE_UNICODE:
175 case 'S' | FORCE_UNICODE: {
176 const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
177 if (pstrNextArg) {
178 nItemLen = wcslen(pstrNextArg);
179 if (nItemLen < 1) {
180 nItemLen = 1;
181 }
182 } else {
183 nItemLen = 6;
184 }
185 } break;
186 }
187 if (nItemLen != 0) {
188 if (nPrecision != 0 && nItemLen > nPrecision) {
189 nItemLen = nPrecision;
190 }
191 if (nItemLen < nWidth) {
192 nItemLen = nWidth;
193 }
194 } else {
195 switch (*pStr) {
196 case 'd':
197 case 'i':
198 case 'u':
199 case 'x':
200 case 'X':
201 case 'o':
202 if (nModifier & FORCE_INT64) {
203 va_arg(argList, int64_t);
204 } else {
205 va_arg(argList, int);
206 }
207 nItemLen = 32;
208 if (nItemLen < nWidth + nPrecision) {
209 nItemLen = nWidth + nPrecision;
210 }
211 break;
212 case 'a':
213 case 'A':
214 case 'e':
215 case 'E':
216 case 'g':
217 case 'G':
218 va_arg(argList, double);
219 nItemLen = 128;
220 if (nItemLen < nWidth + nPrecision) {
221 nItemLen = nWidth + nPrecision;
222 }
223 break;
224 case 'f':
225 if (nWidth + nPrecision > 100) {
226 nItemLen = nPrecision + nWidth + 128;
227 } else {
228 double f;
229 char pszTemp[256];
230 f = va_arg(argList, double);
231 FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
232 nPrecision + 6, f);
233 nItemLen = strlen(pszTemp);
234 }
235 break;
236 case 'p':
237 va_arg(argList, void*);
238 nItemLen = 32;
239 if (nItemLen < nWidth + nPrecision) {
240 nItemLen = nWidth + nPrecision;
241 }
242 break;
243 case 'n':
244 va_arg(argList, int*);
245 break;
246 }
247 }
248 nMaxLen += nItemLen;
249 }
250 nMaxLen += 32; // Fudge factor.
251 return nMaxLen;
252 }
253
254 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)255 absl::optional<WideString> TryVSWPrintf(size_t size,
256 const wchar_t* pFormat,
257 va_list argList) {
258 if (!size)
259 return absl::nullopt;
260
261 WideString str;
262 {
263 // Span's lifetime must end before ReleaseBuffer() below.
264 pdfium::span<wchar_t> buffer = str.GetBuffer(size);
265
266 // In the following two calls, there's always space in the WideString
267 // for a terminating NUL that's not included in the span.
268 // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
269 // -1 return code even though the buffer is written. Probably just as well
270 // not to trust the vendor's implementation to write anything anyways.
271 // See https://crbug.com/705912.
272 memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
273 int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
274
275 bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
276 if (!bSufficientBuffer)
277 return absl::nullopt;
278 }
279 str.ReleaseBuffer(str.GetStringLength());
280 return str;
281 }
282
283 } // namespace
284
285 namespace fxcrt {
286
// Compile-time guard: a WideString object must be no larger than a pointer.
static_assert(sizeof(WideString) <= sizeof(wchar_t*),
              "Strings must not require more space than pointers");
289
290 // static
FormatInteger(int i)291 WideString WideString::FormatInteger(int i) {
292 wchar_t wbuf[32];
293 swprintf(wbuf, std::size(wbuf), L"%d", i);
294 return WideString(wbuf);
295 }
296
297 // static
FormatV(const wchar_t * format,va_list argList)298 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
299 va_list argListCopy;
300 va_copy(argListCopy, argList);
301 auto guess = GuessSizeForVSWPrintf(format, argListCopy);
302 va_end(argListCopy);
303
304 if (!guess.has_value()) {
305 return WideString();
306 }
307 int maxLen = pdfium::base::checked_cast<int>(guess.value());
308
309 while (maxLen < 32 * 1024) {
310 va_copy(argListCopy, argList);
311 absl::optional<WideString> ret =
312 TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
313 va_end(argListCopy);
314 if (ret.has_value())
315 return ret.value();
316
317 maxLen *= 2;
318 }
319 return WideString();
320 }
321
322 // static
Format(const wchar_t * pFormat,...)323 WideString WideString::Format(const wchar_t* pFormat, ...) {
324 va_list argList;
325 va_start(argList, pFormat);
326 WideString ret = FormatV(pFormat, argList);
327 va_end(argList);
328 return ret;
329 }
330
// Default constructor; compiler-generated.
WideString::WideString() = default;
332
// Copy constructor: copies |other|'s m_pData handle rather than the
// characters it refers to.
WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}
334
// Move constructor: takes over |other|'s data by swapping it with this
// object's freshly default-initialized m_pData.
WideString::WideString(WideString&& other) noexcept {
  m_pData.Swap(other.m_pData);
}
338
WideString(const wchar_t * pStr,size_t nLen)339 WideString::WideString(const wchar_t* pStr, size_t nLen) {
340 if (nLen)
341 m_pData.Reset(StringData::Create(pStr, nLen));
342 }
343
// Constructs a one-character string holding |ch|.
WideString::WideString(wchar_t ch) {
  m_pData.Reset(StringData::Create(1));
  m_pData->m_String[0] = ch;
}
348
// Constructs from a NUL-terminated string by delegating to the
// (pointer, length) constructor; a null |ptr| is treated as length 0.
WideString::WideString(const wchar_t* ptr)
    : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
351
WideString(WideStringView stringSrc)352 WideString::WideString(WideStringView stringSrc) {
353 if (!stringSrc.IsEmpty()) {
354 m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
355 stringSrc.GetLength()));
356 }
357 }
358
WideString(WideStringView str1,WideStringView str2)359 WideString::WideString(WideStringView str1, WideStringView str2) {
360 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
361 nSafeLen += str2.GetLength();
362
363 size_t nNewLen = nSafeLen.ValueOrDie();
364 if (nNewLen == 0)
365 return;
366
367 m_pData.Reset(StringData::Create(nNewLen));
368 m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
369 m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
370 str2.GetLength());
371 }
372
WideString(const std::initializer_list<WideStringView> & list)373 WideString::WideString(const std::initializer_list<WideStringView>& list) {
374 FX_SAFE_SIZE_T nSafeLen = 0;
375 for (const auto& item : list)
376 nSafeLen += item.GetLength();
377
378 size_t nNewLen = nSafeLen.ValueOrDie();
379 if (nNewLen == 0)
380 return;
381
382 m_pData.Reset(StringData::Create(nNewLen));
383
384 size_t nOffset = 0;
385 for (const auto& item : list) {
386 m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
387 item.GetLength());
388 nOffset += item.GetLength();
389 }
390 }
391
// Destructor; compiler-generated.
WideString::~WideString() = default;
393
clear()394 void WideString::clear() {
395 if (m_pData && m_pData->CanOperateInPlace(0)) {
396 m_pData->m_nDataLength = 0;
397 return;
398 }
399 m_pData.Reset();
400 }
401
operator =(const wchar_t * str)402 WideString& WideString::operator=(const wchar_t* str) {
403 if (!str || !str[0])
404 clear();
405 else
406 AssignCopy(str, wcslen(str));
407
408 return *this;
409 }
410
operator =(WideStringView str)411 WideString& WideString::operator=(WideStringView str) {
412 if (str.IsEmpty())
413 clear();
414 else
415 AssignCopy(str.unterminated_c_str(), str.GetLength());
416
417 return *this;
418 }
419
// Copy assignment. Nothing is done when both strings already refer to the
// same data; otherwise this string takes a copy of |that|'s data handle.
WideString& WideString::operator=(const WideString& that) {
  if (m_pData == that.m_pData)
    return *this;

  m_pData = that.m_pData;
  return *this;
}
426
operator =(WideString && that)427 WideString& WideString::operator=(WideString&& that) noexcept {
428 if (m_pData != that.m_pData)
429 m_pData = std::move(that.m_pData);
430
431 return *this;
432 }
433
operator +=(const wchar_t * str)434 WideString& WideString::operator+=(const wchar_t* str) {
435 if (str)
436 Concat(str, wcslen(str));
437
438 return *this;
439 }
440
// Appends the single character |ch|.
WideString& WideString::operator+=(wchar_t ch) {
  Concat(&ch, 1);
  return *this;
}
445
// Appends the contents of |str|; a string without data is ignored.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
452
operator +=(WideStringView str)453 WideString& WideString::operator+=(WideStringView str) {
454 if (!str.IsEmpty())
455 Concat(str.unterminated_c_str(), str.GetLength());
456
457 return *this;
458 }
459
operator ==(const wchar_t * ptr) const460 bool WideString::operator==(const wchar_t* ptr) const {
461 if (!m_pData)
462 return !ptr || !ptr[0];
463
464 if (!ptr)
465 return m_pData->m_nDataLength == 0;
466
467 return wcslen(ptr) == m_pData->m_nDataLength &&
468 wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
469 }
470
operator ==(WideStringView str) const471 bool WideString::operator==(WideStringView str) const {
472 if (!m_pData)
473 return str.IsEmpty();
474
475 return m_pData->m_nDataLength == str.GetLength() &&
476 wmemcmp(m_pData->m_String, str.unterminated_c_str(),
477 str.GetLength()) == 0;
478 }
479
operator ==(const WideString & other) const480 bool WideString::operator==(const WideString& other) const {
481 if (m_pData == other.m_pData)
482 return true;
483
484 if (IsEmpty())
485 return other.IsEmpty();
486
487 if (other.IsEmpty())
488 return false;
489
490 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
491 wmemcmp(other.m_pData->m_String, m_pData->m_String,
492 m_pData->m_nDataLength) == 0;
493 }
494
// Ordering against a NUL-terminated string, defined in terms of Compare().
bool WideString::operator<(const wchar_t* ptr) const {
  return Compare(ptr) < 0;
}
498
operator <(WideStringView str) const499 bool WideString::operator<(WideStringView str) const {
500 if (!m_pData && !str.unterminated_c_str())
501 return false;
502 if (c_str() == str.unterminated_c_str())
503 return false;
504
505 size_t len = GetLength();
506 size_t other_len = str.GetLength();
507 int result =
508 wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
509 return result < 0 || (result == 0 && len < other_len);
510 }
511
// Ordering against another WideString, defined in terms of Compare().
bool WideString::operator<(const WideString& other) const {
  return Compare(other) < 0;
}
515
// Replaces the contents with |nSrcLen| characters copied from |pSrcData|.
// AllocBeforeWrite() is called first so that m_pData can receive the copy.
// NOTE(review): m_pData is dereferenced unconditionally; the callers visible
// in this file only pass a non-zero |nSrcLen| - confirm for other callers.
void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
  AllocBeforeWrite(nSrcLen);
  m_pData->CopyContents(pSrcData, nSrcLen);
  m_pData->m_nDataLength = nSrcLen;
}
521
ReallocBeforeWrite(size_t nNewLength)522 void WideString::ReallocBeforeWrite(size_t nNewLength) {
523 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
524 return;
525
526 if (nNewLength == 0) {
527 clear();
528 return;
529 }
530
531 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
532 if (m_pData) {
533 size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
534 pNewData->CopyContents(m_pData->m_String, nCopyLength);
535 pNewData->m_nDataLength = nCopyLength;
536 } else {
537 pNewData->m_nDataLength = 0;
538 }
539 pNewData->m_String[pNewData->m_nDataLength] = 0;
540 m_pData.Swap(pNewData);
541 }
542
AllocBeforeWrite(size_t nNewLength)543 void WideString::AllocBeforeWrite(size_t nNewLength) {
544 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
545 return;
546
547 if (nNewLength == 0) {
548 clear();
549 return;
550 }
551
552 m_pData.Reset(StringData::Create(nNewLength));
553 }
554
ReleaseBuffer(size_t nNewLength)555 void WideString::ReleaseBuffer(size_t nNewLength) {
556 if (!m_pData)
557 return;
558
559 nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
560 if (nNewLength == 0) {
561 clear();
562 return;
563 }
564
565 DCHECK_EQ(m_pData->m_nRefs, 1);
566 m_pData->m_nDataLength = nNewLength;
567 m_pData->m_String[nNewLength] = 0;
568 if (m_pData->m_nAllocLength - nNewLength >= 32) {
569 // Over arbitrary threshold, so pay the price to relocate. Force copy to
570 // always occur by holding a second reference to the string.
571 WideString preserve(*this);
572 ReallocBeforeWrite(nNewLength);
573 }
574 }
575
// Requests a buffer of at least |len| characters via GetBuffer() and discards
// the returned span.
void WideString::Reserve(size_t len) {
  GetBuffer(len);
}
579
GetBuffer(size_t nMinBufLength)580 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
581 if (!m_pData) {
582 if (nMinBufLength == 0)
583 return pdfium::span<wchar_t>();
584
585 m_pData.Reset(StringData::Create(nMinBufLength));
586 m_pData->m_nDataLength = 0;
587 m_pData->m_String[0] = 0;
588 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589 }
590
591 if (m_pData->CanOperateInPlace(nMinBufLength))
592 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
593
594 nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
595 if (nMinBufLength == 0)
596 return pdfium::span<wchar_t>();
597
598 RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
599 pNewData->CopyContents(*m_pData);
600 pNewData->m_nDataLength = m_pData->m_nDataLength;
601 m_pData.Swap(pNewData);
602 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
603 }
604
Delete(size_t index,size_t count)605 size_t WideString::Delete(size_t index, size_t count) {
606 if (!m_pData)
607 return 0;
608
609 size_t old_length = m_pData->m_nDataLength;
610 if (count == 0 || index != pdfium::clamp<size_t>(index, 0, old_length))
611 return old_length;
612
613 size_t removal_length = index + count;
614 if (removal_length > old_length)
615 return old_length;
616
617 ReallocBeforeWrite(old_length);
618 size_t chars_to_copy = old_length - removal_length + 1;
619 wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
620 chars_to_copy);
621 m_pData->m_nDataLength = old_length - count;
622 return m_pData->m_nDataLength;
623 }
624
Concat(const wchar_t * pSrcData,size_t nSrcLen)625 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
626 if (!pSrcData || nSrcLen == 0)
627 return;
628
629 if (!m_pData) {
630 m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
631 return;
632 }
633
634 if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
635 m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
636 m_pData->m_nDataLength += nSrcLen;
637 return;
638 }
639
640 size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
641 RetainPtr<StringData> pNewData(
642 StringData::Create(m_pData->m_nDataLength + nConcatLen));
643 pNewData->CopyContents(*m_pData);
644 pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
645 pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
646 m_pData.Swap(pNewData);
647 }
648
ReferenceCountForTesting() const649 intptr_t WideString::ReferenceCountForTesting() const {
650 return m_pData ? m_pData->m_nRefs : 0;
651 }
652
ToASCII() const653 ByteString WideString::ToASCII() const {
654 ByteString result;
655 result.Reserve(GetLength());
656 for (wchar_t wc : *this)
657 result.InsertAtBack(static_cast<char>(wc & 0x7f));
658 return result;
659 }
660
ToLatin1() const661 ByteString WideString::ToLatin1() const {
662 ByteString result;
663 result.Reserve(GetLength());
664 for (wchar_t wc : *this)
665 result.InsertAtBack(static_cast<char>(wc & 0xff));
666 return result;
667 }
668
ToDefANSI() const669 ByteString WideString::ToDefANSI() const {
670 size_t dest_len =
671 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
672 if (!dest_len)
673 return ByteString();
674
675 ByteString bstr;
676 {
677 // Span's lifetime must end before ReleaseBuffer() below.
678 pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
679 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), dest_buf);
680 }
681 bstr.ReleaseBuffer(dest_len);
682 return bstr;
683 }
684
// Returns the result of FX_UTF8Encode() applied to this string's view.
ByteString WideString::ToUTF8() const {
  return FX_UTF8Encode(AsStringView());
}
688
ToUTF16LE() const689 ByteString WideString::ToUTF16LE() const {
690 if (!m_pData)
691 return ByteString("\0\0", 2);
692
693 ByteString result;
694 size_t len = m_pData->m_nDataLength;
695 {
696 // Span's lifetime must end before ReleaseBuffer() below.
697 pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
698 for (size_t i = 0; i < len; i++) {
699 buffer[i * 2] = m_pData->m_String[i] & 0xff;
700 buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
701 }
702 buffer[len * 2] = 0;
703 buffer[len * 2 + 1] = 0;
704 }
705 result.ReleaseBuffer(len * 2 + 2);
706 return result;
707 }
708
EncodeEntities() const709 WideString WideString::EncodeEntities() const {
710 WideString ret = *this;
711 ret.Replace(L"&", L"&");
712 ret.Replace(L"<", L"<");
713 ret.Replace(L">", L">");
714 ret.Replace(L"\'", L"'");
715 ret.Replace(L"\"", L""");
716 return ret;
717 }
718
// Returns the substring running from |offset| to the end of the string.
WideString WideString::Substr(size_t offset) const {
  // Unsigned underflow is well-defined and out-of-range is handled by Substr().
  return Substr(offset, GetLength() - offset);
}
723
Substr(size_t first,size_t count) const724 WideString WideString::Substr(size_t first, size_t count) const {
725 if (!m_pData)
726 return WideString();
727
728 if (!IsValidIndex(first))
729 return WideString();
730
731 if (count == 0 || !IsValidLength(count))
732 return WideString();
733
734 if (!IsValidIndex(first + count - 1))
735 return WideString();
736
737 if (first == 0 && count == GetLength())
738 return *this;
739
740 WideString dest;
741 AllocCopy(dest, count, first);
742 return dest;
743 }
744
// Returns the leading |count| characters; validation is done by Substr().
WideString WideString::First(size_t count) const {
  return Substr(0, count);
}
748
// Returns the trailing |count| characters; validation is done by Substr().
WideString WideString::Last(size_t count) const {
  // Unsigned underflow is well-defined and out-of-range is handled by Substr().
  return Substr(GetLength() - count, count);
}
753
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const754 void WideString::AllocCopy(WideString& dest,
755 size_t nCopyLen,
756 size_t nCopyIndex) const {
757 if (nCopyLen == 0)
758 return;
759
760 RetainPtr<StringData> pNewData(
761 StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
762 dest.m_pData.Swap(pNewData);
763 }
764
Insert(size_t index,wchar_t ch)765 size_t WideString::Insert(size_t index, wchar_t ch) {
766 const size_t cur_length = GetLength();
767 if (!IsValidLength(index))
768 return cur_length;
769
770 const size_t new_length = cur_length + 1;
771 ReallocBeforeWrite(new_length);
772 wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
773 new_length - index);
774 m_pData->m_String[index] = ch;
775 m_pData->m_nDataLength = new_length;
776 return new_length;
777 }
778
Find(wchar_t ch,size_t start) const779 absl::optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
780 if (!m_pData)
781 return absl::nullopt;
782
783 if (!IsValidIndex(start))
784 return absl::nullopt;
785
786 const wchar_t* pStr =
787 wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
788 return pStr ? absl::optional<size_t>(
789 static_cast<size_t>(pStr - m_pData->m_String))
790 : absl::nullopt;
791 }
792
Find(WideStringView subStr,size_t start) const793 absl::optional<size_t> WideString::Find(WideStringView subStr,
794 size_t start) const {
795 if (!m_pData)
796 return absl::nullopt;
797
798 if (!IsValidIndex(start))
799 return absl::nullopt;
800
801 const wchar_t* pStr =
802 FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
803 subStr.unterminated_c_str(), subStr.GetLength());
804 return pStr ? absl::optional<size_t>(
805 static_cast<size_t>(pStr - m_pData->m_String))
806 : absl::nullopt;
807 }
808
ReverseFind(wchar_t ch) const809 absl::optional<size_t> WideString::ReverseFind(wchar_t ch) const {
810 if (!m_pData)
811 return absl::nullopt;
812
813 size_t nLength = m_pData->m_nDataLength;
814 while (nLength--) {
815 if (m_pData->m_String[nLength] == ch)
816 return nLength;
817 }
818 return absl::nullopt;
819 }
820
MakeLower()821 void WideString::MakeLower() {
822 if (IsEmpty())
823 return;
824
825 ReallocBeforeWrite(m_pData->m_nDataLength);
826 FXSYS_wcslwr(m_pData->m_String);
827 }
828
MakeUpper()829 void WideString::MakeUpper() {
830 if (IsEmpty())
831 return;
832
833 ReallocBeforeWrite(m_pData->m_nDataLength);
834 FXSYS_wcsupr(m_pData->m_String);
835 }
836
Remove(wchar_t chRemove)837 size_t WideString::Remove(wchar_t chRemove) {
838 if (IsEmpty())
839 return 0;
840
841 wchar_t* pstrSource = m_pData->m_String;
842 wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
843 while (pstrSource < pstrEnd) {
844 if (*pstrSource == chRemove)
845 break;
846 pstrSource++;
847 }
848 if (pstrSource == pstrEnd)
849 return 0;
850
851 ptrdiff_t copied = pstrSource - m_pData->m_String;
852 ReallocBeforeWrite(m_pData->m_nDataLength);
853 pstrSource = m_pData->m_String + copied;
854 pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
855
856 wchar_t* pstrDest = pstrSource;
857 while (pstrSource < pstrEnd) {
858 if (*pstrSource != chRemove) {
859 *pstrDest = *pstrSource;
860 pstrDest++;
861 }
862 pstrSource++;
863 }
864
865 *pstrDest = 0;
866 size_t count = static_cast<size_t>(pstrSource - pstrDest);
867 m_pData->m_nDataLength -= count;
868 return count;
869 }
870
Replace(WideStringView pOld,WideStringView pNew)871 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
872 if (!m_pData || pOld.IsEmpty())
873 return 0;
874
875 size_t nSourceLen = pOld.GetLength();
876 size_t nReplacementLen = pNew.GetLength();
877 size_t count = 0;
878 const wchar_t* pStart = m_pData->m_String;
879 wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
880 while (true) {
881 const wchar_t* pTarget =
882 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
883 pOld.unterminated_c_str(), nSourceLen);
884 if (!pTarget)
885 break;
886
887 count++;
888 pStart = pTarget + nSourceLen;
889 }
890 if (count == 0)
891 return 0;
892
893 size_t nNewLength =
894 m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
895
896 if (nNewLength == 0) {
897 clear();
898 return count;
899 }
900
901 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
902 pStart = m_pData->m_String;
903 wchar_t* pDest = pNewData->m_String;
904 for (size_t i = 0; i < count; i++) {
905 const wchar_t* pTarget =
906 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
907 pOld.unterminated_c_str(), nSourceLen);
908 wmemcpy(pDest, pStart, pTarget - pStart);
909 pDest += pTarget - pStart;
910 wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
911 pDest += pNew.GetLength();
912 pStart = pTarget + nSourceLen;
913 }
914 wmemcpy(pDest, pStart, pEnd - pStart);
915 m_pData.Swap(pNewData);
916 return count;
917 }
918
919 // static
FromASCII(ByteStringView bstr)920 WideString WideString::FromASCII(ByteStringView bstr) {
921 WideString result;
922 result.Reserve(bstr.GetLength());
923 for (char c : bstr)
924 result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
925 return result;
926 }
927
928 // static
FromLatin1(ByteStringView bstr)929 WideString WideString::FromLatin1(ByteStringView bstr) {
930 WideString result;
931 result.Reserve(bstr.GetLength());
932 for (char c : bstr)
933 result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
934 return result;
935 }
936
937 // static
FromDefANSI(ByteStringView bstr)938 WideString WideString::FromDefANSI(ByteStringView bstr) {
939 size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
940 if (!dest_len)
941 return WideString();
942
943 WideString wstr;
944 {
945 // Span's lifetime must end before ReleaseBuffer() below.
946 pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
947 FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, dest_buf);
948 }
949 wstr.ReleaseBuffer(dest_len);
950 return wstr;
951 }
952
// static
// Returns the result of FX_UTF8Decode() applied to |str|.
WideString WideString::FromUTF8(ByteStringView str) {
  return FX_UTF8Decode(str);
}
957
958 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)959 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
960 if (!wstr || wlen == 0)
961 return WideString();
962
963 WideString result;
964 {
965 // Span's lifetime must end before ReleaseBuffer() below.
966 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
967 for (size_t i = 0; i < wlen; i++)
968 buf[i] = wstr[i];
969 }
970 result.ReleaseBuffer(wlen);
971 return result;
972 }
973
FromUTF16BE(const unsigned short * wstr,size_t wlen)974 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
975 if (!wstr || wlen == 0)
976 return WideString();
977
978 WideString result;
979 {
980 // Span's lifetime must end before ReleaseBuffer() below.
981 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
982 for (size_t i = 0; i < wlen; i++) {
983 auto wch = wstr[i];
984 wch = (wch >> 8) | (wch << 8);
985 buf[i] = wch;
986 }
987 }
988 result.ReleaseBuffer(wlen);
989 return result;
990 }
991
// Overwrites the character at |index| with |c|. |index| is validated only by
// a DCHECK; ReallocBeforeWrite() runs before the store.
void WideString::SetAt(size_t index, wchar_t c) {
  DCHECK(IsValidIndex(index));
  ReallocBeforeWrite(m_pData->m_nDataLength);
  m_pData->m_String[index] = c;
}
997
Compare(const wchar_t * str) const998 int WideString::Compare(const wchar_t* str) const {
999 if (m_pData)
1000 return str ? wcscmp(m_pData->m_String, str) : 1;
1001 return (!str || str[0] == 0) ? 0 : -1;
1002 }
1003
Compare(const WideString & str) const1004 int WideString::Compare(const WideString& str) const {
1005 if (!m_pData)
1006 return str.m_pData ? -1 : 0;
1007 if (!str.m_pData)
1008 return 1;
1009
1010 size_t this_len = m_pData->m_nDataLength;
1011 size_t that_len = str.m_pData->m_nDataLength;
1012 size_t min_len = std::min(this_len, that_len);
1013 int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
1014 if (result != 0)
1015 return result;
1016 if (this_len == that_len)
1017 return 0;
1018 return this_len < that_len ? -1 : 1;
1019 }
1020
CompareNoCase(const wchar_t * str) const1021 int WideString::CompareNoCase(const wchar_t* str) const {
1022 if (m_pData)
1023 return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1024 return (!str || str[0] == 0) ? 0 : -1;
1025 }
1026
WStringLength(const unsigned short * str)1027 size_t WideString::WStringLength(const unsigned short* str) {
1028 size_t len = 0;
1029 if (str)
1030 while (str[len])
1031 len++;
1032 return len;
1033 }
1034
// Strips the characters in kWideTrimChars from both ends of the string.
void WideString::Trim() {
  TrimRight(kWideTrimChars);
  TrimLeft(kWideTrimChars);
}
1039
Trim(wchar_t target)1040 void WideString::Trim(wchar_t target) {
1041 wchar_t str[2] = {target, 0};
1042 TrimRight(str);
1043 TrimLeft(str);
1044 }
1045
// Strips any of the characters in |targets| from both ends of the string.
void WideString::Trim(WideStringView targets) {
  TrimRight(targets);
  TrimLeft(targets);
}
1050
// Strips the characters in kWideTrimChars from the start of the string.
void WideString::TrimLeft() {
  TrimLeft(kWideTrimChars);
}
1054
TrimLeft(wchar_t target)1055 void WideString::TrimLeft(wchar_t target) {
1056 wchar_t str[2] = {target, 0};
1057 TrimLeft(str);
1058 }
1059
TrimLeft(WideStringView targets)1060 void WideString::TrimLeft(WideStringView targets) {
1061 if (!m_pData || targets.IsEmpty())
1062 return;
1063
1064 size_t len = GetLength();
1065 if (len == 0)
1066 return;
1067
1068 size_t pos = 0;
1069 while (pos < len) {
1070 size_t i = 0;
1071 while (i < targets.GetLength() &&
1072 targets.CharAt(i) != m_pData->m_String[pos]) {
1073 i++;
1074 }
1075 if (i == targets.GetLength())
1076 break;
1077 pos++;
1078 }
1079 if (!pos)
1080 return;
1081
1082 ReallocBeforeWrite(len);
1083 size_t nDataLength = len - pos;
1084 memmove(m_pData->m_String, m_pData->m_String + pos,
1085 (nDataLength + 1) * sizeof(wchar_t));
1086 m_pData->m_nDataLength = nDataLength;
1087 }
1088
// Strips the characters in kWideTrimChars from the end of the string.
void WideString::TrimRight() {
  TrimRight(kWideTrimChars);
}
1092
TrimRight(wchar_t target)1093 void WideString::TrimRight(wchar_t target) {
1094 wchar_t str[2] = {target, 0};
1095 TrimRight(str);
1096 }
1097
TrimRight(WideStringView targets)1098 void WideString::TrimRight(WideStringView targets) {
1099 if (IsEmpty() || targets.IsEmpty())
1100 return;
1101
1102 size_t pos = GetLength();
1103 while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1104 pos--;
1105
1106 if (pos < m_pData->m_nDataLength) {
1107 ReallocBeforeWrite(m_pData->m_nDataLength);
1108 m_pData->m_String[pos] = 0;
1109 m_pData->m_nDataLength = pos;
1110 }
1111 }
1112
GetInteger() const1113 int WideString::GetInteger() const {
1114 return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1115 }
1116
operator <<(std::wostream & os,const WideString & str)1117 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1118 return os.write(str.c_str(), str.GetLength());
1119 }
1120
// Writes |str| to the narrow stream |os| after converting it with ToUTF8().
std::ostream& operator<<(std::ostream& os, const WideString& str) {
  os << str.ToUTF8();
  return os;
}
1125
operator <<(std::wostream & os,WideStringView str)1126 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1127 return os.write(str.unterminated_c_str(), str.GetLength());
1128 }
1129
// Writes |str| to the narrow stream |os| after converting it with
// FX_UTF8Encode().
std::ostream& operator<<(std::ostream& os, WideStringView str) {
  os << FX_UTF8Encode(str);
  return os;
}
1134
1135 } // namespace fxcrt
1136
FX_HashCode_GetW(WideStringView str)1137 uint32_t FX_HashCode_GetW(WideStringView str) {
1138 uint32_t dwHashCode = 0;
1139 for (WideStringView::UnsignedType c : str)
1140 dwHashCode = 1313 * dwHashCode + c;
1141 return dwHashCode;
1142 }
1143
FX_HashCode_GetLoweredW(WideStringView str)1144 uint32_t FX_HashCode_GetLoweredW(WideStringView str) {
1145 uint32_t dwHashCode = 0;
1146 for (wchar_t c : str) // match FXSYS_towlower() arg type.
1147 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1148 return dwHashCode;
1149 }
1150