1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/widestring.h"
8
9 #include <stddef.h>
10
11 #include <algorithm>
12 #include <cctype>
13 #include <cwctype>
14
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_safe_types.h"
18 #include "core/fxcrt/string_pool_template.h"
19 #include "third_party/base/numerics/safe_math.h"
20 #include "third_party/base/stl_util.h"
21
// Explicit instantiations of the wide-character string templates, and of the
// std::hash specialization for WideString, in this translation unit.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;
26
// Length-modifier flags. GuessSizeForVSWPrintf() ORs one of these into the
// conversion character so that a single switch can dispatch on both. The
// values sit above the character values used in that switch.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
30
31 namespace {
32
// Characters stripped by the argument-less Trim(), TrimLeft() and TrimRight():
// tab, line feed, vertical tab, form feed, carriage return and space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
34
// Finds the first occurrence of |needle| (|needle_len| characters) within the
// first |haystack_len| characters of |haystack|. Neither buffer needs to be
// NUL-terminated.
//
// Returns a pointer to the start of the match, or nullptr when there is no
// match, when |needle| is empty, or when |needle| is longer than |haystack|.
//
// The lengths are size_t because every caller in this file passes a size_t;
// the previous int parameters silently truncated lengths above INT_MAX.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || needle_len > haystack_len)
    return nullptr;

  // Last position at which a full-length match can still begin.
  const wchar_t* const last_start = haystack + (haystack_len - needle_len);
  for (const wchar_t* candidate = haystack; candidate <= last_start;
       ++candidate) {
    if (wmemcmp(candidate, needle, needle_len) == 0)
      return candidate;
  }
  return nullptr;
}
58
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)59 Optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
60 va_list argList) {
61 size_t nMaxLen = 0;
62 for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
63 if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
64 ++nMaxLen;
65 continue;
66 }
67 int nItemLen = 0;
68 int nWidth = 0;
69 for (; *pStr != 0; pStr++) {
70 if (*pStr == '#') {
71 nMaxLen += 2;
72 } else if (*pStr == '*') {
73 nWidth = va_arg(argList, int);
74 } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
75 break;
76 }
77 }
78 if (nWidth == 0) {
79 nWidth = FXSYS_wtoi(pStr);
80 while (FXSYS_IsDecimalDigit(*pStr))
81 ++pStr;
82 }
83 if (nWidth < 0 || nWidth > 128 * 1024)
84 return pdfium::nullopt;
85 int nPrecision = 0;
86 if (*pStr == '.') {
87 pStr++;
88 if (*pStr == '*') {
89 nPrecision = va_arg(argList, int);
90 pStr++;
91 } else {
92 nPrecision = FXSYS_wtoi(pStr);
93 while (FXSYS_IsDecimalDigit(*pStr))
94 ++pStr;
95 }
96 }
97 if (nPrecision < 0 || nPrecision > 128 * 1024)
98 return pdfium::nullopt;
99 int nModifier = 0;
100 if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
101 pStr += 3;
102 nModifier = FORCE_INT64;
103 } else {
104 switch (*pStr) {
105 case 'h':
106 nModifier = FORCE_ANSI;
107 pStr++;
108 break;
109 case 'l':
110 nModifier = FORCE_UNICODE;
111 pStr++;
112 break;
113 case 'F':
114 case 'N':
115 case 'L':
116 pStr++;
117 break;
118 }
119 }
120 switch (*pStr | nModifier) {
121 case 'c':
122 case 'C':
123 nItemLen = 2;
124 va_arg(argList, int);
125 break;
126 case 'c' | FORCE_ANSI:
127 case 'C' | FORCE_ANSI:
128 nItemLen = 2;
129 va_arg(argList, int);
130 break;
131 case 'c' | FORCE_UNICODE:
132 case 'C' | FORCE_UNICODE:
133 nItemLen = 2;
134 va_arg(argList, int);
135 break;
136 case 's': {
137 const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
138 if (pstrNextArg) {
139 nItemLen = wcslen(pstrNextArg);
140 if (nItemLen < 1) {
141 nItemLen = 1;
142 }
143 } else {
144 nItemLen = 6;
145 }
146 } break;
147 case 'S': {
148 const char* pstrNextArg = va_arg(argList, const char*);
149 if (pstrNextArg) {
150 nItemLen = strlen(pstrNextArg);
151 if (nItemLen < 1) {
152 nItemLen = 1;
153 }
154 } else {
155 nItemLen = 6;
156 }
157 } break;
158 case 's' | FORCE_ANSI:
159 case 'S' | FORCE_ANSI: {
160 const char* pstrNextArg = va_arg(argList, const char*);
161 if (pstrNextArg) {
162 nItemLen = strlen(pstrNextArg);
163 if (nItemLen < 1) {
164 nItemLen = 1;
165 }
166 } else {
167 nItemLen = 6;
168 }
169 } break;
170 case 's' | FORCE_UNICODE:
171 case 'S' | FORCE_UNICODE: {
172 const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
173 if (pstrNextArg) {
174 nItemLen = wcslen(pstrNextArg);
175 if (nItemLen < 1) {
176 nItemLen = 1;
177 }
178 } else {
179 nItemLen = 6;
180 }
181 } break;
182 }
183 if (nItemLen != 0) {
184 if (nPrecision != 0 && nItemLen > nPrecision) {
185 nItemLen = nPrecision;
186 }
187 if (nItemLen < nWidth) {
188 nItemLen = nWidth;
189 }
190 } else {
191 switch (*pStr) {
192 case 'd':
193 case 'i':
194 case 'u':
195 case 'x':
196 case 'X':
197 case 'o':
198 if (nModifier & FORCE_INT64) {
199 va_arg(argList, int64_t);
200 } else {
201 va_arg(argList, int);
202 }
203 nItemLen = 32;
204 if (nItemLen < nWidth + nPrecision) {
205 nItemLen = nWidth + nPrecision;
206 }
207 break;
208 case 'a':
209 case 'A':
210 case 'e':
211 case 'E':
212 case 'g':
213 case 'G':
214 va_arg(argList, double);
215 nItemLen = 128;
216 if (nItemLen < nWidth + nPrecision) {
217 nItemLen = nWidth + nPrecision;
218 }
219 break;
220 case 'f':
221 if (nWidth + nPrecision > 100) {
222 nItemLen = nPrecision + nWidth + 128;
223 } else {
224 double f;
225 char pszTemp[256];
226 f = va_arg(argList, double);
227 FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
228 nPrecision + 6, f);
229 nItemLen = strlen(pszTemp);
230 }
231 break;
232 case 'p':
233 va_arg(argList, void*);
234 nItemLen = 32;
235 if (nItemLen < nWidth + nPrecision) {
236 nItemLen = nWidth + nPrecision;
237 }
238 break;
239 case 'n':
240 va_arg(argList, int*);
241 break;
242 }
243 }
244 nMaxLen += nItemLen;
245 }
246 nMaxLen += 32; // Fudge factor.
247 return nMaxLen;
248 }
249
250 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)251 Optional<WideString> TryVSWPrintf(size_t size,
252 const wchar_t* pFormat,
253 va_list argList) {
254 if (!size)
255 return {};
256
257 WideString str;
258 {
259 // Span's lifetime must end before ReleaseBuffer() below.
260 pdfium::span<wchar_t> buffer = str.GetBuffer(size);
261
262 // In the following two calls, there's always space in the WideString
263 // for a terminating NUL that's not included in the span.
264 // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
265 // -1 return code even though the buffer is written. Probably just as well
266 // not to trust the vendor's implementation to write anything anyways.
267 // See https://crbug.com/705912.
268 memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
269 int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
270
271 bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
272 if (!bSufficientBuffer)
273 return {};
274 }
275 str.ReleaseBuffer(str.GetStringLength());
276 return {str};
277 }
278
279 } // namespace
280
281 namespace fxcrt {
282
// Compile-time guard: a WideString must stay no larger than a single pointer.
static_assert(sizeof(WideString) <= sizeof(wchar_t*),
              "Strings must not require more space than pointers");
285
286 // static
FormatV(const wchar_t * format,va_list argList)287 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
288 va_list argListCopy;
289 va_copy(argListCopy, argList);
290 int maxLen = vswprintf(nullptr, 0, format, argListCopy);
291 va_end(argListCopy);
292
293 if (maxLen <= 0) {
294 va_copy(argListCopy, argList);
295 auto guess = GuessSizeForVSWPrintf(format, argListCopy);
296 va_end(argListCopy);
297
298 if (!guess.has_value())
299 return WideString();
300 maxLen = pdfium::base::checked_cast<int>(guess.value());
301 }
302
303 while (maxLen < 32 * 1024) {
304 va_copy(argListCopy, argList);
305 Optional<WideString> ret =
306 TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
307 va_end(argListCopy);
308
309 if (ret)
310 return *ret;
311 maxLen *= 2;
312 }
313 return WideString();
314 }
315
316 // static
Format(const wchar_t * pFormat,...)317 WideString WideString::Format(const wchar_t* pFormat, ...) {
318 va_list argList;
319 va_start(argList, pFormat);
320 WideString ret = FormatV(pFormat, argList);
321 va_end(argList);
322 return ret;
323 }
324
WideString()325 WideString::WideString() {}
326
// Copy constructor: copies |other|'s data pointer rather than the characters.
WideString::WideString(const WideString& other)
    : m_pData(other.m_pData) {}
328
WideString(WideString && other)329 WideString::WideString(WideString&& other) noexcept {
330 m_pData.Swap(other.m_pData);
331 }
332
WideString(const wchar_t * pStr,size_t nLen)333 WideString::WideString(const wchar_t* pStr, size_t nLen) {
334 if (nLen)
335 m_pData.Reset(StringData::Create(pStr, nLen));
336 }
337
WideString(wchar_t ch)338 WideString::WideString(wchar_t ch) {
339 m_pData.Reset(StringData::Create(1));
340 m_pData->m_String[0] = ch;
341 }
342
WideString(const wchar_t * ptr)343 WideString::WideString(const wchar_t* ptr)
344 : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
345
WideString(WideStringView stringSrc)346 WideString::WideString(WideStringView stringSrc) {
347 if (!stringSrc.IsEmpty()) {
348 m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
349 stringSrc.GetLength()));
350 }
351 }
352
WideString(WideStringView str1,WideStringView str2)353 WideString::WideString(WideStringView str1, WideStringView str2) {
354 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
355 nSafeLen += str2.GetLength();
356
357 size_t nNewLen = nSafeLen.ValueOrDie();
358 if (nNewLen == 0)
359 return;
360
361 m_pData.Reset(StringData::Create(nNewLen));
362 m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
363 m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
364 str2.GetLength());
365 }
366
WideString(const std::initializer_list<WideStringView> & list)367 WideString::WideString(const std::initializer_list<WideStringView>& list) {
368 FX_SAFE_SIZE_T nSafeLen = 0;
369 for (const auto& item : list)
370 nSafeLen += item.GetLength();
371
372 size_t nNewLen = nSafeLen.ValueOrDie();
373 if (nNewLen == 0)
374 return;
375
376 m_pData.Reset(StringData::Create(nNewLen));
377
378 size_t nOffset = 0;
379 for (const auto& item : list) {
380 m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
381 item.GetLength());
382 nOffset += item.GetLength();
383 }
384 }
385
~WideString()386 WideString::~WideString() {}
387
operator =(const wchar_t * str)388 WideString& WideString::operator=(const wchar_t* str) {
389 if (!str || !str[0])
390 clear();
391 else
392 AssignCopy(str, wcslen(str));
393
394 return *this;
395 }
396
operator =(WideStringView str)397 WideString& WideString::operator=(WideStringView str) {
398 if (str.IsEmpty())
399 clear();
400 else
401 AssignCopy(str.unterminated_c_str(), str.GetLength());
402
403 return *this;
404 }
405
// Copy assignment: takes |that|'s data pointer unless both strings already
// hold the same one.
WideString& WideString::operator=(const WideString& that) {
  const bool bSameData = m_pData == that.m_pData;
  if (!bSameData)
    m_pData = that.m_pData;

  return *this;
}
412
operator =(WideString && that)413 WideString& WideString::operator=(WideString&& that) {
414 if (m_pData != that.m_pData)
415 m_pData = std::move(that.m_pData);
416
417 return *this;
418 }
419
operator +=(const wchar_t * str)420 WideString& WideString::operator+=(const wchar_t* str) {
421 if (str)
422 Concat(str, wcslen(str));
423
424 return *this;
425 }
426
operator +=(wchar_t ch)427 WideString& WideString::operator+=(wchar_t ch) {
428 Concat(&ch, 1);
429 return *this;
430 }
431
// Appends the contents of |str|. A string without backing data is ignored.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
438
operator +=(WideStringView str)439 WideString& WideString::operator+=(WideStringView str) {
440 if (!str.IsEmpty())
441 Concat(str.unterminated_c_str(), str.GetLength());
442
443 return *this;
444 }
445
operator ==(const wchar_t * ptr) const446 bool WideString::operator==(const wchar_t* ptr) const {
447 if (!m_pData)
448 return !ptr || !ptr[0];
449
450 if (!ptr)
451 return m_pData->m_nDataLength == 0;
452
453 return wcslen(ptr) == m_pData->m_nDataLength &&
454 wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
455 }
456
operator ==(WideStringView str) const457 bool WideString::operator==(WideStringView str) const {
458 if (!m_pData)
459 return str.IsEmpty();
460
461 return m_pData->m_nDataLength == str.GetLength() &&
462 wmemcmp(m_pData->m_String, str.unterminated_c_str(),
463 str.GetLength()) == 0;
464 }
465
operator ==(const WideString & other) const466 bool WideString::operator==(const WideString& other) const {
467 if (m_pData == other.m_pData)
468 return true;
469
470 if (IsEmpty())
471 return other.IsEmpty();
472
473 if (other.IsEmpty())
474 return false;
475
476 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
477 wmemcmp(other.m_pData->m_String, m_pData->m_String,
478 m_pData->m_nDataLength) == 0;
479 }
480
operator <(const wchar_t * ptr) const481 bool WideString::operator<(const wchar_t* ptr) const {
482 return Compare(ptr) < 0;
483 }
484
operator <(WideStringView str) const485 bool WideString::operator<(WideStringView str) const {
486 if (!m_pData && !str.unterminated_c_str())
487 return false;
488 if (c_str() == str.unterminated_c_str())
489 return false;
490
491 size_t len = GetLength();
492 size_t other_len = str.GetLength();
493 int result =
494 wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
495 return result < 0 || (result == 0 && len < other_len);
496 }
497
operator <(const WideString & other) const498 bool WideString::operator<(const WideString& other) const {
499 return Compare(other) < 0;
500 }
501
AssignCopy(const wchar_t * pSrcData,size_t nSrcLen)502 void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
503 AllocBeforeWrite(nSrcLen);
504 m_pData->CopyContents(pSrcData, nSrcLen);
505 m_pData->m_nDataLength = nSrcLen;
506 }
507
ReallocBeforeWrite(size_t nNewLength)508 void WideString::ReallocBeforeWrite(size_t nNewLength) {
509 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
510 return;
511
512 if (nNewLength == 0) {
513 clear();
514 return;
515 }
516
517 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
518 if (m_pData) {
519 size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
520 pNewData->CopyContents(m_pData->m_String, nCopyLength);
521 pNewData->m_nDataLength = nCopyLength;
522 } else {
523 pNewData->m_nDataLength = 0;
524 }
525 pNewData->m_String[pNewData->m_nDataLength] = 0;
526 m_pData.Swap(pNewData);
527 }
528
AllocBeforeWrite(size_t nNewLength)529 void WideString::AllocBeforeWrite(size_t nNewLength) {
530 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
531 return;
532
533 if (nNewLength == 0) {
534 clear();
535 return;
536 }
537
538 m_pData.Reset(StringData::Create(nNewLength));
539 }
540
ReleaseBuffer(size_t nNewLength)541 void WideString::ReleaseBuffer(size_t nNewLength) {
542 if (!m_pData)
543 return;
544
545 nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
546 if (nNewLength == 0) {
547 clear();
548 return;
549 }
550
551 ASSERT(m_pData->m_nRefs == 1);
552 m_pData->m_nDataLength = nNewLength;
553 m_pData->m_String[nNewLength] = 0;
554 if (m_pData->m_nAllocLength - nNewLength >= 32) {
555 // Over arbitrary threshold, so pay the price to relocate. Force copy to
556 // always occur by holding a second reference to the string.
557 WideString preserve(*this);
558 ReallocBeforeWrite(nNewLength);
559 }
560 }
561
Reserve(size_t len)562 void WideString::Reserve(size_t len) {
563 GetBuffer(len);
564 }
565
GetBuffer(size_t nMinBufLength)566 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
567 if (!m_pData) {
568 if (nMinBufLength == 0)
569 return pdfium::span<wchar_t>();
570
571 m_pData.Reset(StringData::Create(nMinBufLength));
572 m_pData->m_nDataLength = 0;
573 m_pData->m_String[0] = 0;
574 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
575 }
576
577 if (m_pData->CanOperateInPlace(nMinBufLength))
578 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
579
580 nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
581 if (nMinBufLength == 0)
582 return pdfium::span<wchar_t>();
583
584 RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
585 pNewData->CopyContents(*m_pData);
586 pNewData->m_nDataLength = m_pData->m_nDataLength;
587 m_pData.Swap(pNewData);
588 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589 }
590
Delete(size_t index,size_t count)591 size_t WideString::Delete(size_t index, size_t count) {
592 if (!m_pData)
593 return 0;
594
595 size_t old_length = m_pData->m_nDataLength;
596 if (count == 0 || index != pdfium::clamp<size_t>(index, 0, old_length))
597 return old_length;
598
599 size_t removal_length = index + count;
600 if (removal_length > old_length)
601 return old_length;
602
603 ReallocBeforeWrite(old_length);
604 size_t chars_to_copy = old_length - removal_length + 1;
605 wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
606 chars_to_copy);
607 m_pData->m_nDataLength = old_length - count;
608 return m_pData->m_nDataLength;
609 }
610
Concat(const wchar_t * pSrcData,size_t nSrcLen)611 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
612 if (!pSrcData || nSrcLen == 0)
613 return;
614
615 if (!m_pData) {
616 m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
617 return;
618 }
619
620 if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
621 m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
622 m_pData->m_nDataLength += nSrcLen;
623 return;
624 }
625
626 size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
627 RetainPtr<StringData> pNewData(
628 StringData::Create(m_pData->m_nDataLength + nConcatLen));
629 pNewData->CopyContents(*m_pData);
630 pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
631 pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
632 m_pData.Swap(pNewData);
633 }
634
ReferenceCountForTesting() const635 intptr_t WideString::ReferenceCountForTesting() const {
636 return m_pData ? m_pData->m_nRefs : 0;
637 }
638
ToASCII() const639 ByteString WideString::ToASCII() const {
640 ByteString result;
641 result.Reserve(GetLength());
642 for (wchar_t wc : *this)
643 result.InsertAtBack(static_cast<char>(wc & 0x7f));
644 return result;
645 }
646
ToLatin1() const647 ByteString WideString::ToLatin1() const {
648 ByteString result;
649 result.Reserve(GetLength());
650 for (wchar_t wc : *this)
651 result.InsertAtBack(static_cast<char>(wc & 0xff));
652 return result;
653 }
654
ToDefANSI() const655 ByteString WideString::ToDefANSI() const {
656 int src_len = GetLength();
657 int dest_len = FXSYS_WideCharToMultiByte(
658 FX_CODEPAGE_DefANSI, 0, c_str(), src_len, nullptr, 0, nullptr, nullptr);
659 if (!dest_len)
660 return ByteString();
661
662 ByteString bstr;
663 {
664 // Span's lifetime must end before ReleaseBuffer() below.
665 pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
666 FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, c_str(), src_len,
667 dest_buf.data(), dest_len, nullptr, nullptr);
668 }
669 bstr.ReleaseBuffer(dest_len);
670 return bstr;
671 }
672
ToUTF8() const673 ByteString WideString::ToUTF8() const {
674 return FX_UTF8Encode(AsStringView());
675 }
676
ToUTF16LE() const677 ByteString WideString::ToUTF16LE() const {
678 if (!m_pData)
679 return ByteString("\0\0", 2);
680
681 ByteString result;
682 int len = m_pData->m_nDataLength;
683 {
684 // Span's lifetime must end before ReleaseBuffer() below.
685 pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
686 for (int i = 0; i < len; i++) {
687 buffer[i * 2] = m_pData->m_String[i] & 0xff;
688 buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
689 }
690 buffer[len * 2] = 0;
691 buffer[len * 2 + 1] = 0;
692 }
693 result.ReleaseBuffer(len * 2 + 2);
694 return result;
695 }
696
Substr(size_t first,size_t count) const697 WideString WideString::Substr(size_t first, size_t count) const {
698 if (!m_pData)
699 return WideString();
700
701 if (!IsValidIndex(first))
702 return WideString();
703
704 if (count == 0 || !IsValidLength(count))
705 return WideString();
706
707 if (!IsValidIndex(first + count - 1))
708 return WideString();
709
710 if (first == 0 && count == GetLength())
711 return *this;
712
713 WideString dest;
714 AllocCopy(dest, count, first);
715 return dest;
716 }
717
First(size_t count) const718 WideString WideString::First(size_t count) const {
719 if (count == 0 || !IsValidLength(count))
720 return WideString();
721 return Substr(0, count);
722 }
723
Last(size_t count) const724 WideString WideString::Last(size_t count) const {
725 if (count == 0 || !IsValidLength(count))
726 return WideString();
727 return Substr(GetLength() - count, count);
728 }
729
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const730 void WideString::AllocCopy(WideString& dest,
731 size_t nCopyLen,
732 size_t nCopyIndex) const {
733 if (nCopyLen == 0)
734 return;
735
736 RetainPtr<StringData> pNewData(
737 StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
738 dest.m_pData.Swap(pNewData);
739 }
740
Insert(size_t index,wchar_t ch)741 size_t WideString::Insert(size_t index, wchar_t ch) {
742 const size_t cur_length = GetLength();
743 if (!IsValidLength(index))
744 return cur_length;
745
746 const size_t new_length = cur_length + 1;
747 ReallocBeforeWrite(new_length);
748 wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
749 new_length - index);
750 m_pData->m_String[index] = ch;
751 m_pData->m_nDataLength = new_length;
752 return new_length;
753 }
754
Find(wchar_t ch,size_t start) const755 Optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
756 if (!m_pData)
757 return pdfium::nullopt;
758
759 if (!IsValidIndex(start))
760 return pdfium::nullopt;
761
762 const wchar_t* pStr =
763 wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
764 return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
765 : pdfium::nullopt;
766 }
767
Find(WideStringView subStr,size_t start) const768 Optional<size_t> WideString::Find(WideStringView subStr, size_t start) const {
769 if (!m_pData)
770 return pdfium::nullopt;
771
772 if (!IsValidIndex(start))
773 return pdfium::nullopt;
774
775 const wchar_t* pStr =
776 FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
777 subStr.unterminated_c_str(), subStr.GetLength());
778 return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
779 : pdfium::nullopt;
780 }
781
ReverseFind(wchar_t ch) const782 Optional<size_t> WideString::ReverseFind(wchar_t ch) const {
783 if (!m_pData)
784 return pdfium::nullopt;
785
786 size_t nLength = m_pData->m_nDataLength;
787 while (nLength--) {
788 if (m_pData->m_String[nLength] == ch)
789 return nLength;
790 }
791 return pdfium::nullopt;
792 }
793
MakeLower()794 void WideString::MakeLower() {
795 if (!m_pData)
796 return;
797
798 ReallocBeforeWrite(m_pData->m_nDataLength);
799 FXSYS_wcslwr(m_pData->m_String);
800 }
801
MakeUpper()802 void WideString::MakeUpper() {
803 if (!m_pData)
804 return;
805
806 ReallocBeforeWrite(m_pData->m_nDataLength);
807 FXSYS_wcsupr(m_pData->m_String);
808 }
809
Remove(wchar_t chRemove)810 size_t WideString::Remove(wchar_t chRemove) {
811 if (!m_pData || m_pData->m_nDataLength == 0)
812 return 0;
813
814 wchar_t* pstrSource = m_pData->m_String;
815 wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
816 while (pstrSource < pstrEnd) {
817 if (*pstrSource == chRemove)
818 break;
819 pstrSource++;
820 }
821 if (pstrSource == pstrEnd)
822 return 0;
823
824 ptrdiff_t copied = pstrSource - m_pData->m_String;
825 ReallocBeforeWrite(m_pData->m_nDataLength);
826 pstrSource = m_pData->m_String + copied;
827 pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
828
829 wchar_t* pstrDest = pstrSource;
830 while (pstrSource < pstrEnd) {
831 if (*pstrSource != chRemove) {
832 *pstrDest = *pstrSource;
833 pstrDest++;
834 }
835 pstrSource++;
836 }
837
838 *pstrDest = 0;
839 size_t count = static_cast<size_t>(pstrSource - pstrDest);
840 m_pData->m_nDataLength -= count;
841 return count;
842 }
843
Replace(WideStringView pOld,WideStringView pNew)844 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
845 if (!m_pData || pOld.IsEmpty())
846 return 0;
847
848 size_t nSourceLen = pOld.GetLength();
849 size_t nReplacementLen = pNew.GetLength();
850 size_t count = 0;
851 const wchar_t* pStart = m_pData->m_String;
852 wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
853 while (1) {
854 const wchar_t* pTarget =
855 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
856 pOld.unterminated_c_str(), nSourceLen);
857 if (!pTarget)
858 break;
859
860 count++;
861 pStart = pTarget + nSourceLen;
862 }
863 if (count == 0)
864 return 0;
865
866 size_t nNewLength =
867 m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
868
869 if (nNewLength == 0) {
870 clear();
871 return count;
872 }
873
874 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
875 pStart = m_pData->m_String;
876 wchar_t* pDest = pNewData->m_String;
877 for (size_t i = 0; i < count; i++) {
878 const wchar_t* pTarget =
879 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
880 pOld.unterminated_c_str(), nSourceLen);
881 wmemcpy(pDest, pStart, pTarget - pStart);
882 pDest += pTarget - pStart;
883 wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
884 pDest += pNew.GetLength();
885 pStart = pTarget + nSourceLen;
886 }
887 wmemcpy(pDest, pStart, pEnd - pStart);
888 m_pData.Swap(pNewData);
889 return count;
890 }
891
892 // static
FromASCII(ByteStringView bstr)893 WideString WideString::FromASCII(ByteStringView bstr) {
894 WideString result;
895 result.Reserve(bstr.GetLength());
896 for (char c : bstr)
897 result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
898 return result;
899 }
900
901 // static
FromLatin1(ByteStringView bstr)902 WideString WideString::FromLatin1(ByteStringView bstr) {
903 WideString result;
904 result.Reserve(bstr.GetLength());
905 for (char c : bstr)
906 result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
907 return result;
908 }
909
910 // static
FromDefANSI(ByteStringView bstr)911 WideString WideString::FromDefANSI(ByteStringView bstr) {
912 int src_len = bstr.GetLength();
913 int dest_len = FXSYS_MultiByteToWideChar(
914 FX_CODEPAGE_DefANSI, 0, bstr.unterminated_c_str(), src_len, nullptr, 0);
915 if (!dest_len)
916 return WideString();
917
918 WideString wstr;
919 {
920 // Span's lifetime must end before ReleaseBuffer() below.
921 pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
922 FXSYS_MultiByteToWideChar(FX_CODEPAGE_DefANSI, 0, bstr.unterminated_c_str(),
923 src_len, dest_buf.data(), dest_len);
924 }
925 wstr.ReleaseBuffer(dest_len);
926 return wstr;
927 }
928
929 // static
FromUTF8(ByteStringView str)930 WideString WideString::FromUTF8(ByteStringView str) {
931 return FX_UTF8Decode(str);
932 }
933
934 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)935 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
936 if (!wstr || wlen == 0)
937 return WideString();
938
939 WideString result;
940 {
941 // Span's lifetime must end before ReleaseBuffer() below.
942 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
943 for (size_t i = 0; i < wlen; i++)
944 buf[i] = wstr[i];
945 }
946 result.ReleaseBuffer(wlen);
947 return result;
948 }
949
FromUTF16BE(const unsigned short * wstr,size_t wlen)950 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
951 if (!wstr || wlen == 0)
952 return WideString();
953
954 WideString result;
955 {
956 // Span's lifetime must end before ReleaseBuffer() below.
957 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
958 for (size_t i = 0; i < wlen; i++) {
959 auto wch = wstr[i];
960 wch = (wch >> 8) | (wch << 8);
961 buf[i] = wch;
962 }
963 }
964 result.ReleaseBuffer(wlen);
965 return result;
966 }
967
SetAt(size_t index,wchar_t c)968 void WideString::SetAt(size_t index, wchar_t c) {
969 ASSERT(IsValidIndex(index));
970 ReallocBeforeWrite(m_pData->m_nDataLength);
971 m_pData->m_String[index] = c;
972 }
973
Compare(const wchar_t * str) const974 int WideString::Compare(const wchar_t* str) const {
975 if (m_pData)
976 return str ? wcscmp(m_pData->m_String, str) : 1;
977 return (!str || str[0] == 0) ? 0 : -1;
978 }
979
Compare(const WideString & str) const980 int WideString::Compare(const WideString& str) const {
981 if (!m_pData)
982 return str.m_pData ? -1 : 0;
983 if (!str.m_pData)
984 return 1;
985
986 size_t this_len = m_pData->m_nDataLength;
987 size_t that_len = str.m_pData->m_nDataLength;
988 size_t min_len = std::min(this_len, that_len);
989 int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
990 if (result != 0)
991 return result;
992 if (this_len == that_len)
993 return 0;
994 return this_len < that_len ? -1 : 1;
995 }
996
CompareNoCase(const wchar_t * str) const997 int WideString::CompareNoCase(const wchar_t* str) const {
998 if (m_pData)
999 return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1000 return (!str || str[0] == 0) ? 0 : -1;
1001 }
1002
WStringLength(const unsigned short * str)1003 size_t WideString::WStringLength(const unsigned short* str) {
1004 size_t len = 0;
1005 if (str)
1006 while (str[len])
1007 len++;
1008 return len;
1009 }
1010
Trim()1011 void WideString::Trim() {
1012 TrimRight(kWideTrimChars);
1013 TrimLeft(kWideTrimChars);
1014 }
1015
Trim(wchar_t target)1016 void WideString::Trim(wchar_t target) {
1017 wchar_t str[2] = {target, 0};
1018 TrimRight(str);
1019 TrimLeft(str);
1020 }
1021
Trim(WideStringView targets)1022 void WideString::Trim(WideStringView targets) {
1023 TrimRight(targets);
1024 TrimLeft(targets);
1025 }
1026
TrimLeft()1027 void WideString::TrimLeft() {
1028 TrimLeft(kWideTrimChars);
1029 }
1030
TrimLeft(wchar_t target)1031 void WideString::TrimLeft(wchar_t target) {
1032 wchar_t str[2] = {target, 0};
1033 TrimLeft(str);
1034 }
1035
TrimLeft(WideStringView targets)1036 void WideString::TrimLeft(WideStringView targets) {
1037 if (!m_pData || targets.IsEmpty())
1038 return;
1039
1040 size_t len = GetLength();
1041 if (len == 0)
1042 return;
1043
1044 size_t pos = 0;
1045 while (pos < len) {
1046 size_t i = 0;
1047 while (i < targets.GetLength() &&
1048 targets.CharAt(i) != m_pData->m_String[pos]) {
1049 i++;
1050 }
1051 if (i == targets.GetLength())
1052 break;
1053 pos++;
1054 }
1055 if (!pos)
1056 return;
1057
1058 ReallocBeforeWrite(len);
1059 size_t nDataLength = len - pos;
1060 memmove(m_pData->m_String, m_pData->m_String + pos,
1061 (nDataLength + 1) * sizeof(wchar_t));
1062 m_pData->m_nDataLength = nDataLength;
1063 }
1064
TrimRight()1065 void WideString::TrimRight() {
1066 TrimRight(kWideTrimChars);
1067 }
1068
TrimRight(wchar_t target)1069 void WideString::TrimRight(wchar_t target) {
1070 wchar_t str[2] = {target, 0};
1071 TrimRight(str);
1072 }
1073
TrimRight(WideStringView targets)1074 void WideString::TrimRight(WideStringView targets) {
1075 if (IsEmpty() || targets.IsEmpty())
1076 return;
1077
1078 size_t pos = GetLength();
1079 while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1080 pos--;
1081
1082 if (pos < m_pData->m_nDataLength) {
1083 ReallocBeforeWrite(m_pData->m_nDataLength);
1084 m_pData->m_String[pos] = 0;
1085 m_pData->m_nDataLength = pos;
1086 }
1087 }
1088
GetInteger() const1089 int WideString::GetInteger() const {
1090 return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1091 }
1092
operator <<(std::wostream & os,const WideString & str)1093 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1094 return os.write(str.c_str(), str.GetLength());
1095 }
1096
operator <<(std::ostream & os,const WideString & str)1097 std::ostream& operator<<(std::ostream& os, const WideString& str) {
1098 os << str.ToUTF8();
1099 return os;
1100 }
1101
operator <<(std::wostream & os,WideStringView str)1102 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1103 return os.write(str.unterminated_c_str(), str.GetLength());
1104 }
1105
operator <<(std::ostream & os,WideStringView str)1106 std::ostream& operator<<(std::ostream& os, WideStringView str) {
1107 os << FX_UTF8Encode(str);
1108 return os;
1109 }
1110
1111 } // namespace fxcrt
1112
FX_HashCode_GetW(WideStringView str,bool bIgnoreCase)1113 uint32_t FX_HashCode_GetW(WideStringView str, bool bIgnoreCase) {
1114 uint32_t dwHashCode = 0;
1115 if (bIgnoreCase) {
1116 for (wchar_t c : str) // match FXSYS_towlower() arg type.
1117 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1118 } else {
1119 for (WideStringView::UnsignedType c : str)
1120 dwHashCode = 1313 * dwHashCode + c;
1121 }
1122 return dwHashCode;
1123 }
1124