1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/bytestring.h"
8
9 #include <ctype.h>
10 #include <stddef.h>
11
12 #include <algorithm>
13 #include <sstream>
14 #include <string>
15 #include <utility>
16
17 #include "core/fxcrt/check.h"
18 #include "core/fxcrt/check_op.h"
19 #include "core/fxcrt/fx_codepage.h"
20 #include "core/fxcrt/fx_extension.h"
21 #include "core/fxcrt/fx_memcpy_wrappers.h"
22 #include "core/fxcrt/fx_safe_types.h"
23 #include "core/fxcrt/fx_system.h"
24 #include "core/fxcrt/span.h"
25 #include "core/fxcrt/span_util.h"
26 #include "core/fxcrt/string_pool_template.h"
27
// Explicit instantiation definitions: the char string view, the ByteString
// string pool and the std::hash specialization for ByteString are
// instantiated in this translation unit.
template class fxcrt::StringViewTemplate<char>;
template class fxcrt::StringPoolTemplate<ByteString>;
template struct std::hash<ByteString>;
32
namespace {

// Bytes removed by the TrimWhitespace*() members: horizontal tab, line feed,
// vertical tab, form feed, carriage return and space (0x09-0x0d and 0x20).
constexpr char kTrimChars[] = "\t\n\v\f\r ";

}  // namespace
38
39 namespace fxcrt {
40
41 static_assert(sizeof(ByteString) <= sizeof(char*),
42 "Strings must not require more space than pointers");
43
44 // static
FormatInteger(int i)45 ByteString ByteString::FormatInteger(int i) {
46 char buf[32];
47 FXSYS_snprintf(buf, sizeof(buf), "%d", i);
48 return ByteString(buf);
49 }
50
51 // static
FormatV(const char * pFormat,va_list argList)52 ByteString ByteString::FormatV(const char* pFormat, va_list argList) {
53 va_list argListCopy;
54 va_copy(argListCopy, argList);
55 int nMaxLen = vsnprintf(nullptr, 0, pFormat, argListCopy);
56 va_end(argListCopy);
57
58 if (nMaxLen <= 0)
59 return ByteString();
60
61 ByteString ret;
62 {
63 // Span's lifetime must end before ReleaseBuffer() below.
64 pdfium::span<char> buf = ret.GetBuffer(nMaxLen);
65
66 // SAFETY: In the following two calls, there's always space in the buffer
67 // for a terminating NUL that's not included in nMaxLen, and hence not
68 // included in the span.
69 UNSAFE_BUFFERS(FXSYS_memset(buf.data(), 0, nMaxLen + 1));
70 va_copy(argListCopy, argList);
71 vsnprintf(buf.data(), nMaxLen + 1, pFormat, argListCopy);
72 va_end(argListCopy);
73 }
74 ret.ReleaseBuffer(ret.GetStringLength());
75 return ret;
76 }
77
78 // static
Format(const char * pFormat,...)79 ByteString ByteString::Format(const char* pFormat, ...) {
80 va_list argList;
81 va_start(argList, pFormat);
82 ByteString ret = FormatV(pFormat, argList);
83 va_end(argList);
84
85 return ret;
86 }
87
ByteString(const char * pStr,size_t nLen)88 ByteString::ByteString(const char* pStr, size_t nLen) {
89 if (nLen) {
90 // SAFETY: caller ensures `pStr` points to at least `nLen` chars.
91 m_pData = StringData::Create(UNSAFE_BUFFERS(pdfium::make_span(pStr, nLen)));
92 }
93 }
94
ByteString(const uint8_t * pStr,size_t nLen)95 ByteString::ByteString(const uint8_t* pStr, size_t nLen)
96 : ByteString(reinterpret_cast<const char*>(pStr), nLen) {}
97
ByteString(char ch)98 ByteString::ByteString(char ch) {
99 m_pData = StringData::Create(1);
100 m_pData->m_String[0] = ch;
101 }
102
ByteString(const char * ptr)103 ByteString::ByteString(const char* ptr)
104 : ByteString(ptr, ptr ? strlen(ptr) : 0) {}
105
ByteString(ByteStringView bstrc)106 ByteString::ByteString(ByteStringView bstrc) {
107 if (!bstrc.IsEmpty()) {
108 m_pData = StringData::Create(bstrc.span());
109 }
110 }
111
ByteString(ByteStringView str1,ByteStringView str2)112 ByteString::ByteString(ByteStringView str1, ByteStringView str2) {
113 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
114 nSafeLen += str2.GetLength();
115
116 size_t nNewLen = nSafeLen.ValueOrDie();
117 if (nNewLen == 0)
118 return;
119
120 m_pData = StringData::Create(nNewLen);
121 m_pData->CopyContents(str1.span());
122 m_pData->CopyContentsAt(str1.GetLength(), str2.span());
123 }
124
ByteString(const std::initializer_list<ByteStringView> & list)125 ByteString::ByteString(const std::initializer_list<ByteStringView>& list) {
126 FX_SAFE_SIZE_T nSafeLen = 0;
127 for (const auto& item : list)
128 nSafeLen += item.GetLength();
129
130 size_t nNewLen = nSafeLen.ValueOrDie();
131 if (nNewLen == 0)
132 return;
133
134 m_pData = StringData::Create(nNewLen);
135
136 size_t nOffset = 0;
137 for (const auto& item : list) {
138 m_pData->CopyContentsAt(nOffset, item.span());
139 nOffset += item.GetLength();
140 }
141 }
142
ByteString(const fxcrt::ostringstream & outStream)143 ByteString::ByteString(const fxcrt::ostringstream& outStream) {
144 auto str = outStream.str();
145 if (!str.empty()) {
146 m_pData = StringData::Create(pdfium::make_span(str));
147 }
148 }
149
operator =(const char * str)150 ByteString& ByteString::operator=(const char* str) {
151 if (!str || !str[0])
152 clear();
153 else
154 AssignCopy(str, strlen(str));
155
156 return *this;
157 }
158
operator =(ByteStringView str)159 ByteString& ByteString::operator=(ByteStringView str) {
160 if (str.IsEmpty())
161 clear();
162 else
163 AssignCopy(str.unterminated_c_str(), str.GetLength());
164
165 return *this;
166 }
167
// Copy assignment: takes on `that`'s data pointer. Nothing is done when both
// strings already hold the same data pointer.
ByteString& ByteString::operator=(const ByteString& that) {
  if (m_pData == that.m_pData) {
    return *this;
  }
  m_pData = that.m_pData;
  return *this;
}
174
operator =(ByteString && that)175 ByteString& ByteString::operator=(ByteString&& that) noexcept {
176 if (m_pData != that.m_pData)
177 m_pData = std::move(that.m_pData);
178
179 return *this;
180 }
181
operator +=(const char * str)182 ByteString& ByteString::operator+=(const char* str) {
183 if (str)
184 Concat(str, strlen(str));
185
186 return *this;
187 }
188
operator +=(char ch)189 ByteString& ByteString::operator+=(char ch) {
190 Concat(&ch, 1);
191 return *this;
192 }
193
// Appends the contents of `str`. A `str` without data is ignored.
ByteString& ByteString::operator+=(const ByteString& str) {
  if (!str.m_pData) {
    return *this;
  }
  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
200
operator +=(ByteStringView str)201 ByteString& ByteString::operator+=(ByteStringView str) {
202 if (!str.IsEmpty())
203 Concat(str.unterminated_c_str(), str.GetLength());
204
205 return *this;
206 }
207
operator ==(const char * ptr) const208 bool ByteString::operator==(const char* ptr) const {
209 if (!m_pData)
210 return !ptr || !ptr[0];
211
212 if (!ptr)
213 return m_pData->m_nDataLength == 0;
214
215 // SAFETY: `m_nDataLength` is within `m_String`, and the strlen() call
216 // ensures there are `m_nDataLength` bytes at `ptr` before the terminator.
217 return strlen(ptr) == m_pData->m_nDataLength &&
218 UNSAFE_BUFFERS(
219 FXSYS_memcmp(ptr, m_pData->m_String, m_pData->m_nDataLength)) == 0;
220 }
221
operator ==(ByteStringView str) const222 bool ByteString::operator==(ByteStringView str) const {
223 if (!m_pData)
224 return str.IsEmpty();
225
226 // SAFETY: `str` has `GetLength()` valid bytes in `unterminated_c_str()`,
227 // `m_nDataLength` is within `m_String`, and equality comparison.
228 return m_pData->m_nDataLength == str.GetLength() &&
229 UNSAFE_BUFFERS(FXSYS_memcmp(
230 m_pData->m_String, str.unterminated_c_str(), str.GetLength())) ==
231 0;
232 }
233
operator ==(const ByteString & other) const234 bool ByteString::operator==(const ByteString& other) const {
235 if (m_pData == other.m_pData)
236 return true;
237
238 if (IsEmpty())
239 return other.IsEmpty();
240
241 if (other.IsEmpty())
242 return false;
243
244 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
245 memcmp(other.m_pData->m_String, m_pData->m_String,
246 m_pData->m_nDataLength) == 0;
247 }
248
operator <(const char * ptr) const249 bool ByteString::operator<(const char* ptr) const {
250 if (!m_pData && !ptr)
251 return false;
252 if (c_str() == ptr)
253 return false;
254
255 size_t len = GetLength();
256 size_t other_len = ptr ? strlen(ptr) : 0;
257
258 // SAFETY: Comparison limited to minimum valid length of either argument.
259 int result =
260 UNSAFE_BUFFERS(FXSYS_memcmp(c_str(), ptr, std::min(len, other_len)));
261 return result < 0 || (result == 0 && len < other_len);
262 }
263
operator <(ByteStringView str) const264 bool ByteString::operator<(ByteStringView str) const {
265 return Compare(str) < 0;
266 }
267
operator <(const ByteString & other) const268 bool ByteString::operator<(const ByteString& other) const {
269 if (m_pData == other.m_pData)
270 return false;
271
272 size_t len = GetLength();
273 size_t other_len = other.GetLength();
274
275 // SAFETY: Comparison limited to minimum valid length of either argument.
276 int result = UNSAFE_BUFFERS(
277 FXSYS_memcmp(c_str(), other.c_str(), std::min(len, other_len)));
278 return result < 0 || (result == 0 && len < other_len);
279 }
280
EqualNoCase(ByteStringView str) const281 bool ByteString::EqualNoCase(ByteStringView str) const {
282 if (!m_pData) {
283 return str.IsEmpty();
284 }
285 if (m_pData->m_nDataLength != str.GetLength()) {
286 return false;
287 }
288 pdfium::span<const uint8_t> this_span = pdfium::as_bytes(m_pData->span());
289 pdfium::span<const uint8_t> that_span = str.unsigned_span();
290 while (!this_span.empty()) {
291 uint8_t this_char = this_span.front();
292 uint8_t that_char = that_span.front();
293 if (this_char != that_char && tolower(this_char) != tolower(that_char)) {
294 return false;
295 }
296 this_span = this_span.subspan(1);
297 that_span = that_span.subspan(1);
298 }
299 return true;
300 }
301
ReferenceCountForTesting() const302 intptr_t ByteString::ReferenceCountForTesting() const {
303 return m_pData ? m_pData->m_nRefs : 0;
304 }
305
Substr(size_t offset) const306 ByteString ByteString::Substr(size_t offset) const {
307 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
308 return Substr(offset, GetLength() - offset);
309 }
310
Substr(size_t first,size_t count) const311 ByteString ByteString::Substr(size_t first, size_t count) const {
312 if (!m_pData) {
313 return ByteString();
314 }
315 if (first == 0 && count == m_pData->m_nDataLength) {
316 return *this;
317 }
318 return ByteString(AsStringView().Substr(first, count));
319 }
320
First(size_t count) const321 ByteString ByteString::First(size_t count) const {
322 return Substr(0, count);
323 }
324
Last(size_t count) const325 ByteString ByteString::Last(size_t count) const {
326 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
327 return Substr(GetLength() - count, count);
328 }
329
MakeLower()330 void ByteString::MakeLower() {
331 if (IsEmpty())
332 return;
333
334 ReallocBeforeWrite(m_pData->m_nDataLength);
335 FXSYS_strlwr(m_pData->m_String);
336 }
337
MakeUpper()338 void ByteString::MakeUpper() {
339 if (IsEmpty())
340 return;
341
342 ReallocBeforeWrite(m_pData->m_nDataLength);
343 FXSYS_strupr(m_pData->m_String);
344 }
345
Compare(ByteStringView str) const346 int ByteString::Compare(ByteStringView str) const {
347 if (!m_pData)
348 return str.IsEmpty() ? 0 : -1;
349
350 size_t this_len = m_pData->m_nDataLength;
351 size_t that_len = str.GetLength();
352 size_t min_len = std::min(this_len, that_len);
353
354 // SAFETY: Comparison limited to minimum valid length of either argument.
355 int result = UNSAFE_BUFFERS(
356 FXSYS_memcmp(m_pData->m_String, str.unterminated_c_str(), min_len));
357 if (result != 0)
358 return result;
359 if (this_len == that_len)
360 return 0;
361 return this_len < that_len ? -1 : 1;
362 }
363
TrimWhitespace()364 void ByteString::TrimWhitespace() {
365 TrimWhitespaceBack();
366 TrimWhitespaceFront();
367 }
368
TrimWhitespaceFront()369 void ByteString::TrimWhitespaceFront() {
370 TrimFront(kTrimChars);
371 }
372
TrimWhitespaceBack()373 void ByteString::TrimWhitespaceBack() {
374 TrimBack(kTrimChars);
375 }
376
operator <<(std::ostream & os,const ByteString & str)377 std::ostream& operator<<(std::ostream& os, const ByteString& str) {
378 return os.write(str.c_str(), str.GetLength());
379 }
380
operator <<(std::ostream & os,ByteStringView str)381 std::ostream& operator<<(std::ostream& os, ByteStringView str) {
382 return os.write(str.unterminated_c_str(), str.GetLength());
383 }
384
385 } // namespace fxcrt
386
FX_HashCode_GetA(ByteStringView str)387 uint32_t FX_HashCode_GetA(ByteStringView str) {
388 uint32_t dwHashCode = 0;
389 for (ByteStringView::UnsignedType c : str)
390 dwHashCode = 31 * dwHashCode + c;
391 return dwHashCode;
392 }
393
FX_HashCode_GetLoweredA(ByteStringView str)394 uint32_t FX_HashCode_GetLoweredA(ByteStringView str) {
395 uint32_t dwHashCode = 0;
396 for (ByteStringView::UnsignedType c : str)
397 dwHashCode = 31 * dwHashCode + tolower(c);
398 return dwHashCode;
399 }
400
FX_HashCode_GetAsIfW(ByteStringView str)401 uint32_t FX_HashCode_GetAsIfW(ByteStringView str) {
402 uint32_t dwHashCode = 0;
403 for (ByteStringView::UnsignedType c : str)
404 dwHashCode = 1313 * dwHashCode + c;
405 return dwHashCode;
406 }
407
FX_HashCode_GetLoweredAsIfW(ByteStringView str)408 uint32_t FX_HashCode_GetLoweredAsIfW(ByteStringView str) {
409 uint32_t dwHashCode = 0;
410 for (ByteStringView::UnsignedType c : str)
411 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
412 return dwHashCode;
413 }
414