// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "base/strings/string_util.h"
11
12 #include <errno.h>
13 #include <math.h>
14 #include <stdarg.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include <wchar.h>
21
22 #include <limits>
23 #include <optional>
24 #include <string_view>
25 #include <type_traits>
26 #include <vector>
27
28 #include "base/check_op.h"
29 #include "base/no_destructor.h"
30 #include "base/ranges/algorithm.h"
31 #include "base/strings/string_util_impl_helpers.h"
32 #include "base/strings/string_util_internal.h"
33 #include "base/strings/utf_string_conversion_utils.h"
34 #include "base/strings/utf_string_conversions.h"
35 #include "base/third_party/icu/icu_utf.h"
36 #include "build/build_config.h"
37
38 namespace base {
39
// Scans the wprintf-style |format| and reports whether every conversion
// specification in it is one this function treats as portable.
//
// Returns false as soon as a specification uses 's' or 'c' without a
// preceding 'l' modifier, or uses any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when |format| ends in the middle of a
// specification (such a string is equally broken everywhere).
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      // Ordinary character outside of any specification.
      ++cursor;
      continue;
    }

    // |cursor| sits on a '%'. Consume flags/width/modifiers until a known
    // conversion character terminates the specification.
    bool seen_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == '\0') {
        // Truncated specification: nothing unportable was seen.
        return true;
      }

      if (ch == 'l') {
        // Only an 'l' modifier makes a later 's' or 'c' acceptable.
        seen_l_modifier = true;
      } else {
        const bool unqualified_s_or_c =
            (ch == 's' || ch == 'c') && !seen_l_modifier;
        const bool rejected_specifier = ch == 'S' || ch == 'C' || ch == 'F' ||
                                        ch == 'D' || ch == 'O' || ch == 'U';
        if (unqualified_s_or_c || rejected_specifier) {
          return false;
        }
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // End of this specification; resume scanning plain text.
        break;
      }
    }

    // Step past the conversion character that closed the specification.
    ++cursor;
  }

  return true;
}
74
ToLowerASCII(std::string_view str)75 std::string ToLowerASCII(std::string_view str) {
76 return internal::ToLowerASCIIImpl(str);
77 }
78
ToLowerASCII(std::u16string_view str)79 std::u16string ToLowerASCII(std::u16string_view str) {
80 return internal::ToLowerASCIIImpl(str);
81 }
82
ToUpperASCII(std::string_view str)83 std::string ToUpperASCII(std::string_view str) {
84 return internal::ToUpperASCIIImpl(str);
85 }
86
ToUpperASCII(std::u16string_view str)87 std::u16string ToUpperASCII(std::u16string_view str) {
88 return internal::ToUpperASCIIImpl(str);
89 }
90
EmptyString()91 const std::string& EmptyString() {
92 static const base::NoDestructor<std::string> s;
93 return *s;
94 }
95
EmptyString16()96 const std::u16string& EmptyString16() {
97 static const base::NoDestructor<std::u16string> s16;
98 return *s16;
99 }
100
ReplaceChars(std::u16string_view input,std::u16string_view replace_chars,std::u16string_view replace_with,std::u16string * output)101 bool ReplaceChars(std::u16string_view input,
102 std::u16string_view replace_chars,
103 std::u16string_view replace_with,
104 std::u16string* output) {
105 return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
106 }
107
ReplaceChars(std::string_view input,std::string_view replace_chars,std::string_view replace_with,std::string * output)108 bool ReplaceChars(std::string_view input,
109 std::string_view replace_chars,
110 std::string_view replace_with,
111 std::string* output) {
112 return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
113 }
114
RemoveChars(std::u16string_view input,std::u16string_view remove_chars,std::u16string * output)115 bool RemoveChars(std::u16string_view input,
116 std::u16string_view remove_chars,
117 std::u16string* output) {
118 return internal::ReplaceCharsT(input, remove_chars, std::u16string_view(),
119 output);
120 }
121
RemoveChars(std::string_view input,std::string_view remove_chars,std::string * output)122 bool RemoveChars(std::string_view input,
123 std::string_view remove_chars,
124 std::string* output) {
125 return internal::ReplaceCharsT(input, remove_chars, std::string_view(),
126 output);
127 }
128
TrimString(std::u16string_view input,std::u16string_view trim_chars,std::u16string * output)129 bool TrimString(std::u16string_view input,
130 std::u16string_view trim_chars,
131 std::u16string* output) {
132 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
133 TRIM_NONE;
134 }
135
TrimString(std::string_view input,std::string_view trim_chars,std::string * output)136 bool TrimString(std::string_view input,
137 std::string_view trim_chars,
138 std::string* output) {
139 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
140 TRIM_NONE;
141 }
142
TrimString(std::u16string_view input,std::u16string_view trim_chars,TrimPositions positions)143 std::u16string_view TrimString(std::u16string_view input,
144 std::u16string_view trim_chars,
145 TrimPositions positions) {
146 return internal::TrimStringPieceT(input, trim_chars, positions);
147 }
148
TrimString(std::string_view input,std::string_view trim_chars,TrimPositions positions)149 std::string_view TrimString(std::string_view input,
150 std::string_view trim_chars,
151 TrimPositions positions) {
152 return internal::TrimStringPieceT(input, trim_chars, positions);
153 }
154
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)155 void TruncateUTF8ToByteSize(const std::string& input,
156 const size_t byte_size,
157 std::string* output) {
158 DCHECK(output);
159 if (byte_size > input.length()) {
160 *output = input;
161 return;
162 }
163 DCHECK_LE(byte_size,
164 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
165 // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
166 int32_t truncation_length = static_cast<int32_t>(byte_size);
167 int32_t char_index = truncation_length - 1;
168 const char* data = input.data();
169
170 // Using CBU8, we will move backwards from the truncation point
171 // to the beginning of the string looking for a valid UTF8
172 // character. Once a full UTF8 character is found, we will
173 // truncate the string to the end of that character.
174 while (char_index >= 0) {
175 int32_t prev = char_index;
176 base_icu::UChar32 code_point = 0;
177 CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
178 truncation_length, code_point);
179 if (!IsValidCharacter(code_point)) {
180 char_index = prev - 1;
181 } else {
182 break;
183 }
184 }
185
186 if (char_index >= 0 )
187 *output = input.substr(0, static_cast<size_t>(char_index));
188 else
189 output->clear();
190 }
191
TrimWhitespace(std::u16string_view input,TrimPositions positions,std::u16string * output)192 TrimPositions TrimWhitespace(std::u16string_view input,
193 TrimPositions positions,
194 std::u16string* output) {
195 return internal::TrimStringT(input, std::u16string_view(kWhitespaceUTF16),
196 positions, output);
197 }
198
TrimWhitespace(std::u16string_view input,TrimPositions positions)199 std::u16string_view TrimWhitespace(std::u16string_view input,
200 TrimPositions positions) {
201 return internal::TrimStringPieceT(
202 input, std::u16string_view(kWhitespaceUTF16), positions);
203 }
204
TrimWhitespaceASCII(std::string_view input,TrimPositions positions,std::string * output)205 TrimPositions TrimWhitespaceASCII(std::string_view input,
206 TrimPositions positions,
207 std::string* output) {
208 return internal::TrimStringT(input, std::string_view(kWhitespaceASCII),
209 positions, output);
210 }
211
TrimWhitespaceASCII(std::string_view input,TrimPositions positions)212 std::string_view TrimWhitespaceASCII(std::string_view input,
213 TrimPositions positions) {
214 return internal::TrimStringPieceT(input, std::string_view(kWhitespaceASCII),
215 positions);
216 }
217
CollapseWhitespace(std::u16string_view text,bool trim_sequences_with_line_breaks)218 std::u16string CollapseWhitespace(std::u16string_view text,
219 bool trim_sequences_with_line_breaks) {
220 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
221 }
222
CollapseWhitespaceASCII(std::string_view text,bool trim_sequences_with_line_breaks)223 std::string CollapseWhitespaceASCII(std::string_view text,
224 bool trim_sequences_with_line_breaks) {
225 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
226 }
227
// Returns true when every character of |input| also occurs in |characters|
// (trivially true for an empty |input|).
bool ContainsOnlyChars(std::string_view input, std::string_view characters) {
  const std::string_view::size_type first_outsider =
      input.find_first_not_of(characters);
  return first_outsider == std::string_view::npos;
}
231
// UTF-16 overload: returns true when every code unit of |input| also occurs
// in |characters| (trivially true for an empty |input|).
bool ContainsOnlyChars(std::u16string_view input,
                       std::u16string_view characters) {
  const std::u16string_view::size_type first_outsider =
      input.find_first_not_of(characters);
  return first_outsider == std::u16string_view::npos;
}
236
IsStringASCII(std::string_view str)237 bool IsStringASCII(std::string_view str) {
238 return internal::DoIsStringASCII(str.data(), str.length());
239 }
240
IsStringASCII(std::u16string_view str)241 bool IsStringASCII(std::u16string_view str) {
242 return internal::DoIsStringASCII(str.data(), str.length());
243 }
244
#if defined(WCHAR_T_IS_32_BIT)
// Wide-string overload, compiled only when WCHAR_T_IS_32_BIT is defined:
// passes the view's buffer pointer and length to internal::DoIsStringASCII.
bool IsStringASCII(std::wstring_view str) {
  const wchar_t* chars = str.data();
  const size_t count = str.length();
  return internal::DoIsStringASCII(chars, count);
}
#endif
250
IsStringUTF8(std::string_view str)251 bool IsStringUTF8(std::string_view str) {
252 return internal::DoIsStringUTF8<IsValidCharacter>(str);
253 }
254
IsStringUTF8AllowingNoncharacters(std::string_view str)255 bool IsStringUTF8AllowingNoncharacters(std::string_view str) {
256 return internal::DoIsStringUTF8<IsValidCodepoint>(str);
257 }
258
EqualsASCII(std::u16string_view str,std::string_view ascii)259 bool EqualsASCII(std::u16string_view str, std::string_view ascii) {
260 return ranges::equal(ascii, str);
261 }
262
StartsWith(std::string_view str,std::string_view search_for,CompareCase case_sensitivity)263 bool StartsWith(std::string_view str,
264 std::string_view search_for,
265 CompareCase case_sensitivity) {
266 return internal::StartsWithT(str, search_for, case_sensitivity);
267 }
268
StartsWith(std::u16string_view str,std::u16string_view search_for,CompareCase case_sensitivity)269 bool StartsWith(std::u16string_view str,
270 std::u16string_view search_for,
271 CompareCase case_sensitivity) {
272 return internal::StartsWithT(str, search_for, case_sensitivity);
273 }
274
EndsWith(std::string_view str,std::string_view search_for,CompareCase case_sensitivity)275 bool EndsWith(std::string_view str,
276 std::string_view search_for,
277 CompareCase case_sensitivity) {
278 return internal::EndsWithT(str, search_for, case_sensitivity);
279 }
280
EndsWith(std::u16string_view str,std::u16string_view search_for,CompareCase case_sensitivity)281 bool EndsWith(std::u16string_view str,
282 std::u16string_view search_for,
283 CompareCase case_sensitivity) {
284 return internal::EndsWithT(str, search_for, case_sensitivity);
285 }
286
HexDigitToInt(char c)287 char HexDigitToInt(char c) {
288 DCHECK(IsHexDigit(c));
289 if (c >= '0' && c <= '9')
290 return static_cast<char>(c - '0');
291 return (c >= 'A' && c <= 'F') ? static_cast<char>(c - 'A' + 10)
292 : static_cast<char>(c - 'a' + 10);
293 }
294
// Unit suffixes (each with a leading space) in increasing order of
// magnitude; FormatBytesUnlocalized() indexes this table by the number of
// times the value was divided by 1024.
static const char* const kByteStringsUnlocalized[] = {" B",  " kB", " MB",
                                                      " GB", " TB", " PB"};
303
FormatBytesUnlocalized(int64_t bytes)304 std::u16string FormatBytesUnlocalized(int64_t bytes) {
305 double unit_amount = static_cast<double>(bytes);
306 size_t dimension = 0;
307 const int kKilo = 1024;
308 while (unit_amount >= kKilo &&
309 dimension < std::size(kByteStringsUnlocalized) - 1) {
310 unit_amount /= kKilo;
311 dimension++;
312 }
313
314 char buf[64];
315 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
316 base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount,
317 kByteStringsUnlocalized[dimension]);
318 } else {
319 base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount,
320 kByteStringsUnlocalized[dimension]);
321 }
322
323 return ASCIIToUTF16(buf);
324 }
325
ReplaceFirstSubstringAfterOffset(std::u16string * str,size_t start_offset,std::u16string_view find_this,std::u16string_view replace_with)326 void ReplaceFirstSubstringAfterOffset(std::u16string* str,
327 size_t start_offset,
328 std::u16string_view find_this,
329 std::u16string_view replace_with) {
330 internal::DoReplaceMatchesAfterOffset(
331 str, start_offset, internal::MakeSubstringMatcher(find_this),
332 replace_with, internal::ReplaceType::REPLACE_FIRST);
333 }
334
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,std::string_view find_this,std::string_view replace_with)335 void ReplaceFirstSubstringAfterOffset(std::string* str,
336 size_t start_offset,
337 std::string_view find_this,
338 std::string_view replace_with) {
339 internal::DoReplaceMatchesAfterOffset(
340 str, start_offset, internal::MakeSubstringMatcher(find_this),
341 replace_with, internal::ReplaceType::REPLACE_FIRST);
342 }
343
ReplaceSubstringsAfterOffset(std::u16string * str,size_t start_offset,std::u16string_view find_this,std::u16string_view replace_with)344 void ReplaceSubstringsAfterOffset(std::u16string* str,
345 size_t start_offset,
346 std::u16string_view find_this,
347 std::u16string_view replace_with) {
348 internal::DoReplaceMatchesAfterOffset(
349 str, start_offset, internal::MakeSubstringMatcher(find_this),
350 replace_with, internal::ReplaceType::REPLACE_ALL);
351 }
352
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,std::string_view find_this,std::string_view replace_with)353 void ReplaceSubstringsAfterOffset(std::string* str,
354 size_t start_offset,
355 std::string_view find_this,
356 std::string_view replace_with) {
357 internal::DoReplaceMatchesAfterOffset(
358 str, start_offset, internal::MakeSubstringMatcher(find_this),
359 replace_with, internal::ReplaceType::REPLACE_ALL);
360 }
361
WriteInto(std::string * str,size_t length_with_null)362 char* WriteInto(std::string* str, size_t length_with_null) {
363 return internal::WriteIntoT(str, length_with_null);
364 }
365
WriteInto(std::u16string * str,size_t length_with_null)366 char16_t* WriteInto(std::u16string* str, size_t length_with_null) {
367 return internal::WriteIntoT(str, length_with_null);
368 }
369
JoinString(span<const std::string> parts,std::string_view separator)370 std::string JoinString(span<const std::string> parts,
371 std::string_view separator) {
372 return internal::JoinStringT(parts, separator);
373 }
374
JoinString(span<const std::u16string> parts,std::u16string_view separator)375 std::u16string JoinString(span<const std::u16string> parts,
376 std::u16string_view separator) {
377 return internal::JoinStringT(parts, separator);
378 }
379
JoinString(span<const std::string_view> parts,std::string_view separator)380 std::string JoinString(span<const std::string_view> parts,
381 std::string_view separator) {
382 return internal::JoinStringT(parts, separator);
383 }
384
JoinString(span<const std::u16string_view> parts,std::u16string_view separator)385 std::u16string JoinString(span<const std::u16string_view> parts,
386 std::u16string_view separator) {
387 return internal::JoinStringT(parts, separator);
388 }
389
JoinString(std::initializer_list<std::string_view> parts,std::string_view separator)390 std::string JoinString(std::initializer_list<std::string_view> parts,
391 std::string_view separator) {
392 return internal::JoinStringT(parts, separator);
393 }
394
JoinString(std::initializer_list<std::u16string_view> parts,std::u16string_view separator)395 std::u16string JoinString(std::initializer_list<std::u16string_view> parts,
396 std::u16string_view separator) {
397 return internal::JoinStringT(parts, separator);
398 }
399
ReplaceStringPlaceholders(std::u16string_view format_string,const std::vector<std::u16string> & subst,std::vector<size_t> * offsets)400 std::u16string ReplaceStringPlaceholders(
401 std::u16string_view format_string,
402 const std::vector<std::u16string>& subst,
403 std::vector<size_t>* offsets) {
404 std::optional<std::u16string> replacement =
405 internal::DoReplaceStringPlaceholders(
406 format_string, subst,
407 /*placeholder_prefix*/ u'$',
408 /*should_escape_multiple_placeholder_prefixes*/ true,
409 /*is_strict_mode*/ false, offsets);
410
411 return std::move(replacement).value();
412 }
413
ReplaceStringPlaceholders(std::string_view format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)414 std::string ReplaceStringPlaceholders(std::string_view format_string,
415 const std::vector<std::string>& subst,
416 std::vector<size_t>* offsets) {
417 std::optional<std::string> replacement =
418 internal::DoReplaceStringPlaceholders(
419 format_string, subst,
420 /*placeholder_prefix*/ '$',
421 /*should_escape_multiple_placeholder_prefixes*/ true,
422 /*is_strict_mode*/ false, offsets);
423
424 return std::move(replacement).value();
425 }
426
ReplaceStringPlaceholders(const std::u16string & format_string,const std::u16string & a,size_t * offset)427 std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
428 const std::u16string& a,
429 size_t* offset) {
430 std::vector<size_t> offsets;
431 std::u16string result =
432 ReplaceStringPlaceholders(format_string, {a}, &offsets);
433
434 DCHECK_EQ(1U, offsets.size());
435 if (offset)
436 *offset = offsets[0];
437 return result;
438 }
439
strlcpy(span<char> dst,std::string_view src)440 size_t strlcpy(span<char> dst, std::string_view src) {
441 return internal::lcpyT(dst, src);
442 }
443
u16cstrlcpy(span<char16_t> dst,std::u16string_view src)444 size_t u16cstrlcpy(span<char16_t> dst, std::u16string_view src) {
445 return internal::lcpyT(dst, src);
446 }
447
wcslcpy(span<wchar_t> dst,std::wstring_view src)448 size_t wcslcpy(span<wchar_t> dst, std::wstring_view src) {
449 return internal::lcpyT(dst, src);
450 }
451
strlcpy(char * dst,const char * src,size_t dst_size)452 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
453 return internal::lcpyT(
454 UNSAFE_TODO(base::span(dst, dst_size), std::string_view(src)));
455 }
456
u16cstrlcpy(char16_t * dst,const char16_t * src,size_t dst_size)457 size_t u16cstrlcpy(char16_t* dst, const char16_t* src, size_t dst_size) {
458 return internal::lcpyT(UNSAFE_TODO(base::span(dst, dst_size)),
459 std::u16string_view(src));
460 }
461
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)462 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
463 return internal::lcpyT(UNSAFE_TODO(base::span(dst, dst_size)),
464 std::wstring_view(src));
465 }
466
467 } // namespace base
468