• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/strings/string_util.h"
11 
12 #include <errno.h>
13 #include <math.h>
14 #include <stdarg.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include <wchar.h>
21 
22 #include <limits>
23 #include <optional>
24 #include <string_view>
25 #include <type_traits>
26 #include <vector>
27 
28 #include "base/check_op.h"
29 #include "base/no_destructor.h"
30 #include "base/ranges/algorithm.h"
31 #include "base/strings/string_util_impl_helpers.h"
32 #include "base/strings/string_util_internal.h"
33 #include "base/strings/utf_string_conversion_utils.h"
34 #include "base/strings/utf_string_conversions.h"
35 #include "base/third_party/icu/icu_utf.h"
36 #include "build/build_config.h"
37 
38 namespace base {
39 
// Scans |format| and reports whether every conversion specification in it is
// one this function treats as portable. 's' and 'c' are accepted only when an
// 'l' modifier has been seen within the same specification; 'S', 'C', 'F',
// 'D', 'O' and 'U' are always rejected. A string that ends in the middle of a
// specification is reported as portable.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on '%': walk the specification that follows it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // Truncated specification: nothing unportable was found, and the
        // string is equally broken on every platform.
        return true;
      }

      switch (ch) {
        case L'l':
          // Only 'l' can make a later 's' or 'c' acceptable.
          saw_l_modifier = true;
          break;
        case L's':
        case L'c':
          if (!saw_l_modifier) {
            return false;
          }
          break;
        case L'S':
        case L'C':
        case L'F':
        case L'D':
        case L'O':
        case L'U':
          return false;
        default:
          break;
      }

      // A known conversion character closes the specification.
      if (wcschr(L"diouxXeEfgGaAcspn%", ch) != nullptr) {
        break;
      }
    }

    // Step past the character that closed the specification.
    ++cursor;
  }

  return true;
}
74 
ToLowerASCII(std::string_view str)75 std::string ToLowerASCII(std::string_view str) {
76   return internal::ToLowerASCIIImpl(str);
77 }
78 
ToLowerASCII(std::u16string_view str)79 std::u16string ToLowerASCII(std::u16string_view str) {
80   return internal::ToLowerASCIIImpl(str);
81 }
82 
ToUpperASCII(std::string_view str)83 std::string ToUpperASCII(std::string_view str) {
84   return internal::ToUpperASCIIImpl(str);
85 }
86 
ToUpperASCII(std::u16string_view str)87 std::u16string ToUpperASCII(std::u16string_view str) {
88   return internal::ToUpperASCIIImpl(str);
89 }
90 
EmptyString()91 const std::string& EmptyString() {
92   static const base::NoDestructor<std::string> s;
93   return *s;
94 }
95 
EmptyString16()96 const std::u16string& EmptyString16() {
97   static const base::NoDestructor<std::u16string> s16;
98   return *s16;
99 }
100 
ReplaceChars(std::u16string_view input,std::u16string_view replace_chars,std::u16string_view replace_with,std::u16string * output)101 bool ReplaceChars(std::u16string_view input,
102                   std::u16string_view replace_chars,
103                   std::u16string_view replace_with,
104                   std::u16string* output) {
105   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
106 }
107 
ReplaceChars(std::string_view input,std::string_view replace_chars,std::string_view replace_with,std::string * output)108 bool ReplaceChars(std::string_view input,
109                   std::string_view replace_chars,
110                   std::string_view replace_with,
111                   std::string* output) {
112   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
113 }
114 
RemoveChars(std::u16string_view input,std::u16string_view remove_chars,std::u16string * output)115 bool RemoveChars(std::u16string_view input,
116                  std::u16string_view remove_chars,
117                  std::u16string* output) {
118   return internal::ReplaceCharsT(input, remove_chars, std::u16string_view(),
119                                  output);
120 }
121 
RemoveChars(std::string_view input,std::string_view remove_chars,std::string * output)122 bool RemoveChars(std::string_view input,
123                  std::string_view remove_chars,
124                  std::string* output) {
125   return internal::ReplaceCharsT(input, remove_chars, std::string_view(),
126                                  output);
127 }
128 
TrimString(std::u16string_view input,std::u16string_view trim_chars,std::u16string * output)129 bool TrimString(std::u16string_view input,
130                 std::u16string_view trim_chars,
131                 std::u16string* output) {
132   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
133          TRIM_NONE;
134 }
135 
TrimString(std::string_view input,std::string_view trim_chars,std::string * output)136 bool TrimString(std::string_view input,
137                 std::string_view trim_chars,
138                 std::string* output) {
139   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
140          TRIM_NONE;
141 }
142 
TrimString(std::u16string_view input,std::u16string_view trim_chars,TrimPositions positions)143 std::u16string_view TrimString(std::u16string_view input,
144                                std::u16string_view trim_chars,
145                                TrimPositions positions) {
146   return internal::TrimStringPieceT(input, trim_chars, positions);
147 }
148 
TrimString(std::string_view input,std::string_view trim_chars,TrimPositions positions)149 std::string_view TrimString(std::string_view input,
150                             std::string_view trim_chars,
151                             TrimPositions positions) {
152   return internal::TrimStringPieceT(input, trim_chars, positions);
153 }
154 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)155 void TruncateUTF8ToByteSize(const std::string& input,
156                             const size_t byte_size,
157                             std::string* output) {
158   DCHECK(output);
159   if (byte_size > input.length()) {
160     *output = input;
161     return;
162   }
163   DCHECK_LE(byte_size,
164             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
165   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
166   int32_t truncation_length = static_cast<int32_t>(byte_size);
167   int32_t char_index = truncation_length - 1;
168   const char* data = input.data();
169 
170   // Using CBU8, we will move backwards from the truncation point
171   // to the beginning of the string looking for a valid UTF8
172   // character.  Once a full UTF8 character is found, we will
173   // truncate the string to the end of that character.
174   while (char_index >= 0) {
175     int32_t prev = char_index;
176     base_icu::UChar32 code_point = 0;
177     CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
178               truncation_length, code_point);
179     if (!IsValidCharacter(code_point)) {
180       char_index = prev - 1;
181     } else {
182       break;
183     }
184   }
185 
186   if (char_index >= 0 )
187     *output = input.substr(0, static_cast<size_t>(char_index));
188   else
189     output->clear();
190 }
191 
TrimWhitespace(std::u16string_view input,TrimPositions positions,std::u16string * output)192 TrimPositions TrimWhitespace(std::u16string_view input,
193                              TrimPositions positions,
194                              std::u16string* output) {
195   return internal::TrimStringT(input, std::u16string_view(kWhitespaceUTF16),
196                                positions, output);
197 }
198 
TrimWhitespace(std::u16string_view input,TrimPositions positions)199 std::u16string_view TrimWhitespace(std::u16string_view input,
200                                    TrimPositions positions) {
201   return internal::TrimStringPieceT(
202       input, std::u16string_view(kWhitespaceUTF16), positions);
203 }
204 
TrimWhitespaceASCII(std::string_view input,TrimPositions positions,std::string * output)205 TrimPositions TrimWhitespaceASCII(std::string_view input,
206                                   TrimPositions positions,
207                                   std::string* output) {
208   return internal::TrimStringT(input, std::string_view(kWhitespaceASCII),
209                                positions, output);
210 }
211 
TrimWhitespaceASCII(std::string_view input,TrimPositions positions)212 std::string_view TrimWhitespaceASCII(std::string_view input,
213                                      TrimPositions positions) {
214   return internal::TrimStringPieceT(input, std::string_view(kWhitespaceASCII),
215                                     positions);
216 }
217 
CollapseWhitespace(std::u16string_view text,bool trim_sequences_with_line_breaks)218 std::u16string CollapseWhitespace(std::u16string_view text,
219                                   bool trim_sequences_with_line_breaks) {
220   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
221 }
222 
CollapseWhitespaceASCII(std::string_view text,bool trim_sequences_with_line_breaks)223 std::string CollapseWhitespaceASCII(std::string_view text,
224                                     bool trim_sequences_with_line_breaks) {
225   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
226 }
227 
// Returns true when every character of |input| also appears in |characters|
// (trivially true for an empty |input|).
bool ContainsOnlyChars(std::string_view input, std::string_view characters) {
  const std::string_view::size_type first_foreign =
      input.find_first_not_of(characters);
  return first_foreign == std::string_view::npos;
}
231 
// UTF-16 overload. Returns true when every code unit of |input| also appears
// in |characters| (trivially true for an empty |input|).
bool ContainsOnlyChars(std::u16string_view input,
                       std::u16string_view characters) {
  const std::u16string_view::size_type first_foreign =
      input.find_first_not_of(characters);
  return first_foreign == std::u16string_view::npos;
}
236 
IsStringASCII(std::string_view str)237 bool IsStringASCII(std::string_view str) {
238   return internal::DoIsStringASCII(str.data(), str.length());
239 }
240 
IsStringASCII(std::u16string_view str)241 bool IsStringASCII(std::u16string_view str) {
242   return internal::DoIsStringASCII(str.data(), str.length());
243 }
244 
#if defined(WCHAR_T_IS_32_BIT)
// Wide-string overload, compiled only when WCHAR_T_IS_32_BIT is defined: hands
// the view's data pointer and length to internal::DoIsStringASCII.
bool IsStringASCII(std::wstring_view str) {
  const bool all_ascii = internal::DoIsStringASCII(str.data(), str.length());
  return all_ascii;
}
#endif
250 
IsStringUTF8(std::string_view str)251 bool IsStringUTF8(std::string_view str) {
252   return internal::DoIsStringUTF8<IsValidCharacter>(str);
253 }
254 
IsStringUTF8AllowingNoncharacters(std::string_view str)255 bool IsStringUTF8AllowingNoncharacters(std::string_view str) {
256   return internal::DoIsStringUTF8<IsValidCodepoint>(str);
257 }
258 
EqualsASCII(std::u16string_view str,std::string_view ascii)259 bool EqualsASCII(std::u16string_view str, std::string_view ascii) {
260   return ranges::equal(ascii, str);
261 }
262 
StartsWith(std::string_view str,std::string_view search_for,CompareCase case_sensitivity)263 bool StartsWith(std::string_view str,
264                 std::string_view search_for,
265                 CompareCase case_sensitivity) {
266   return internal::StartsWithT(str, search_for, case_sensitivity);
267 }
268 
StartsWith(std::u16string_view str,std::u16string_view search_for,CompareCase case_sensitivity)269 bool StartsWith(std::u16string_view str,
270                 std::u16string_view search_for,
271                 CompareCase case_sensitivity) {
272   return internal::StartsWithT(str, search_for, case_sensitivity);
273 }
274 
EndsWith(std::string_view str,std::string_view search_for,CompareCase case_sensitivity)275 bool EndsWith(std::string_view str,
276               std::string_view search_for,
277               CompareCase case_sensitivity) {
278   return internal::EndsWithT(str, search_for, case_sensitivity);
279 }
280 
EndsWith(std::u16string_view str,std::u16string_view search_for,CompareCase case_sensitivity)281 bool EndsWith(std::u16string_view str,
282               std::u16string_view search_for,
283               CompareCase case_sensitivity) {
284   return internal::EndsWithT(str, search_for, case_sensitivity);
285 }
286 
HexDigitToInt(char c)287 char HexDigitToInt(char c) {
288   DCHECK(IsHexDigit(c));
289   if (c >= '0' && c <= '9')
290     return static_cast<char>(c - '0');
291   return (c >= 'A' && c <= 'F') ? static_cast<char>(c - 'A' + 10)
292                                 : static_cast<char>(c - 'a' + 10);
293 }
294 
// Unit suffixes appended by FormatBytesUnlocalized(). The index is the number
// of times the byte count has been divided by 1024.
static const char* const kByteStringsUnlocalized[] = {" B",  " kB", " MB",
                                                      " GB", " TB", " PB"};
303 
FormatBytesUnlocalized(int64_t bytes)304 std::u16string FormatBytesUnlocalized(int64_t bytes) {
305   double unit_amount = static_cast<double>(bytes);
306   size_t dimension = 0;
307   const int kKilo = 1024;
308   while (unit_amount >= kKilo &&
309          dimension < std::size(kByteStringsUnlocalized) - 1) {
310     unit_amount /= kKilo;
311     dimension++;
312   }
313 
314   char buf[64];
315   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
316     base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount,
317                    kByteStringsUnlocalized[dimension]);
318   } else {
319     base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount,
320                    kByteStringsUnlocalized[dimension]);
321   }
322 
323   return ASCIIToUTF16(buf);
324 }
325 
ReplaceFirstSubstringAfterOffset(std::u16string * str,size_t start_offset,std::u16string_view find_this,std::u16string_view replace_with)326 void ReplaceFirstSubstringAfterOffset(std::u16string* str,
327                                       size_t start_offset,
328                                       std::u16string_view find_this,
329                                       std::u16string_view replace_with) {
330   internal::DoReplaceMatchesAfterOffset(
331       str, start_offset, internal::MakeSubstringMatcher(find_this),
332       replace_with, internal::ReplaceType::REPLACE_FIRST);
333 }
334 
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,std::string_view find_this,std::string_view replace_with)335 void ReplaceFirstSubstringAfterOffset(std::string* str,
336                                       size_t start_offset,
337                                       std::string_view find_this,
338                                       std::string_view replace_with) {
339   internal::DoReplaceMatchesAfterOffset(
340       str, start_offset, internal::MakeSubstringMatcher(find_this),
341       replace_with, internal::ReplaceType::REPLACE_FIRST);
342 }
343 
ReplaceSubstringsAfterOffset(std::u16string * str,size_t start_offset,std::u16string_view find_this,std::u16string_view replace_with)344 void ReplaceSubstringsAfterOffset(std::u16string* str,
345                                   size_t start_offset,
346                                   std::u16string_view find_this,
347                                   std::u16string_view replace_with) {
348   internal::DoReplaceMatchesAfterOffset(
349       str, start_offset, internal::MakeSubstringMatcher(find_this),
350       replace_with, internal::ReplaceType::REPLACE_ALL);
351 }
352 
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,std::string_view find_this,std::string_view replace_with)353 void ReplaceSubstringsAfterOffset(std::string* str,
354                                   size_t start_offset,
355                                   std::string_view find_this,
356                                   std::string_view replace_with) {
357   internal::DoReplaceMatchesAfterOffset(
358       str, start_offset, internal::MakeSubstringMatcher(find_this),
359       replace_with, internal::ReplaceType::REPLACE_ALL);
360 }
361 
WriteInto(std::string * str,size_t length_with_null)362 char* WriteInto(std::string* str, size_t length_with_null) {
363   return internal::WriteIntoT(str, length_with_null);
364 }
365 
WriteInto(std::u16string * str,size_t length_with_null)366 char16_t* WriteInto(std::u16string* str, size_t length_with_null) {
367   return internal::WriteIntoT(str, length_with_null);
368 }
369 
JoinString(span<const std::string> parts,std::string_view separator)370 std::string JoinString(span<const std::string> parts,
371                        std::string_view separator) {
372   return internal::JoinStringT(parts, separator);
373 }
374 
JoinString(span<const std::u16string> parts,std::u16string_view separator)375 std::u16string JoinString(span<const std::u16string> parts,
376                           std::u16string_view separator) {
377   return internal::JoinStringT(parts, separator);
378 }
379 
JoinString(span<const std::string_view> parts,std::string_view separator)380 std::string JoinString(span<const std::string_view> parts,
381                        std::string_view separator) {
382   return internal::JoinStringT(parts, separator);
383 }
384 
JoinString(span<const std::u16string_view> parts,std::u16string_view separator)385 std::u16string JoinString(span<const std::u16string_view> parts,
386                           std::u16string_view separator) {
387   return internal::JoinStringT(parts, separator);
388 }
389 
JoinString(std::initializer_list<std::string_view> parts,std::string_view separator)390 std::string JoinString(std::initializer_list<std::string_view> parts,
391                        std::string_view separator) {
392   return internal::JoinStringT(parts, separator);
393 }
394 
JoinString(std::initializer_list<std::u16string_view> parts,std::u16string_view separator)395 std::u16string JoinString(std::initializer_list<std::u16string_view> parts,
396                           std::u16string_view separator) {
397   return internal::JoinStringT(parts, separator);
398 }
399 
ReplaceStringPlaceholders(std::u16string_view format_string,const std::vector<std::u16string> & subst,std::vector<size_t> * offsets)400 std::u16string ReplaceStringPlaceholders(
401     std::u16string_view format_string,
402     const std::vector<std::u16string>& subst,
403     std::vector<size_t>* offsets) {
404   std::optional<std::u16string> replacement =
405       internal::DoReplaceStringPlaceholders(
406           format_string, subst,
407           /*placeholder_prefix*/ u'$',
408           /*should_escape_multiple_placeholder_prefixes*/ true,
409           /*is_strict_mode*/ false, offsets);
410 
411   return std::move(replacement).value();
412 }
413 
ReplaceStringPlaceholders(std::string_view format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)414 std::string ReplaceStringPlaceholders(std::string_view format_string,
415                                       const std::vector<std::string>& subst,
416                                       std::vector<size_t>* offsets) {
417   std::optional<std::string> replacement =
418       internal::DoReplaceStringPlaceholders(
419           format_string, subst,
420           /*placeholder_prefix*/ '$',
421           /*should_escape_multiple_placeholder_prefixes*/ true,
422           /*is_strict_mode*/ false, offsets);
423 
424   return std::move(replacement).value();
425 }
426 
ReplaceStringPlaceholders(const std::u16string & format_string,const std::u16string & a,size_t * offset)427 std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
428                                          const std::u16string& a,
429                                          size_t* offset) {
430   std::vector<size_t> offsets;
431   std::u16string result =
432       ReplaceStringPlaceholders(format_string, {a}, &offsets);
433 
434   DCHECK_EQ(1U, offsets.size());
435   if (offset)
436     *offset = offsets[0];
437   return result;
438 }
439 
strlcpy(span<char> dst,std::string_view src)440 size_t strlcpy(span<char> dst, std::string_view src) {
441   return internal::lcpyT(dst, src);
442 }
443 
u16cstrlcpy(span<char16_t> dst,std::u16string_view src)444 size_t u16cstrlcpy(span<char16_t> dst, std::u16string_view src) {
445   return internal::lcpyT(dst, src);
446 }
447 
wcslcpy(span<wchar_t> dst,std::wstring_view src)448 size_t wcslcpy(span<wchar_t> dst, std::wstring_view src) {
449   return internal::lcpyT(dst, src);
450 }
451 
strlcpy(char * dst,const char * src,size_t dst_size)452 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
453   return internal::lcpyT(
454       UNSAFE_TODO(base::span(dst, dst_size), std::string_view(src)));
455 }
456 
u16cstrlcpy(char16_t * dst,const char16_t * src,size_t dst_size)457 size_t u16cstrlcpy(char16_t* dst, const char16_t* src, size_t dst_size) {
458   return internal::lcpyT(UNSAFE_TODO(base::span(dst, dst_size)),
459                          std::u16string_view(src));
460 }
461 
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)462 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
463   return internal::lcpyT(UNSAFE_TODO(base::span(dst, dst_size)),
464                          std::wstring_view(src));
465 }
466 
467 }  // namespace base
468