• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
#include "base/strings/string_util.h"

#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>

#include <limits>
#include <type_traits>
#include <utility>
#include <vector>

#include "base/check_op.h"
#include "base/no_destructor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_util_impl_helpers.h"
#include "base/strings/string_util_internal.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
33 
34 namespace base {
35 
// Scans |format| and reports whether every conversion specification in it is
// one this function treats as portable. Returns false as soon as an
// unportable specifier is seen:
//   * 's' or 'c' that was not preceded by an 'l' modifier within the same
//     specification, or
//   * any of 'S', 'C', 'F', 'D', 'O', 'U'.
// A format string that ends in the middle of a specification is reported as
// portable, since it is equally broken on all platforms.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that terminate a conversion specification.
  static const wchar_t kSpecifierEnd[] = L"diouxXeEfgGaAcspn%";

  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      // Ordinary text; nothing to check.
      ++cursor;
      continue;
    }

    // |cursor| is on the '%' that opens a specification. Consume characters
    // until a terminating specifier (or the end of the string) is found.
    bool seen_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t current = *cursor;
      if (current == L'\0') {
        // Truncated specification: no unportable specifier was found.
        return true;
      }

      if (current == L'l') {
        // Only an 'l' modifier makes a following 's' or 'c' acceptable.
        seen_l_modifier = true;
      } else {
        switch (current) {
          case L's':
          case L'c':
            if (!seen_l_modifier)
              return false;
            break;
          case L'S':
          case L'C':
          case L'F':
          case L'D':
          case L'O':
          case L'U':
            return false;
          default:
            break;
        }
      }

      if (wcschr(kSpecifierEnd, current)) {
        // Specification finished and was portable.
        break;
      }
    }

    // Step past the specifier that ended the specification.
    ++cursor;
  }

  return true;
}
70 
ToLowerASCII(StringPiece str)71 std::string ToLowerASCII(StringPiece str) {
72   return internal::ToLowerASCIIImpl(str);
73 }
74 
ToLowerASCII(StringPiece16 str)75 std::u16string ToLowerASCII(StringPiece16 str) {
76   return internal::ToLowerASCIIImpl(str);
77 }
78 
ToUpperASCII(StringPiece str)79 std::string ToUpperASCII(StringPiece str) {
80   return internal::ToUpperASCIIImpl(str);
81 }
82 
ToUpperASCII(StringPiece16 str)83 std::u16string ToUpperASCII(StringPiece16 str) {
84   return internal::ToUpperASCIIImpl(str);
85 }
86 
EmptyString()87 const std::string& EmptyString() {
88   static const base::NoDestructor<std::string> s;
89   return *s;
90 }
91 
EmptyString16()92 const std::u16string& EmptyString16() {
93   static const base::NoDestructor<std::u16string> s16;
94   return *s16;
95 }
96 
ReplaceChars(StringPiece16 input,StringPiece16 replace_chars,StringPiece16 replace_with,std::u16string * output)97 bool ReplaceChars(StringPiece16 input,
98                   StringPiece16 replace_chars,
99                   StringPiece16 replace_with,
100                   std::u16string* output) {
101   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
102 }
103 
ReplaceChars(StringPiece input,StringPiece replace_chars,StringPiece replace_with,std::string * output)104 bool ReplaceChars(StringPiece input,
105                   StringPiece replace_chars,
106                   StringPiece replace_with,
107                   std::string* output) {
108   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
109 }
110 
RemoveChars(StringPiece16 input,StringPiece16 remove_chars,std::u16string * output)111 bool RemoveChars(StringPiece16 input,
112                  StringPiece16 remove_chars,
113                  std::u16string* output) {
114   return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
115 }
116 
RemoveChars(StringPiece input,StringPiece remove_chars,std::string * output)117 bool RemoveChars(StringPiece input,
118                  StringPiece remove_chars,
119                  std::string* output) {
120   return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output);
121 }
122 
TrimString(StringPiece16 input,StringPiece16 trim_chars,std::u16string * output)123 bool TrimString(StringPiece16 input,
124                 StringPiece16 trim_chars,
125                 std::u16string* output) {
126   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
127          TRIM_NONE;
128 }
129 
TrimString(StringPiece input,StringPiece trim_chars,std::string * output)130 bool TrimString(StringPiece input,
131                 StringPiece trim_chars,
132                 std::string* output) {
133   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
134          TRIM_NONE;
135 }
136 
TrimString(StringPiece16 input,StringPiece16 trim_chars,TrimPositions positions)137 StringPiece16 TrimString(StringPiece16 input,
138                          StringPiece16 trim_chars,
139                          TrimPositions positions) {
140   return internal::TrimStringPieceT(input, trim_chars, positions);
141 }
142 
TrimString(StringPiece input,StringPiece trim_chars,TrimPositions positions)143 StringPiece TrimString(StringPiece input,
144                        StringPiece trim_chars,
145                        TrimPositions positions) {
146   return internal::TrimStringPieceT(input, trim_chars, positions);
147 }
148 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)149 void TruncateUTF8ToByteSize(const std::string& input,
150                             const size_t byte_size,
151                             std::string* output) {
152   DCHECK(output);
153   if (byte_size > input.length()) {
154     *output = input;
155     return;
156   }
157   DCHECK_LE(byte_size,
158             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
159   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
160   int32_t truncation_length = static_cast<int32_t>(byte_size);
161   int32_t char_index = truncation_length - 1;
162   const char* data = input.data();
163 
164   // Using CBU8, we will move backwards from the truncation point
165   // to the beginning of the string looking for a valid UTF8
166   // character.  Once a full UTF8 character is found, we will
167   // truncate the string to the end of that character.
168   while (char_index >= 0) {
169     int32_t prev = char_index;
170     base_icu::UChar32 code_point = 0;
171     CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
172               truncation_length, code_point);
173     if (!IsValidCharacter(code_point)) {
174       char_index = prev - 1;
175     } else {
176       break;
177     }
178   }
179 
180   if (char_index >= 0 )
181     *output = input.substr(0, static_cast<size_t>(char_index));
182   else
183     output->clear();
184 }
185 
TrimWhitespace(StringPiece16 input,TrimPositions positions,std::u16string * output)186 TrimPositions TrimWhitespace(StringPiece16 input,
187                              TrimPositions positions,
188                              std::u16string* output) {
189   return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
190                                positions, output);
191 }
192 
TrimWhitespace(StringPiece16 input,TrimPositions positions)193 StringPiece16 TrimWhitespace(StringPiece16 input,
194                              TrimPositions positions) {
195   return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16),
196                                     positions);
197 }
198 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions,std::string * output)199 TrimPositions TrimWhitespaceASCII(StringPiece input,
200                                   TrimPositions positions,
201                                   std::string* output) {
202   return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions,
203                                output);
204 }
205 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions)206 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
207   return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII),
208                                     positions);
209 }
210 
CollapseWhitespace(StringPiece16 text,bool trim_sequences_with_line_breaks)211 std::u16string CollapseWhitespace(StringPiece16 text,
212                                   bool trim_sequences_with_line_breaks) {
213   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
214 }
215 
CollapseWhitespaceASCII(StringPiece text,bool trim_sequences_with_line_breaks)216 std::string CollapseWhitespaceASCII(StringPiece text,
217                                     bool trim_sequences_with_line_breaks) {
218   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
219 }
220 
ContainsOnlyChars(StringPiece input,StringPiece characters)221 bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
222   return input.find_first_not_of(characters) == StringPiece::npos;
223 }
224 
ContainsOnlyChars(StringPiece16 input,StringPiece16 characters)225 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
226   return input.find_first_not_of(characters) == StringPiece16::npos;
227 }
228 
229 
IsStringASCII(StringPiece str)230 bool IsStringASCII(StringPiece str) {
231   return internal::DoIsStringASCII(str.data(), str.length());
232 }
233 
IsStringASCII(StringPiece16 str)234 bool IsStringASCII(StringPiece16 str) {
235   return internal::DoIsStringASCII(str.data(), str.length());
236 }
237 
238 #if defined(WCHAR_T_IS_UTF32)
IsStringASCII(WStringPiece str)239 bool IsStringASCII(WStringPiece str) {
240   return internal::DoIsStringASCII(str.data(), str.length());
241 }
242 #endif
243 
IsStringUTF8(StringPiece str)244 bool IsStringUTF8(StringPiece str) {
245   return internal::DoIsStringUTF8<IsValidCharacter>(str);
246 }
247 
IsStringUTF8AllowingNoncharacters(StringPiece str)248 bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
249   return internal::DoIsStringUTF8<IsValidCodepoint>(str);
250 }
251 
EqualsASCII(StringPiece16 str,StringPiece ascii)252 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
253   return ranges::equal(ascii, str);
254 }
255 
StartsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)256 bool StartsWith(StringPiece str,
257                 StringPiece search_for,
258                 CompareCase case_sensitivity) {
259   return internal::StartsWithT(str, search_for, case_sensitivity);
260 }
261 
StartsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)262 bool StartsWith(StringPiece16 str,
263                 StringPiece16 search_for,
264                 CompareCase case_sensitivity) {
265   return internal::StartsWithT(str, search_for, case_sensitivity);
266 }
267 
EndsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)268 bool EndsWith(StringPiece str,
269               StringPiece search_for,
270               CompareCase case_sensitivity) {
271   return internal::EndsWithT(str, search_for, case_sensitivity);
272 }
273 
EndsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)274 bool EndsWith(StringPiece16 str,
275               StringPiece16 search_for,
276               CompareCase case_sensitivity) {
277   return internal::EndsWithT(str, search_for, case_sensitivity);
278 }
279 
HexDigitToInt(char c)280 char HexDigitToInt(char c) {
281   DCHECK(IsHexDigit(c));
282   if (c >= '0' && c <= '9')
283     return static_cast<char>(c - '0');
284   return (c >= 'A' && c <= 'F') ? static_cast<char>(c - 'A' + 10)
285                                 : static_cast<char>(c - 'a' + 10);
286 }
287 
// Unit suffixes used by FormatBytesUnlocalized(), indexed by how many times
// the byte count has been divided by 1024. Each entry starts with a space so
// it can be appended directly after the formatted number.
static const char* const kByteStringsUnlocalized[] = {
    " B",   // index 0
    " kB",  // index 1
    " MB",  // index 2
    " GB",  // index 3
    " TB",  // index 4
    " PB",  // index 5
};
296 
FormatBytesUnlocalized(int64_t bytes)297 std::u16string FormatBytesUnlocalized(int64_t bytes) {
298   double unit_amount = static_cast<double>(bytes);
299   size_t dimension = 0;
300   const int kKilo = 1024;
301   while (unit_amount >= kKilo &&
302          dimension < std::size(kByteStringsUnlocalized) - 1) {
303     unit_amount /= kKilo;
304     dimension++;
305   }
306 
307   char buf[64];
308   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
309     base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount,
310                    kByteStringsUnlocalized[dimension]);
311   } else {
312     base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount,
313                    kByteStringsUnlocalized[dimension]);
314   }
315 
316   return ASCIIToUTF16(buf);
317 }
318 
ReplaceFirstSubstringAfterOffset(std::u16string * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)319 void ReplaceFirstSubstringAfterOffset(std::u16string* str,
320                                       size_t start_offset,
321                                       StringPiece16 find_this,
322                                       StringPiece16 replace_with) {
323   internal::DoReplaceMatchesAfterOffset(
324       str, start_offset, internal::MakeSubstringMatcher(find_this),
325       replace_with, internal::ReplaceType::REPLACE_FIRST);
326 }
327 
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)328 void ReplaceFirstSubstringAfterOffset(std::string* str,
329                                       size_t start_offset,
330                                       StringPiece find_this,
331                                       StringPiece replace_with) {
332   internal::DoReplaceMatchesAfterOffset(
333       str, start_offset, internal::MakeSubstringMatcher(find_this),
334       replace_with, internal::ReplaceType::REPLACE_FIRST);
335 }
336 
ReplaceSubstringsAfterOffset(std::u16string * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)337 void ReplaceSubstringsAfterOffset(std::u16string* str,
338                                   size_t start_offset,
339                                   StringPiece16 find_this,
340                                   StringPiece16 replace_with) {
341   internal::DoReplaceMatchesAfterOffset(
342       str, start_offset, internal::MakeSubstringMatcher(find_this),
343       replace_with, internal::ReplaceType::REPLACE_ALL);
344 }
345 
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)346 void ReplaceSubstringsAfterOffset(std::string* str,
347                                   size_t start_offset,
348                                   StringPiece find_this,
349                                   StringPiece replace_with) {
350   internal::DoReplaceMatchesAfterOffset(
351       str, start_offset, internal::MakeSubstringMatcher(find_this),
352       replace_with, internal::ReplaceType::REPLACE_ALL);
353 }
354 
WriteInto(std::string * str,size_t length_with_null)355 char* WriteInto(std::string* str, size_t length_with_null) {
356   return internal::WriteIntoT(str, length_with_null);
357 }
358 
WriteInto(std::u16string * str,size_t length_with_null)359 char16_t* WriteInto(std::u16string* str, size_t length_with_null) {
360   return internal::WriteIntoT(str, length_with_null);
361 }
362 
JoinString(span<const std::string> parts,StringPiece separator)363 std::string JoinString(span<const std::string> parts, StringPiece separator) {
364   return internal::JoinStringT(parts, separator);
365 }
366 
JoinString(span<const std::u16string> parts,StringPiece16 separator)367 std::u16string JoinString(span<const std::u16string> parts,
368                           StringPiece16 separator) {
369   return internal::JoinStringT(parts, separator);
370 }
371 
JoinString(span<const StringPiece> parts,StringPiece separator)372 std::string JoinString(span<const StringPiece> parts, StringPiece separator) {
373   return internal::JoinStringT(parts, separator);
374 }
375 
JoinString(span<const StringPiece16> parts,StringPiece16 separator)376 std::u16string JoinString(span<const StringPiece16> parts,
377                           StringPiece16 separator) {
378   return internal::JoinStringT(parts, separator);
379 }
380 
JoinString(std::initializer_list<StringPiece> parts,StringPiece separator)381 std::string JoinString(std::initializer_list<StringPiece> parts,
382                        StringPiece separator) {
383   return internal::JoinStringT(parts, separator);
384 }
385 
JoinString(std::initializer_list<StringPiece16> parts,StringPiece16 separator)386 std::u16string JoinString(std::initializer_list<StringPiece16> parts,
387                           StringPiece16 separator) {
388   return internal::JoinStringT(parts, separator);
389 }
390 
ReplaceStringPlaceholders(StringPiece16 format_string,const std::vector<std::u16string> & subst,std::vector<size_t> * offsets)391 std::u16string ReplaceStringPlaceholders(
392     StringPiece16 format_string,
393     const std::vector<std::u16string>& subst,
394     std::vector<size_t>* offsets) {
395   absl::optional<std::u16string> replacement =
396       internal::DoReplaceStringPlaceholders(
397           format_string, subst,
398           /*placeholder_prefix*/ u'$',
399           /*should_escape_multiple_placeholder_prefixes*/ true,
400           /*is_strict_mode*/ false, offsets);
401 
402   DCHECK(replacement);
403   return replacement.value();
404 }
405 
ReplaceStringPlaceholders(StringPiece format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)406 std::string ReplaceStringPlaceholders(StringPiece format_string,
407                                       const std::vector<std::string>& subst,
408                                       std::vector<size_t>* offsets) {
409   absl::optional<std::string> replacement =
410       internal::DoReplaceStringPlaceholders(
411           format_string, subst,
412           /*placeholder_prefix*/ '$',
413           /*should_escape_multiple_placeholder_prefixes*/ true,
414           /*is_strict_mode*/ false, offsets);
415 
416   DCHECK(replacement);
417   return replacement.value();
418 }
419 
ReplaceStringPlaceholders(const std::u16string & format_string,const std::u16string & a,size_t * offset)420 std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
421                                          const std::u16string& a,
422                                          size_t* offset) {
423   std::vector<size_t> offsets;
424   std::u16string result =
425       ReplaceStringPlaceholders(format_string, {a}, &offsets);
426 
427   DCHECK_EQ(1U, offsets.size());
428   if (offset)
429     *offset = offsets[0];
430   return result;
431 }
432 
strlcpy(char * dst,const char * src,size_t dst_size)433 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
434   return internal::lcpyT(dst, src, dst_size);
435 }
436 
u16cstrlcpy(char16_t * dst,const char16_t * src,size_t dst_size)437 size_t u16cstrlcpy(char16_t* dst, const char16_t* src, size_t dst_size) {
438   return internal::lcpyT(dst, src, dst_size);
439 }
440 
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)441 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
442   return internal::lcpyT(dst, src, dst_size);
443 }
444 
445 }  // namespace base
446