• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <time.h>
15 #include <wchar.h>
16 #include <wctype.h>
17 
18 #include <algorithm>
19 #include <vector>
20 
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 #include "build/build_config.h"
28 
29 // Remove when this entire file is in the base namespace.
30 using base::char16;
31 using base::string16;
32 
33 namespace {
34 
35 // Force the singleton used by Empty[W]String[16] to be a unique type. This
36 // prevents other code that might accidentally use Singleton<string> from
37 // getting our internal one.
38 struct EmptyStrings {
EmptyStrings__anon9e5df4c00111::EmptyStrings39   EmptyStrings() {}
40   const std::string s;
41   const std::wstring ws;
42   const string16 s16;
43 
GetInstance__anon9e5df4c00111::EmptyStrings44   static EmptyStrings* GetInstance() {
45     return Singleton<EmptyStrings>::get();
46   }
47 };
48 
49 // Used by ReplaceStringPlaceholders to track the position in the string of
50 // replaced parameters.
51 struct ReplacementOffset {
ReplacementOffset__anon9e5df4c00111::ReplacementOffset52   ReplacementOffset(uintptr_t parameter, size_t offset)
53       : parameter(parameter),
54         offset(offset) {}
55 
56   // Index of the parameter.
57   uintptr_t parameter;
58 
59   // Starting position in the string.
60   size_t offset;
61 };
62 
CompareParameter(const ReplacementOffset & elem1,const ReplacementOffset & elem2)63 static bool CompareParameter(const ReplacementOffset& elem1,
64                              const ReplacementOffset& elem2) {
65   return elem1.parameter < elem2.parameter;
66 }
67 
68 }  // namespace
69 
70 namespace base {
71 
IsWprintfFormatPortable(const wchar_t * format)72 bool IsWprintfFormatPortable(const wchar_t* format) {
73   for (const wchar_t* position = format; *position != '\0'; ++position) {
74     if (*position == '%') {
75       bool in_specification = true;
76       bool modifier_l = false;
77       while (in_specification) {
78         // Eat up characters until reaching a known specifier.
79         if (*++position == '\0') {
80           // The format string ended in the middle of a specification.  Call
81           // it portable because no unportable specifications were found.  The
82           // string is equally broken on all platforms.
83           return true;
84         }
85 
86         if (*position == 'l') {
87           // 'l' is the only thing that can save the 's' and 'c' specifiers.
88           modifier_l = true;
89         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
90                    *position == 'S' || *position == 'C' || *position == 'F' ||
91                    *position == 'D' || *position == 'O' || *position == 'U') {
92           // Not portable.
93           return false;
94         }
95 
96         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
97           // Portable, keep scanning the rest of the format string.
98           in_specification = false;
99         }
100       }
101     }
102   }
103 
104   return true;
105 }
106 
EmptyString()107 const std::string& EmptyString() {
108   return EmptyStrings::GetInstance()->s;
109 }
110 
EmptyWString()111 const std::wstring& EmptyWString() {
112   return EmptyStrings::GetInstance()->ws;
113 }
114 
EmptyString16()115 const string16& EmptyString16() {
116   return EmptyStrings::GetInstance()->s16;
117 }
118 
119 template<typename STR>
ReplaceCharsT(const STR & input,const typename STR::value_type replace_chars[],const STR & replace_with,STR * output)120 bool ReplaceCharsT(const STR& input,
121                    const typename STR::value_type replace_chars[],
122                    const STR& replace_with,
123                    STR* output) {
124   bool removed = false;
125   size_t replace_length = replace_with.length();
126 
127   *output = input;
128 
129   size_t found = output->find_first_of(replace_chars);
130   while (found != STR::npos) {
131     removed = true;
132     output->replace(found, 1, replace_with);
133     found = output->find_first_of(replace_chars, found + replace_length);
134   }
135 
136   return removed;
137 }
138 
ReplaceChars(const string16 & input,const char16 replace_chars[],const string16 & replace_with,string16 * output)139 bool ReplaceChars(const string16& input,
140                   const char16 replace_chars[],
141                   const string16& replace_with,
142                   string16* output) {
143   return ReplaceCharsT(input, replace_chars, replace_with, output);
144 }
145 
ReplaceChars(const std::string & input,const char replace_chars[],const std::string & replace_with,std::string * output)146 bool ReplaceChars(const std::string& input,
147                   const char replace_chars[],
148                   const std::string& replace_with,
149                   std::string* output) {
150   return ReplaceCharsT(input, replace_chars, replace_with, output);
151 }
152 
RemoveChars(const string16 & input,const char16 remove_chars[],string16 * output)153 bool RemoveChars(const string16& input,
154                  const char16 remove_chars[],
155                  string16* output) {
156   return ReplaceChars(input, remove_chars, string16(), output);
157 }
158 
RemoveChars(const std::string & input,const char remove_chars[],std::string * output)159 bool RemoveChars(const std::string& input,
160                  const char remove_chars[],
161                  std::string* output) {
162   return ReplaceChars(input, remove_chars, std::string(), output);
163 }
164 
165 template<typename STR>
TrimStringT(const STR & input,const typename STR::value_type trim_chars[],TrimPositions positions,STR * output)166 TrimPositions TrimStringT(const STR& input,
167                           const typename STR::value_type trim_chars[],
168                           TrimPositions positions,
169                           STR* output) {
170   // Find the edges of leading/trailing whitespace as desired.
171   const typename STR::size_type last_char = input.length() - 1;
172   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
173       input.find_first_not_of(trim_chars) : 0;
174   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
175       input.find_last_not_of(trim_chars) : last_char;
176 
177   // When the string was all whitespace, report that we stripped off whitespace
178   // from whichever position the caller was interested in.  For empty input, we
179   // stripped no whitespace, but we still need to clear |output|.
180   if (input.empty() ||
181       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
182     bool input_was_empty = input.empty();  // in case output == &input
183     output->clear();
184     return input_was_empty ? TRIM_NONE : positions;
185   }
186 
187   // Trim the whitespace.
188   *output =
189       input.substr(first_good_char, last_good_char - first_good_char + 1);
190 
191   // Return where we trimmed from.
192   return static_cast<TrimPositions>(
193       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
194       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
195 }
196 
TrimString(const string16 & input,const char16 trim_chars[],string16 * output)197 bool TrimString(const string16& input,
198                 const char16 trim_chars[],
199                 string16* output) {
200   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
201 }
202 
TrimString(const std::string & input,const char trim_chars[],std::string * output)203 bool TrimString(const std::string& input,
204                 const char trim_chars[],
205                 std::string* output) {
206   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
207 }
208 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)209 void TruncateUTF8ToByteSize(const std::string& input,
210                             const size_t byte_size,
211                             std::string* output) {
212   DCHECK(output);
213   if (byte_size > input.length()) {
214     *output = input;
215     return;
216   }
217   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
218   // Note: This cast is necessary because CBU8_NEXT uses int32s.
219   int32 truncation_length = static_cast<int32>(byte_size);
220   int32 char_index = truncation_length - 1;
221   const char* data = input.data();
222 
223   // Using CBU8, we will move backwards from the truncation point
224   // to the beginning of the string looking for a valid UTF8
225   // character.  Once a full UTF8 character is found, we will
226   // truncate the string to the end of that character.
227   while (char_index >= 0) {
228     int32 prev = char_index;
229     uint32 code_point = 0;
230     CBU8_NEXT(data, char_index, truncation_length, code_point);
231     if (!IsValidCharacter(code_point) ||
232         !IsValidCodepoint(code_point)) {
233       char_index = prev - 1;
234     } else {
235       break;
236     }
237   }
238 
239   if (char_index >= 0 )
240     *output = input.substr(0, char_index);
241   else
242     output->clear();
243 }
244 
245 }  // namespace base
246 
TrimWhitespace(const base::string16 & input,TrimPositions positions,base::string16 * output)247 TrimPositions TrimWhitespace(const base::string16& input,
248                              TrimPositions positions,
249                              base::string16* output) {
250   return base::TrimStringT(input, base::kWhitespaceUTF16, positions, output);
251 }
252 
TrimWhitespaceASCII(const std::string & input,TrimPositions positions,std::string * output)253 TrimPositions TrimWhitespaceASCII(const std::string& input,
254                                   TrimPositions positions,
255                                   std::string* output) {
256   return base::TrimStringT(input, base::kWhitespaceASCII, positions, output);
257 }
258 
259 // This function is only for backward-compatibility.
260 // To be removed when all callers are updated.
TrimWhitespace(const std::string & input,TrimPositions positions,std::string * output)261 TrimPositions TrimWhitespace(const std::string& input,
262                              TrimPositions positions,
263                              std::string* output) {
264   return TrimWhitespaceASCII(input, positions, output);
265 }
266 
267 template<typename STR>
CollapseWhitespaceT(const STR & text,bool trim_sequences_with_line_breaks)268 STR CollapseWhitespaceT(const STR& text,
269                         bool trim_sequences_with_line_breaks) {
270   STR result;
271   result.resize(text.size());
272 
273   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
274   // will trim any leading whitespace.
275   bool in_whitespace = true;
276   bool already_trimmed = true;
277 
278   int chars_written = 0;
279   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
280     if (IsWhitespace(*i)) {
281       if (!in_whitespace) {
282         // Reduce all whitespace sequences to a single space.
283         in_whitespace = true;
284         result[chars_written++] = L' ';
285       }
286       if (trim_sequences_with_line_breaks && !already_trimmed &&
287           ((*i == '\n') || (*i == '\r'))) {
288         // Whitespace sequences containing CR or LF are eliminated entirely.
289         already_trimmed = true;
290         --chars_written;
291       }
292     } else {
293       // Non-whitespace chracters are copied straight across.
294       in_whitespace = false;
295       already_trimmed = false;
296       result[chars_written++] = *i;
297     }
298   }
299 
300   if (in_whitespace && !already_trimmed) {
301     // Any trailing whitespace is eliminated.
302     --chars_written;
303   }
304 
305   result.resize(chars_written);
306   return result;
307 }
308 
CollapseWhitespace(const string16 & text,bool trim_sequences_with_line_breaks)309 string16 CollapseWhitespace(const string16& text,
310                             bool trim_sequences_with_line_breaks) {
311   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
312 }
313 
CollapseWhitespaceASCII(const std::string & text,bool trim_sequences_with_line_breaks)314 std::string CollapseWhitespaceASCII(const std::string& text,
315                                     bool trim_sequences_with_line_breaks) {
316   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
317 }
318 
ContainsOnlyWhitespaceASCII(const std::string & str)319 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
320   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
321     if (!IsAsciiWhitespace(*i))
322       return false;
323   }
324   return true;
325 }
326 
ContainsOnlyWhitespace(const base::string16 & str)327 bool ContainsOnlyWhitespace(const base::string16& str) {
328   return str.find_first_not_of(base::kWhitespaceUTF16) == string16::npos;
329 }
330 
331 template<typename STR>
ContainsOnlyCharsT(const STR & input,const STR & characters)332 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
333   for (typename STR::const_iterator iter = input.begin();
334        iter != input.end(); ++iter) {
335     if (characters.find(*iter) == STR::npos)
336       return false;
337   }
338   return true;
339 }
340 
ContainsOnlyChars(const string16 & input,const string16 & characters)341 bool ContainsOnlyChars(const string16& input, const string16& characters) {
342   return ContainsOnlyCharsT(input, characters);
343 }
344 
ContainsOnlyChars(const std::string & input,const std::string & characters)345 bool ContainsOnlyChars(const std::string& input,
346                        const std::string& characters) {
347   return ContainsOnlyCharsT(input, characters);
348 }
349 
350 #if !defined(WCHAR_T_IS_UTF16)
351 bool IsStringASCII(const std::wstring& str);
352 #endif
353 
WideToASCII(const std::wstring & wide)354 std::string WideToASCII(const std::wstring& wide) {
355   DCHECK(IsStringASCII(wide)) << wide;
356   return std::string(wide.begin(), wide.end());
357 }
358 
UTF16ToASCII(const string16 & utf16)359 std::string UTF16ToASCII(const string16& utf16) {
360   DCHECK(IsStringASCII(utf16)) << utf16;
361   return std::string(utf16.begin(), utf16.end());
362 }
363 
364 template<class STR>
DoIsStringASCII(const STR & str)365 static bool DoIsStringASCII(const STR& str) {
366   for (size_t i = 0; i < str.length(); i++) {
367     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
368     if (c > 0x7F)
369       return false;
370   }
371   return true;
372 }
373 
374 #if !defined(WCHAR_T_IS_UTF16)
IsStringASCII(const std::wstring & str)375 bool IsStringASCII(const std::wstring& str) {
376   return DoIsStringASCII(str);
377 }
378 #endif
379 
IsStringASCII(const string16 & str)380 bool IsStringASCII(const string16& str) {
381   return DoIsStringASCII(str);
382 }
383 
IsStringASCII(const base::StringPiece & str)384 bool IsStringASCII(const base::StringPiece& str) {
385   return DoIsStringASCII(str);
386 }
387 
IsStringUTF8(const std::string & str)388 bool IsStringUTF8(const std::string& str) {
389   const char *src = str.data();
390   int32 src_len = static_cast<int32>(str.length());
391   int32 char_index = 0;
392 
393   while (char_index < src_len) {
394     int32 code_point;
395     CBU8_NEXT(src, char_index, src_len, code_point);
396     if (!base::IsValidCharacter(code_point))
397       return false;
398   }
399   return true;
400 }
401 
402 template<typename Iter>
DoLowerCaseEqualsASCII(Iter a_begin,Iter a_end,const char * b)403 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
404                                           Iter a_end,
405                                           const char* b) {
406   for (Iter it = a_begin; it != a_end; ++it, ++b) {
407     if (!*b || base::ToLowerASCII(*it) != *b)
408       return false;
409   }
410   return *b == 0;
411 }
412 
413 // Front-ends for LowerCaseEqualsASCII.
LowerCaseEqualsASCII(const std::string & a,const char * b)414 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
415   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
416 }
417 
LowerCaseEqualsASCII(const string16 & a,const char * b)418 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
419   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
420 }
421 
LowerCaseEqualsASCII(std::string::const_iterator a_begin,std::string::const_iterator a_end,const char * b)422 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
423                           std::string::const_iterator a_end,
424                           const char* b) {
425   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
426 }
427 
LowerCaseEqualsASCII(string16::const_iterator a_begin,string16::const_iterator a_end,const char * b)428 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
429                           string16::const_iterator a_end,
430                           const char* b) {
431   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
432 }
433 
434 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
435 #if !defined(OS_ANDROID)
LowerCaseEqualsASCII(const char * a_begin,const char * a_end,const char * b)436 bool LowerCaseEqualsASCII(const char* a_begin,
437                           const char* a_end,
438                           const char* b) {
439   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
440 }
441 
LowerCaseEqualsASCII(const char16 * a_begin,const char16 * a_end,const char * b)442 bool LowerCaseEqualsASCII(const char16* a_begin,
443                           const char16* a_end,
444                           const char* b) {
445   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
446 }
447 
448 #endif  // !defined(OS_ANDROID)
449 
EqualsASCII(const string16 & a,const base::StringPiece & b)450 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
451   if (a.length() != b.length())
452     return false;
453   return std::equal(b.begin(), b.end(), a.begin());
454 }
455 
StartsWithASCII(const std::string & str,const std::string & search,bool case_sensitive)456 bool StartsWithASCII(const std::string& str,
457                      const std::string& search,
458                      bool case_sensitive) {
459   if (case_sensitive)
460     return str.compare(0, search.length(), search) == 0;
461   else
462     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
463 }
464 
465 template <typename STR>
StartsWithT(const STR & str,const STR & search,bool case_sensitive)466 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
467   if (case_sensitive) {
468     return str.compare(0, search.length(), search) == 0;
469   } else {
470     if (search.size() > str.size())
471       return false;
472     return std::equal(search.begin(), search.end(), str.begin(),
473                       base::CaseInsensitiveCompare<typename STR::value_type>());
474   }
475 }
476 
StartsWith(const string16 & str,const string16 & search,bool case_sensitive)477 bool StartsWith(const string16& str, const string16& search,
478                 bool case_sensitive) {
479   return StartsWithT(str, search, case_sensitive);
480 }
481 
482 template <typename STR>
EndsWithT(const STR & str,const STR & search,bool case_sensitive)483 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
484   typename STR::size_type str_length = str.length();
485   typename STR::size_type search_length = search.length();
486   if (search_length > str_length)
487     return false;
488   if (case_sensitive) {
489     return str.compare(str_length - search_length, search_length, search) == 0;
490   } else {
491     return std::equal(search.begin(), search.end(),
492                       str.begin() + (str_length - search_length),
493                       base::CaseInsensitiveCompare<typename STR::value_type>());
494   }
495 }
496 
EndsWith(const std::string & str,const std::string & search,bool case_sensitive)497 bool EndsWith(const std::string& str, const std::string& search,
498               bool case_sensitive) {
499   return EndsWithT(str, search, case_sensitive);
500 }
501 
EndsWith(const string16 & str,const string16 & search,bool case_sensitive)502 bool EndsWith(const string16& str, const string16& search,
503               bool case_sensitive) {
504   return EndsWithT(str, search, case_sensitive);
505 }
506 
507 static const char* const kByteStringsUnlocalized[] = {
508   " B",
509   " kB",
510   " MB",
511   " GB",
512   " TB",
513   " PB"
514 };
515 
FormatBytesUnlocalized(int64 bytes)516 string16 FormatBytesUnlocalized(int64 bytes) {
517   double unit_amount = static_cast<double>(bytes);
518   size_t dimension = 0;
519   const int kKilo = 1024;
520   while (unit_amount >= kKilo &&
521          dimension < arraysize(kByteStringsUnlocalized) - 1) {
522     unit_amount /= kKilo;
523     dimension++;
524   }
525 
526   char buf[64];
527   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
528     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
529                    kByteStringsUnlocalized[dimension]);
530   } else {
531     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
532                    kByteStringsUnlocalized[dimension]);
533   }
534 
535   return ASCIIToUTF16(buf);
536 }
537 
538 template<class StringType>
DoReplaceSubstringsAfterOffset(StringType * str,typename StringType::size_type start_offset,const StringType & find_this,const StringType & replace_with,bool replace_all)539 void DoReplaceSubstringsAfterOffset(StringType* str,
540                                     typename StringType::size_type start_offset,
541                                     const StringType& find_this,
542                                     const StringType& replace_with,
543                                     bool replace_all) {
544   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
545     return;
546 
547   DCHECK(!find_this.empty());
548   for (typename StringType::size_type offs(str->find(find_this, start_offset));
549       offs != StringType::npos; offs = str->find(find_this, offs)) {
550     str->replace(offs, find_this.length(), replace_with);
551     offs += replace_with.length();
552 
553     if (!replace_all)
554       break;
555   }
556 }
557 
ReplaceFirstSubstringAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)558 void ReplaceFirstSubstringAfterOffset(string16* str,
559                                       string16::size_type start_offset,
560                                       const string16& find_this,
561                                       const string16& replace_with) {
562   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
563                                  false);  // replace first instance
564 }
565 
ReplaceFirstSubstringAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)566 void ReplaceFirstSubstringAfterOffset(std::string* str,
567                                       std::string::size_type start_offset,
568                                       const std::string& find_this,
569                                       const std::string& replace_with) {
570   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
571                                  false);  // replace first instance
572 }
573 
ReplaceSubstringsAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)574 void ReplaceSubstringsAfterOffset(string16* str,
575                                   string16::size_type start_offset,
576                                   const string16& find_this,
577                                   const string16& replace_with) {
578   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
579                                  true);  // replace all instances
580 }
581 
ReplaceSubstringsAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)582 void ReplaceSubstringsAfterOffset(std::string* str,
583                                   std::string::size_type start_offset,
584                                   const std::string& find_this,
585                                   const std::string& replace_with) {
586   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
587                                  true);  // replace all instances
588 }
589 
590 
591 template<typename STR>
TokenizeT(const STR & str,const STR & delimiters,std::vector<STR> * tokens)592 static size_t TokenizeT(const STR& str,
593                         const STR& delimiters,
594                         std::vector<STR>* tokens) {
595   tokens->clear();
596 
597   typename STR::size_type start = str.find_first_not_of(delimiters);
598   while (start != STR::npos) {
599     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
600     if (end == STR::npos) {
601       tokens->push_back(str.substr(start));
602       break;
603     } else {
604       tokens->push_back(str.substr(start, end - start));
605       start = str.find_first_not_of(delimiters, end + 1);
606     }
607   }
608 
609   return tokens->size();
610 }
611 
Tokenize(const string16 & str,const string16 & delimiters,std::vector<string16> * tokens)612 size_t Tokenize(const string16& str,
613                 const string16& delimiters,
614                 std::vector<string16>* tokens) {
615   return TokenizeT(str, delimiters, tokens);
616 }
617 
Tokenize(const std::string & str,const std::string & delimiters,std::vector<std::string> * tokens)618 size_t Tokenize(const std::string& str,
619                 const std::string& delimiters,
620                 std::vector<std::string>* tokens) {
621   return TokenizeT(str, delimiters, tokens);
622 }
623 
Tokenize(const base::StringPiece & str,const base::StringPiece & delimiters,std::vector<base::StringPiece> * tokens)624 size_t Tokenize(const base::StringPiece& str,
625                 const base::StringPiece& delimiters,
626                 std::vector<base::StringPiece>* tokens) {
627   return TokenizeT(str, delimiters, tokens);
628 }
629 
630 template<typename STR>
JoinStringT(const std::vector<STR> & parts,const STR & sep)631 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
632   if (parts.empty())
633     return STR();
634 
635   STR result(parts[0]);
636   typename std::vector<STR>::const_iterator iter = parts.begin();
637   ++iter;
638 
639   for (; iter != parts.end(); ++iter) {
640     result += sep;
641     result += *iter;
642   }
643 
644   return result;
645 }
646 
JoinString(const std::vector<std::string> & parts,char sep)647 std::string JoinString(const std::vector<std::string>& parts, char sep) {
648   return JoinStringT(parts, std::string(1, sep));
649 }
650 
JoinString(const std::vector<string16> & parts,char16 sep)651 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
652   return JoinStringT(parts, string16(1, sep));
653 }
654 
JoinString(const std::vector<std::string> & parts,const std::string & separator)655 std::string JoinString(const std::vector<std::string>& parts,
656                        const std::string& separator) {
657   return JoinStringT(parts, separator);
658 }
659 
JoinString(const std::vector<string16> & parts,const string16 & separator)660 string16 JoinString(const std::vector<string16>& parts,
661                     const string16& separator) {
662   return JoinStringT(parts, separator);
663 }
664 
665 template<class FormatStringType, class OutStringType>
DoReplaceStringPlaceholders(const FormatStringType & format_string,const std::vector<OutStringType> & subst,std::vector<size_t> * offsets)666 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
667     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
668   size_t substitutions = subst.size();
669 
670   size_t sub_length = 0;
671   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
672        iter != subst.end(); ++iter) {
673     sub_length += iter->length();
674   }
675 
676   OutStringType formatted;
677   formatted.reserve(format_string.length() + sub_length);
678 
679   std::vector<ReplacementOffset> r_offsets;
680   for (typename FormatStringType::const_iterator i = format_string.begin();
681        i != format_string.end(); ++i) {
682     if ('$' == *i) {
683       if (i + 1 != format_string.end()) {
684         ++i;
685         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
686         if ('$' == *i) {
687           while (i != format_string.end() && '$' == *i) {
688             formatted.push_back('$');
689             ++i;
690           }
691           --i;
692         } else {
693           uintptr_t index = 0;
694           while (i != format_string.end() && '0' <= *i && *i <= '9') {
695             index *= 10;
696             index += *i - '0';
697             ++i;
698           }
699           --i;
700           index -= 1;
701           if (offsets) {
702             ReplacementOffset r_offset(index,
703                 static_cast<int>(formatted.size()));
704             r_offsets.insert(std::lower_bound(r_offsets.begin(),
705                                               r_offsets.end(),
706                                               r_offset,
707                                               &CompareParameter),
708                              r_offset);
709           }
710           if (index < substitutions)
711             formatted.append(subst.at(index));
712         }
713       }
714     } else {
715       formatted.push_back(*i);
716     }
717   }
718   if (offsets) {
719     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
720          i != r_offsets.end(); ++i) {
721       offsets->push_back(i->offset);
722     }
723   }
724   return formatted;
725 }
726 
ReplaceStringPlaceholders(const string16 & format_string,const std::vector<string16> & subst,std::vector<size_t> * offsets)727 string16 ReplaceStringPlaceholders(const string16& format_string,
728                                    const std::vector<string16>& subst,
729                                    std::vector<size_t>* offsets) {
730   return DoReplaceStringPlaceholders(format_string, subst, offsets);
731 }
732 
ReplaceStringPlaceholders(const base::StringPiece & format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)733 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
734                                       const std::vector<std::string>& subst,
735                                       std::vector<size_t>* offsets) {
736   return DoReplaceStringPlaceholders(format_string, subst, offsets);
737 }
738 
ReplaceStringPlaceholders(const string16 & format_string,const string16 & a,size_t * offset)739 string16 ReplaceStringPlaceholders(const string16& format_string,
740                                    const string16& a,
741                                    size_t* offset) {
742   std::vector<size_t> offsets;
743   std::vector<string16> subst;
744   subst.push_back(a);
745   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
746 
747   DCHECK_EQ(1U, offsets.size());
748   if (offset)
749     *offset = offsets[0];
750   return result;
751 }
752 
IsWildcard(base_icu::UChar32 character)753 static bool IsWildcard(base_icu::UChar32 character) {
754   return character == '*' || character == '?';
755 }
756 
757 // Move the strings pointers to the point where they start to differ.
758 template <typename CHAR, typename NEXT>
EatSameChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,NEXT next)759 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
760                          const CHAR** string, const CHAR* string_end,
761                          NEXT next) {
762   const CHAR* escape = NULL;
763   while (*pattern != pattern_end && *string != string_end) {
764     if (!escape && IsWildcard(**pattern)) {
765       // We don't want to match wildcard here, except if it's escaped.
766       return;
767     }
768 
769     // Check if the escapement char is found. If so, skip it and move to the
770     // next character.
771     if (!escape && **pattern == '\\') {
772       escape = *pattern;
773       next(pattern, pattern_end);
774       continue;
775     }
776 
777     // Check if the chars match, if so, increment the ptrs.
778     const CHAR* pattern_next = *pattern;
779     const CHAR* string_next = *string;
780     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
781     if (pattern_char == next(&string_next, string_end) &&
782         pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
783       *pattern = pattern_next;
784       *string = string_next;
785     } else {
786       // Uh ho, it did not match, we are done. If the last char was an
787       // escapement, that means that it was an error to advance the ptr here,
788       // let's put it back where it was. This also mean that the MatchPattern
789       // function will return false because if we can't match an escape char
790       // here, then no one will.
791       if (escape) {
792         *pattern = escape;
793       }
794       return;
795     }
796 
797     escape = NULL;
798   }
799 }
800 
801 template <typename CHAR, typename NEXT>
EatWildcard(const CHAR ** pattern,const CHAR * end,NEXT next)802 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
803   while (*pattern != end) {
804     if (!IsWildcard(**pattern))
805       return;
806     next(pattern, end);
807   }
808 }
809 
810 template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,int depth,NEXT next)811 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
812                           const CHAR* pattern, const CHAR* pattern_end,
813                           int depth,
814                           NEXT next) {
815   const int kMaxDepth = 16;
816   if (depth > kMaxDepth)
817     return false;
818 
819   // Eat all the matching chars.
820   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
821 
822   // If the string is empty, then the pattern must be empty too, or contains
823   // only wildcards.
824   if (eval == eval_end) {
825     EatWildcard(&pattern, pattern_end, next);
826     return pattern == pattern_end;
827   }
828 
829   // Pattern is empty but not string, this is not a match.
830   if (pattern == pattern_end)
831     return false;
832 
833   // If this is a question mark, then we need to compare the rest with
834   // the current string or the string with one character eaten.
835   const CHAR* next_pattern = pattern;
836   next(&next_pattern, pattern_end);
837   if (pattern[0] == '?') {
838     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
839                       depth + 1, next))
840       return true;
841     const CHAR* next_eval = eval;
842     next(&next_eval, eval_end);
843     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
844                       depth + 1, next))
845       return true;
846   }
847 
848   // This is a *, try to match all the possible substrings with the remainder
849   // of the pattern.
850   if (pattern[0] == '*') {
851     // Collapse duplicate wild cards (********** into *) so that the
852     // method does not recurse unnecessarily. http://crbug.com/52839
853     EatWildcard(&next_pattern, pattern_end, next);
854 
855     while (eval != eval_end) {
856       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
857                         depth + 1, next))
858         return true;
859       eval++;
860     }
861 
862     // We reached the end of the string, let see if the pattern contains only
863     // wildcards.
864     if (eval == eval_end) {
865       EatWildcard(&pattern, pattern_end, next);
866       if (pattern != pattern_end)
867         return false;
868       return true;
869     }
870   }
871 
872   return false;
873 }
874 
875 struct NextCharUTF8 {
operator ()NextCharUTF8876   base_icu::UChar32 operator()(const char** p, const char* end) {
877     base_icu::UChar32 c;
878     int offset = 0;
879     CBU8_NEXT(*p, offset, end - *p, c);
880     *p += offset;
881     return c;
882   }
883 };
884 
885 struct NextCharUTF16 {
operator ()NextCharUTF16886   base_icu::UChar32 operator()(const char16** p, const char16* end) {
887     base_icu::UChar32 c;
888     int offset = 0;
889     CBU16_NEXT(*p, offset, end - *p, c);
890     *p += offset;
891     return c;
892   }
893 };
894 
MatchPattern(const base::StringPiece & eval,const base::StringPiece & pattern)895 bool MatchPattern(const base::StringPiece& eval,
896                   const base::StringPiece& pattern) {
897   return MatchPatternT(eval.data(), eval.data() + eval.size(),
898                        pattern.data(), pattern.data() + pattern.size(),
899                        0, NextCharUTF8());
900 }
901 
MatchPattern(const string16 & eval,const string16 & pattern)902 bool MatchPattern(const string16& eval, const string16& pattern) {
903   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
904                        pattern.c_str(), pattern.c_str() + pattern.size(),
905                        0, NextCharUTF16());
906 }
907 
908 // The following code is compatible with the OpenBSD lcpy interface.  See:
909 //   http://www.gratisoft.us/todd/papers/strlcpy.html
910 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
911 
912 namespace {
913 
914 template <typename CHAR>
lcpyT(CHAR * dst,const CHAR * src,size_t dst_size)915 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
916   for (size_t i = 0; i < dst_size; ++i) {
917     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
918       return i;
919   }
920 
921   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
922   if (dst_size != 0)
923     dst[dst_size - 1] = 0;
924 
925   // Count the rest of the |src|, and return it's length in characters.
926   while (src[dst_size]) ++dst_size;
927   return dst_size;
928 }
929 
930 }  // namespace
931 
strlcpy(char * dst,const char * src,size_t dst_size)932 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
933   return lcpyT<char>(dst, src, dst_size);
934 }
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)935 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
936   return lcpyT<wchar_t>(dst, src, dst_size);
937 }
938