• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/string_util.h"
6 
7 #include "build/build_config.h"
8 
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
19 
20 #include <algorithm>
21 #include <vector>
22 
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/third_party/dmg_fp/dmg_fp.h"
27 #include "base/utf_string_conversion_utils.h"
28 #include "base/utf_string_conversions.h"
29 #include "base/third_party/icu/icu_utf.h"
30 
31 namespace {
32 
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This
34 // prevents other code that might accidentally use Singleton<string> from
35 // getting our internal one.
36 struct EmptyStrings {
EmptyStrings__anon889a81dc0111::EmptyStrings37   EmptyStrings() {}
38   const std::string s;
39   const std::wstring ws;
40   const string16 s16;
41 
GetInstance__anon889a81dc0111::EmptyStrings42   static EmptyStrings* GetInstance() {
43     return Singleton<EmptyStrings>::get();
44   }
45 };
46 
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;

  // Stores |parameter_index| and |start_offset| in the fields above.
  ReplacementOffset(uintptr_t parameter_index, size_t start_offset)
      : parameter(parameter_index),
        offset(start_offset) {}
};
60 
CompareParameter(const ReplacementOffset & elem1,const ReplacementOffset & elem2)61 static bool CompareParameter(const ReplacementOffset& elem1,
62                              const ReplacementOffset& elem2) {
63   return elem1.parameter < elem2.parameter;
64 }
65 
66 }  // namespace
67 
68 namespace base {
69 
// Scans a wprintf-style |format| and reports whether every conversion
// specification in it is one this function considers portable.
//
// Returns false as soon as it sees %s or %c without an 'l' modifier, or any
// of %S, %C, %F, %D, %O, %U.  Returns true otherwise, including when the
// string ends in the middle of a specification (such a string is equally
// broken on all platforms).
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on '%'.  Consume characters until a known conversion
    // specifier terminates this specification.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == '\0') {
        // Truncated specification: nothing unportable was found.
        return true;
      }

      if (ch == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        saw_l_modifier = true;
      } else {
        switch (ch) {
          case 's':
          case 'c':
            if (!saw_l_modifier)
              return false;  // Not portable.
            break;
          case 'S':
          case 'C':
          case 'F':
          case 'D':
          case 'O':
          case 'U':
            return false;  // Not portable.
          default:
            break;
        }
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // Portable; this specification is finished.
        break;
      }
    }

    // Step past the conversion character and keep scanning.
    ++cursor;
  }

  return true;
}
104 
105 }  // namespace base
106 
107 
EmptyString()108 const std::string& EmptyString() {
109   return EmptyStrings::GetInstance()->s;
110 }
111 
EmptyWString()112 const std::wstring& EmptyWString() {
113   return EmptyStrings::GetInstance()->ws;
114 }
115 
EmptyString16()116 const string16& EmptyString16() {
117   return EmptyStrings::GetInstance()->s16;
118 }
119 
// Comma-separated list of the code units treated as whitespace, ending with a
// 0 terminator.  Meant to be expanded inside an array initializer.
#define WHITESPACE_UNICODE                                              \
  /* <control-0009> to <control-000D> */                                \
  0x0009, 0x000A, 0x000B, 0x000C, 0x000D,                               \
  0x0020, /* Space */                                                   \
  0x0085, /* <control-0085> */                                          \
  0x00A0, /* No-Break Space */                                          \
  0x1680, /* Ogham Space Mark */                                        \
  0x180E, /* Mongolian Vowel Separator */                               \
  /* En Quad to Hair Space */                                           \
  0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,                       \
  0x2006, 0x2007, 0x2008, 0x2009, 0x200A,                               \
  0x200C, /* Zero Width Non-Joiner */                                   \
  0x2028, /* Line Separator */                                          \
  0x2029, /* Paragraph Separator */                                     \
  0x202F, /* Narrow No-Break Space */                                   \
  0x205F, /* Medium Mathematical Space */                               \
  0x3000, /* Ideographic Space */                                       \
  0
149 
150 const wchar_t kWhitespaceWide[] = {
151   WHITESPACE_UNICODE
152 };
153 const char16 kWhitespaceUTF16[] = {
154   WHITESPACE_UNICODE
155 };
// Zero-terminated table of the ASCII whitespace characters.
const char kWhitespaceASCII[] = {
  0x09, 0x0A, 0x0B, 0x0C, 0x0D,  // <control-0009> to <control-000D>
  0x20,                          // Space
  0                              // Terminator
};
165 
// The three bytes EF BB BF, followed by the literal's terminating NUL.
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
167 
// Copies |input| into |*output| and then deletes every character that appears
// in the zero-terminated set |remove_chars|.  Returns true if at least one
// character was deleted.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  *output = input;

  bool any_removed = false;
  for (typename STR::size_type pos = output->find_first_of(remove_chars);
       pos != STR::npos;
       pos = output->find_first_of(remove_chars, pos)) {
    // Drop the matching character; the next search resumes at the same index,
    // which now holds the following character.
    output->erase(pos, 1);
    any_removed = true;
  }

  return any_removed;
}
186 
RemoveChars(const std::wstring & input,const wchar_t remove_chars[],std::wstring * output)187 bool RemoveChars(const std::wstring& input,
188                  const wchar_t remove_chars[],
189                  std::wstring* output) {
190   return RemoveCharsT(input, remove_chars, output);
191 }
192 
193 #if !defined(WCHAR_T_IS_UTF16)
RemoveChars(const string16 & input,const char16 remove_chars[],string16 * output)194 bool RemoveChars(const string16& input,
195                  const char16 remove_chars[],
196                  string16* output) {
197   return RemoveCharsT(input, remove_chars, output);
198 }
199 #endif
200 
RemoveChars(const std::string & input,const char remove_chars[],std::string * output)201 bool RemoveChars(const std::string& input,
202                  const char remove_chars[],
203                  std::string* output) {
204   return RemoveCharsT(input, remove_chars, output);
205 }
206 
207 template<typename STR>
TrimStringT(const STR & input,const typename STR::value_type trim_chars[],TrimPositions positions,STR * output)208 TrimPositions TrimStringT(const STR& input,
209                           const typename STR::value_type trim_chars[],
210                           TrimPositions positions,
211                           STR* output) {
212   // Find the edges of leading/trailing whitespace as desired.
213   const typename STR::size_type last_char = input.length() - 1;
214   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
215       input.find_first_not_of(trim_chars) : 0;
216   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
217       input.find_last_not_of(trim_chars) : last_char;
218 
219   // When the string was all whitespace, report that we stripped off whitespace
220   // from whichever position the caller was interested in.  For empty input, we
221   // stripped no whitespace, but we still need to clear |output|.
222   if (input.empty() ||
223       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
224     bool input_was_empty = input.empty();  // in case output == &input
225     output->clear();
226     return input_was_empty ? TRIM_NONE : positions;
227   }
228 
229   // Trim the whitespace.
230   *output =
231       input.substr(first_good_char, last_good_char - first_good_char + 1);
232 
233   // Return where we trimmed from.
234   return static_cast<TrimPositions>(
235       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
236       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
237 }
238 
TrimString(const std::wstring & input,const wchar_t trim_chars[],std::wstring * output)239 bool TrimString(const std::wstring& input,
240                 const wchar_t trim_chars[],
241                 std::wstring* output) {
242   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
243 }
244 
245 #if !defined(WCHAR_T_IS_UTF16)
TrimString(const string16 & input,const char16 trim_chars[],string16 * output)246 bool TrimString(const string16& input,
247                 const char16 trim_chars[],
248                 string16* output) {
249   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
250 }
251 #endif
252 
TrimString(const std::string & input,const char trim_chars[],std::string * output)253 bool TrimString(const std::string& input,
254                 const char trim_chars[],
255                 std::string* output) {
256   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
257 }
258 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)259 void TruncateUTF8ToByteSize(const std::string& input,
260                             const size_t byte_size,
261                             std::string* output) {
262   DCHECK(output);
263   if (byte_size > input.length()) {
264     *output = input;
265     return;
266   }
267   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
268   // Note: This cast is necessary because CBU8_NEXT uses int32s.
269   int32 truncation_length = static_cast<int32>(byte_size);
270   int32 char_index = truncation_length - 1;
271   const char* data = input.data();
272 
273   // Using CBU8, we will move backwards from the truncation point
274   // to the beginning of the string looking for a valid UTF8
275   // character.  Once a full UTF8 character is found, we will
276   // truncate the string to the end of that character.
277   while (char_index >= 0) {
278     int32 prev = char_index;
279     uint32 code_point = 0;
280     CBU8_NEXT(data, char_index, truncation_length, code_point);
281     if (!base::IsValidCharacter(code_point) ||
282         !base::IsValidCodepoint(code_point)) {
283       char_index = prev - 1;
284     } else {
285       break;
286     }
287   }
288 
289   if (char_index >= 0 )
290     *output = input.substr(0, char_index);
291   else
292     output->clear();
293 }
294 
TrimWhitespace(const std::wstring & input,TrimPositions positions,std::wstring * output)295 TrimPositions TrimWhitespace(const std::wstring& input,
296                              TrimPositions positions,
297                              std::wstring* output) {
298   return TrimStringT(input, kWhitespaceWide, positions, output);
299 }
300 
301 #if !defined(WCHAR_T_IS_UTF16)
TrimWhitespace(const string16 & input,TrimPositions positions,string16 * output)302 TrimPositions TrimWhitespace(const string16& input,
303                              TrimPositions positions,
304                              string16* output) {
305   return TrimStringT(input, kWhitespaceUTF16, positions, output);
306 }
307 #endif
308 
TrimWhitespaceASCII(const std::string & input,TrimPositions positions,std::string * output)309 TrimPositions TrimWhitespaceASCII(const std::string& input,
310                                   TrimPositions positions,
311                                   std::string* output) {
312   return TrimStringT(input, kWhitespaceASCII, positions, output);
313 }
314 
315 // This function is only for backward-compatibility.
316 // To be removed when all callers are updated.
TrimWhitespace(const std::string & input,TrimPositions positions,std::string * output)317 TrimPositions TrimWhitespace(const std::string& input,
318                              TrimPositions positions,
319                              std::string* output) {
320   return TrimWhitespaceASCII(input, positions, output);
321 }
322 
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every interior run of whitespace becomes a single space.  When
// |trim_sequences_with_line_breaks| is true, an interior run containing a CR
// or LF is removed entirely instead of being replaced by a space.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  STR collapsed;
  collapsed.reserve(text.size());

  // Start out as if we were already inside a whitespace run that has been
  // dropped, so leading whitespace produces no output.
  bool inside_run = true;
  bool run_dropped = true;

  for (typename STR::size_type pos = 0; pos < text.size(); ++pos) {
    const typename STR::value_type ch = text[pos];

    if (!IsWhitespace(ch)) {
      // Non-whitespace characters are copied straight across.
      inside_run = false;
      run_dropped = false;
      collapsed.push_back(ch);
      continue;
    }

    if (!inside_run) {
      // First whitespace character of a run: emit the single space.
      inside_run = true;
      collapsed.push_back(L' ');
    }

    const bool is_line_break = (ch == '\n') || (ch == '\r');
    if (trim_sequences_with_line_breaks && !run_dropped && is_line_break) {
      // The run contains CR or LF: take back the space emitted for it.
      run_dropped = true;
      collapsed.resize(collapsed.size() - 1);
    }
  }

  if (inside_run && !run_dropped) {
    // The text ended in a whitespace run: drop its space.
    collapsed.resize(collapsed.size() - 1);
  }

  return collapsed;
}
364 
CollapseWhitespace(const std::wstring & text,bool trim_sequences_with_line_breaks)365 std::wstring CollapseWhitespace(const std::wstring& text,
366                                 bool trim_sequences_with_line_breaks) {
367   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
368 }
369 
370 #if !defined(WCHAR_T_IS_UTF16)
// string16 front-end for CollapseWhitespaceT.
string16 CollapseWhitespace(const string16& text,
                            bool trim_sequences_with_line_breaks) {
  return CollapseWhitespaceT<string16>(text, trim_sequences_with_line_breaks);
}
375 #endif
376 
CollapseWhitespaceASCII(const std::string & text,bool trim_sequences_with_line_breaks)377 std::string CollapseWhitespaceASCII(const std::string& text,
378                                     bool trim_sequences_with_line_breaks) {
379   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
380 }
381 
ContainsOnlyWhitespaceASCII(const std::string & str)382 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
383   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
384     if (!IsAsciiWhitespace(*i))
385       return false;
386   }
387   return true;
388 }
389 
ContainsOnlyWhitespace(const string16 & str)390 bool ContainsOnlyWhitespace(const string16& str) {
391   for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
392     if (!IsWhitespace(*i))
393       return false;
394   }
395   return true;
396 }
397 
// Returns true when every character of |input| also occurs in |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // A character of |input| that is absent from |characters| is exactly what
  // find_first_not_of() looks for.
  return input.find_first_not_of(characters) == STR::npos;
}
407 
ContainsOnlyChars(const std::wstring & input,const std::wstring & characters)408 bool ContainsOnlyChars(const std::wstring& input,
409                        const std::wstring& characters) {
410   return ContainsOnlyCharsT(input, characters);
411 }
412 
413 #if !defined(WCHAR_T_IS_UTF16)
ContainsOnlyChars(const string16 & input,const string16 & characters)414 bool ContainsOnlyChars(const string16& input, const string16& characters) {
415   return ContainsOnlyCharsT(input, characters);
416 }
417 #endif
418 
ContainsOnlyChars(const std::string & input,const std::string & characters)419 bool ContainsOnlyChars(const std::string& input,
420                        const std::string& characters) {
421   return ContainsOnlyCharsT(input, characters);
422 }
423 
WideToASCII(const std::wstring & wide)424 std::string WideToASCII(const std::wstring& wide) {
425   DCHECK(IsStringASCII(wide)) << wide;
426   return std::string(wide.begin(), wide.end());
427 }
428 
UTF16ToASCII(const string16 & utf16)429 std::string UTF16ToASCII(const string16& utf16) {
430   DCHECK(IsStringASCII(utf16)) << utf16;
431   return std::string(utf16.begin(), utf16.end());
432 }
433 
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| to Latin-1 in |*latin1|.  Returns true on success.  Returns
// false, leaving |*latin1| empty, when any element of |wide| lies outside
// [0, 255].
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // Compare as unsigned: wchar_t is a signed type on some platforms, where a
    // plain "> 255" test would let negative values through and silently
    // narrow them to a Latin-1 byte.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
447 
448 template<class STR>
DoIsStringASCII(const STR & str)449 static bool DoIsStringASCII(const STR& str) {
450   for (size_t i = 0; i < str.length(); i++) {
451     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
452     if (c > 0x7F)
453       return false;
454   }
455   return true;
456 }
457 
IsStringASCII(const std::wstring & str)458 bool IsStringASCII(const std::wstring& str) {
459   return DoIsStringASCII(str);
460 }
461 
462 #if !defined(WCHAR_T_IS_UTF16)
IsStringASCII(const string16 & str)463 bool IsStringASCII(const string16& str) {
464   return DoIsStringASCII(str);
465 }
466 #endif
467 
IsStringASCII(const base::StringPiece & str)468 bool IsStringASCII(const base::StringPiece& str) {
469   return DoIsStringASCII(str);
470 }
471 
IsStringUTF8(const std::string & str)472 bool IsStringUTF8(const std::string& str) {
473   const char *src = str.data();
474   int32 src_len = static_cast<int32>(str.length());
475   int32 char_index = 0;
476 
477   while (char_index < src_len) {
478     int32 code_point;
479     CBU8_NEXT(src, char_index, src_len, code_point);
480     if (!base::IsValidCharacter(code_point))
481        return false;
482   }
483   return true;
484 }
485 
486 template<typename Iter>
DoLowerCaseEqualsASCII(Iter a_begin,Iter a_end,const char * b)487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
488                                           Iter a_end,
489                                           const char* b) {
490   for (Iter it = a_begin; it != a_end; ++it, ++b) {
491     if (!*b || base::ToLowerASCII(*it) != *b)
492       return false;
493   }
494   return *b == 0;
495 }
496 
497 // Front-ends for LowerCaseEqualsASCII.
LowerCaseEqualsASCII(const std::string & a,const char * b)498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
499   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
500 }
501 
LowerCaseEqualsASCII(const std::wstring & a,const char * b)502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
503   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
504 }
505 
506 #if !defined(WCHAR_T_IS_UTF16)
LowerCaseEqualsASCII(const string16 & a,const char * b)507 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
508   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
509 }
510 #endif
511 
LowerCaseEqualsASCII(std::string::const_iterator a_begin,std::string::const_iterator a_end,const char * b)512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
513                           std::string::const_iterator a_end,
514                           const char* b) {
515   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
516 }
517 
LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,std::wstring::const_iterator a_end,const char * b)518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
519                           std::wstring::const_iterator a_end,
520                           const char* b) {
521   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
522 }
523 
524 #if !defined(WCHAR_T_IS_UTF16)
LowerCaseEqualsASCII(string16::const_iterator a_begin,string16::const_iterator a_end,const char * b)525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
526                           string16::const_iterator a_end,
527                           const char* b) {
528   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
529 }
530 #endif
531 
532 #if !defined(ANDROID)
LowerCaseEqualsASCII(const char * a_begin,const char * a_end,const char * b)533 bool LowerCaseEqualsASCII(const char* a_begin,
534                           const char* a_end,
535                           const char* b) {
536   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
537 }
538 #endif // !ANDROID
539 
540 #if !defined(ANDROID)
LowerCaseEqualsASCII(const wchar_t * a_begin,const wchar_t * a_end,const char * b)541 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
542                           const wchar_t* a_end,
543                           const char* b) {
544   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
545 }
546 #endif // !ANDROID
547 
548 #if !defined(WCHAR_T_IS_UTF16) && !defined(ANDROID)
LowerCaseEqualsASCII(const char16 * a_begin,const char16 * a_end,const char * b)549 bool LowerCaseEqualsASCII(const char16* a_begin,
550                           const char16* a_end,
551                           const char* b) {
552   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
553 }
554 #endif
555 
EqualsASCII(const string16 & a,const base::StringPiece & b)556 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
557   if (a.length() != b.length())
558     return false;
559   return std::equal(b.begin(), b.end(), a.begin());
560 }
561 
StartsWithASCII(const std::string & str,const std::string & search,bool case_sensitive)562 bool StartsWithASCII(const std::string& str,
563                      const std::string& search,
564                      bool case_sensitive) {
565   if (case_sensitive)
566     return str.compare(0, search.length(), search) == 0;
567   else
568     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
569 }
570 
571 template <typename STR>
StartsWithT(const STR & str,const STR & search,bool case_sensitive)572 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
573   if (case_sensitive) {
574     return str.compare(0, search.length(), search) == 0;
575   } else {
576     if (search.size() > str.size())
577       return false;
578     return std::equal(search.begin(), search.end(), str.begin(),
579                       base::CaseInsensitiveCompare<typename STR::value_type>());
580   }
581 }
582 
StartsWith(const std::wstring & str,const std::wstring & search,bool case_sensitive)583 bool StartsWith(const std::wstring& str, const std::wstring& search,
584                 bool case_sensitive) {
585   return StartsWithT(str, search, case_sensitive);
586 }
587 
588 #if !defined(WCHAR_T_IS_UTF16)
StartsWith(const string16 & str,const string16 & search,bool case_sensitive)589 bool StartsWith(const string16& str, const string16& search,
590                 bool case_sensitive) {
591   return StartsWithT(str, search, case_sensitive);
592 }
593 #endif
594 
595 template <typename STR>
EndsWithT(const STR & str,const STR & search,bool case_sensitive)596 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
597   typename STR::size_type str_length = str.length();
598   typename STR::size_type search_length = search.length();
599   if (search_length > str_length)
600     return false;
601   if (case_sensitive) {
602     return str.compare(str_length - search_length, search_length, search) == 0;
603   } else {
604     return std::equal(search.begin(), search.end(),
605                       str.begin() + (str_length - search_length),
606                       base::CaseInsensitiveCompare<typename STR::value_type>());
607   }
608 }
609 
EndsWith(const std::string & str,const std::string & search,bool case_sensitive)610 bool EndsWith(const std::string& str, const std::string& search,
611               bool case_sensitive) {
612   return EndsWithT(str, search, case_sensitive);
613 }
614 
EndsWith(const std::wstring & str,const std::wstring & search,bool case_sensitive)615 bool EndsWith(const std::wstring& str, const std::wstring& search,
616               bool case_sensitive) {
617   return EndsWithT(str, search, case_sensitive);
618 }
619 
620 #if !defined(WCHAR_T_IS_UTF16)
EndsWith(const string16 & str,const string16 & search,bool case_sensitive)621 bool EndsWith(const string16& str, const string16& search,
622               bool case_sensitive) {
623   return EndsWithT(str, search, case_sensitive);
624 }
625 #endif
626 
GetByteDisplayUnits(int64 bytes)627 DataUnits GetByteDisplayUnits(int64 bytes) {
628   // The byte thresholds at which we display amounts.  A byte count is displayed
629   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
630   // This must match the DataUnits enum.
631   static const int64 kUnitThresholds[] = {
632     0,              // DATA_UNITS_BYTE,
633     3*1024,         // DATA_UNITS_KIBIBYTE,
634     2*1024*1024,    // DATA_UNITS_MEBIBYTE,
635     1024*1024*1024  // DATA_UNITS_GIBIBYTE,
636   };
637 
638   if (bytes < 0) {
639     NOTREACHED() << "Negative bytes value";
640     return DATA_UNITS_BYTE;
641   }
642 
643   int unit_index = arraysize(kUnitThresholds);
644   while (--unit_index > 0) {
645     if (bytes >= kUnitThresholds[unit_index])
646       break;
647   }
648 
649   DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);
650   return DataUnits(unit_index);
651 }
652 
// TODO(mpcomplete): deal with locale
// Byte suffixes.  This must match the DataUnits enum.
static const char* const kByteStrings[] = { "B", "kB", "MB", "GB" };
661 
// Per-second suffixes, in the same order as kByteStrings.
static const char* const kSpeedStrings[] = { "B/s", "kB/s", "MB/s", "GB/s" };
668 
FormatBytesInternal(int64 bytes,DataUnits units,bool show_units,const char * const * suffix)669 string16 FormatBytesInternal(int64 bytes,
670                              DataUnits units,
671                              bool show_units,
672                              const char* const* suffix) {
673   if (bytes < 0) {
674     NOTREACHED() << "Negative bytes value";
675     return string16();
676   }
677 
678   DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);
679 
680   // Put the quantity in the right units.
681   double unit_amount = static_cast<double>(bytes);
682   for (int i = 0; i < units; ++i)
683     unit_amount /= 1024.0;
684 
685   char buf[64];
686   if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100)
687     base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount);
688   else
689     base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount);
690 
691   std::string ret(buf);
692   if (show_units) {
693     ret += " ";
694     ret += suffix[units];
695   }
696 
697   return ASCIIToUTF16(ret);
698 }
699 
FormatBytes(int64 bytes,DataUnits units,bool show_units)700 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) {
701   return FormatBytesInternal(bytes, units, show_units, kByteStrings);
702 }
703 
FormatSpeed(int64 bytes,DataUnits units,bool show_units)704 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
705   return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
706 }
707 
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|.  Only the first occurrence is replaced unless
// |replace_all| is true.  Text inserted by a replacement is never rescanned.
//
// Does nothing when |start_offset| is npos or not less than the string
// length.  |find_this| must be non-empty: this is DCHECKed, and an empty
// |find_this| otherwise leaves |*str| untouched.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty needle matches at every offset, so when the DCHECK above compiles
  // away the replace-all loop below would never terminate (and would grow
  // |*str| without bound for a non-empty |replace_with|).
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume searching after the inserted text.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
727 
ReplaceFirstSubstringAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)728 void ReplaceFirstSubstringAfterOffset(string16* str,
729                                       string16::size_type start_offset,
730                                       const string16& find_this,
731                                       const string16& replace_with) {
732   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
733                                  false);  // replace first instance
734 }
735 
ReplaceFirstSubstringAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)736 void ReplaceFirstSubstringAfterOffset(std::string* str,
737                                       std::string::size_type start_offset,
738                                       const std::string& find_this,
739                                       const std::string& replace_with) {
740   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
741                                  false);  // replace first instance
742 }
743 
ReplaceSubstringsAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)744 void ReplaceSubstringsAfterOffset(string16* str,
745                                   string16::size_type start_offset,
746                                   const string16& find_this,
747                                   const string16& replace_with) {
748   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
749                                  true);  // replace all instances
750 }
751 
ReplaceSubstringsAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)752 void ReplaceSubstringsAfterOffset(std::string* str,
753                                   std::string::size_type start_offset,
754                                   const std::string& find_this,
755                                   const std::string& replace_with) {
756   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
757                                  true);  // replace all instances
758 }
759 
760 
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |*tokens|, replacing its previous contents.  Runs of
// consecutive delimiters produce no empty tokens.  Returns the number of
// tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Pos;

  tokens->clear();

  // |token_begin| always points at the first character of the next token.
  for (Pos token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos; ) {
    const Pos token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // Last token: it extends to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
781 
Tokenize(const std::wstring & str,const std::wstring & delimiters,std::vector<std::wstring> * tokens)782 size_t Tokenize(const std::wstring& str,
783                 const std::wstring& delimiters,
784                 std::vector<std::wstring>* tokens) {
785   return TokenizeT(str, delimiters, tokens);
786 }
787 
788 #if !defined(WCHAR_T_IS_UTF16)
Tokenize(const string16 & str,const string16 & delimiters,std::vector<string16> * tokens)789 size_t Tokenize(const string16& str,
790                 const string16& delimiters,
791                 std::vector<string16>* tokens) {
792   return TokenizeT(str, delimiters, tokens);
793 }
794 #endif
795 
Tokenize(const std::string & str,const std::string & delimiters,std::vector<std::string> * tokens)796 size_t Tokenize(const std::string& str,
797                 const std::string& delimiters,
798                 std::vector<std::string>* tokens) {
799   return TokenizeT(str, delimiters, tokens);
800 }
801 
Tokenize(const base::StringPiece & str,const base::StringPiece & delimiters,std::vector<base::StringPiece> * tokens)802 size_t Tokenize(const base::StringPiece& str,
803                 const base::StringPiece& delimiters,
804                 std::vector<base::StringPiece>* tokens) {
805   return TokenizeT(str, delimiters, tokens);
806 }
807 
// Concatenates the elements of |parts| in order, placing a single |sep|
// between each pair of neighbouring elements. Returns an empty string when
// |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  for (size_t index = 0; index < parts.size(); ++index) {
    // The separator goes before every element except the first.
    if (index != 0)
      joined += sep;
    joined += parts[index];
  }
  return joined;
}
825 
JoinString(const std::vector<std::string> & parts,char sep)826 std::string JoinString(const std::vector<std::string>& parts, char sep) {
827   return JoinStringT(parts, sep);
828 }
829 
JoinString(const std::vector<string16> & parts,char16 sep)830 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
831   return JoinStringT(parts, sep);
832 }
833 
834 template<class FormatStringType, class OutStringType>
DoReplaceStringPlaceholders(const FormatStringType & format_string,const std::vector<OutStringType> & subst,std::vector<size_t> * offsets)835 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
836     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
837   size_t substitutions = subst.size();
838   DCHECK(substitutions < 10);
839 
840   size_t sub_length = 0;
841   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
842        iter != subst.end(); ++iter) {
843     sub_length += iter->length();
844   }
845 
846   OutStringType formatted;
847   formatted.reserve(format_string.length() + sub_length);
848 
849   std::vector<ReplacementOffset> r_offsets;
850   for (typename FormatStringType::const_iterator i = format_string.begin();
851        i != format_string.end(); ++i) {
852     if ('$' == *i) {
853       if (i + 1 != format_string.end()) {
854         ++i;
855         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
856         if ('$' == *i) {
857           while (i != format_string.end() && '$' == *i) {
858             formatted.push_back('$');
859             ++i;
860           }
861           --i;
862         } else {
863           uintptr_t index = *i - '1';
864           if (offsets) {
865             ReplacementOffset r_offset(index,
866                 static_cast<int>(formatted.size()));
867             r_offsets.insert(std::lower_bound(r_offsets.begin(),
868                                               r_offsets.end(),
869                                               r_offset,
870                                               &CompareParameter),
871                              r_offset);
872           }
873           if (index < substitutions)
874             formatted.append(subst.at(index));
875         }
876       }
877     } else {
878       formatted.push_back(*i);
879     }
880   }
881   if (offsets) {
882     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
883          i != r_offsets.end(); ++i) {
884       offsets->push_back(i->offset);
885     }
886   }
887   return formatted;
888 }
889 
// string16 overload; forwards to the shared DoReplaceStringPlaceholders
// implementation.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const std::vector<string16>& subst,
                                   std::vector<size_t>* offsets) {
  return DoReplaceStringPlaceholders<string16, string16>(
      format_string, subst, offsets);
}
895 
ReplaceStringPlaceholders(const base::StringPiece & format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)896 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
897                                       const std::vector<std::string>& subst,
898                                       std::vector<size_t>* offsets) {
899   return DoReplaceStringPlaceholders(format_string, subst, offsets);
900 }
901 
// Convenience overload for a single substitution: expands |format_string|
// with |a| as the only substitution string. If |offset| is non-NULL and an
// offset was recorded for a placeholder, |*offset| receives the first recorded
// offset; otherwise |*offset| is left untouched.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const string16& a,
                                   size_t* offset) {
  std::vector<size_t> offsets;
  std::vector<string16> subst;
  subst.push_back(a);
  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

  DCHECK(offsets.size() == 1);
  // The DCHECK compiles away in release builds, so guard the read: a format
  // string without any placeholder leaves |offsets| empty and offsets[0]
  // would be out of bounds.
  if (offset && !offsets.empty()) {
    *offset = offsets[0];
  }
  return result;
}
916 
IsWildcard(base_icu::UChar32 character)917 static bool IsWildcard(base_icu::UChar32 character) {
918   return character == '*' || character == '?';
919 }
920 
921 // Move the strings pointers to the point where they start to differ.
922 template <typename CHAR, typename NEXT>
EatSameChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,NEXT next)923 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
924                          const CHAR** string, const CHAR* string_end,
925                          NEXT next) {
926   const CHAR* escape = NULL;
927   while (*pattern != pattern_end && *string != string_end) {
928     if (!escape && IsWildcard(**pattern)) {
929       // We don't want to match wildcard here, except if it's escaped.
930       return;
931     }
932 
933     // Check if the escapement char is found. If so, skip it and move to the
934     // next character.
935     if (!escape && **pattern == '\\') {
936       escape = *pattern;
937       next(pattern, pattern_end);
938       continue;
939     }
940 
941     // Check if the chars match, if so, increment the ptrs.
942     const CHAR* pattern_next = *pattern;
943     const CHAR* string_next = *string;
944     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
945     if (pattern_char == next(&string_next, string_end) &&
946         pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
947       *pattern = pattern_next;
948       *string = string_next;
949     } else {
950       // Uh ho, it did not match, we are done. If the last char was an
951       // escapement, that means that it was an error to advance the ptr here,
952       // let's put it back where it was. This also mean that the MatchPattern
953       // function will return false because if we can't match an escape char
954       // here, then no one will.
955       if (escape) {
956         *pattern = escape;
957       }
958       return;
959     }
960 
961     escape = NULL;
962   }
963 }
964 
// Advances |*pattern| past any leading run of wildcard characters ('*' or
// '?'), stopping at |end| or at the first non-wildcard.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
973 
// Recursive matcher behind MatchPattern. Returns true if [eval, eval_end)
// matches [pattern, pattern_end), where in the pattern '*' matches any run of
// characters (including none), '?' matches zero or one character, and a
// backslash escapes the following character. |depth| is the current recursion
// level; once it exceeds kMaxDepth the match is reported as failed. |next|
// decodes one character and advances the pointer it is given.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix the two inputs have in common.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // Nothing left to evaluate: this is a match only if whatever remains of the
  // pattern is made of wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Input remains but the pattern is used up.
  if (pattern == pattern_end)
    return false;

  // |rest_of_pattern| is the pattern with its current character removed.
  const CHAR* rest_of_pattern = pattern;
  next(&rest_of_pattern, pattern_end);

  if (*pattern == '?') {
    // First let '?' match nothing...
    if (MatchPatternT(eval, eval_end, rest_of_pattern, pattern_end,
                      depth + 1, next)) {
      return true;
    }
    // ...then let it match exactly one character.
    const CHAR* eval_after_one = eval;
    next(&eval_after_one, eval_end);
    return MatchPatternT(eval_after_one, eval_end, rest_of_pattern,
                         pattern_end, depth + 1, next);
  }

  // A character that is not a wildcard and did not match literally.
  if (*pattern != '*')
    return false;

  // Collapse duplicate wild cards (********** into *) so that the
  // method does not recurse unnecessarily. http://crbug.com/52839
  EatWildcard(&rest_of_pattern, pattern_end, next);

  // Try the remainder of the pattern against every suffix of the input.
  for (; eval != eval_end; eval++) {
    if (MatchPatternT(eval, eval_end, rest_of_pattern, pattern_end,
                      depth + 1, next)) {
      return true;
    }
  }

  // The input has been used up; the match succeeds only if the pattern
  // consists of nothing but wildcards from here on.
  EatWildcard(&pattern, pattern_end, next);
  return pattern == pattern_end;
}
1038 
1039 struct NextCharUTF8 {
operator ()NextCharUTF81040   base_icu::UChar32 operator()(const char** p, const char* end) {
1041     base_icu::UChar32 c;
1042     int offset = 0;
1043     CBU8_NEXT(*p, offset, end - *p, c);
1044     *p += offset;
1045     return c;
1046   }
1047 };
1048 
1049 struct NextCharUTF16 {
operator ()NextCharUTF161050   base_icu::UChar32 operator()(const char16** p, const char16* end) {
1051     base_icu::UChar32 c;
1052     int offset = 0;
1053     CBU16_NEXT(*p, offset, end - *p, c);
1054     *p += offset;
1055     return c;
1056   }
1057 };
1058 
MatchPattern(const base::StringPiece & eval,const base::StringPiece & pattern)1059 bool MatchPattern(const base::StringPiece& eval,
1060                   const base::StringPiece& pattern) {
1061   return MatchPatternT(eval.data(), eval.data() + eval.size(),
1062                        pattern.data(), pattern.data() + pattern.size(),
1063                        0, NextCharUTF8());
1064 }
1065 
MatchPattern(const string16 & eval,const string16 & pattern)1066 bool MatchPattern(const string16& eval, const string16& pattern) {
1067   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1068                        pattern.c_str(), pattern.c_str() + pattern.size(),
1069                        0, NextCharUTF16());
1070 }
1071 
1072 // The following code is compatible with the OpenBSD lcpy interface.  See:
1073 //   http://www.gratisoft.us/todd/papers/strlcpy.html
1074 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1075 
1076 namespace {
1077 
// Copies the zero-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator. When |dst_size| is non-zero
// the result is always zero-terminated, truncating |src| if it does not fit.
// Returns the length of |src| in characters, not counting the terminator.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR current = src[copied];
    dst[copied] = current;
    if (current == 0)  // The terminator was copied; |src| fit entirely.
      return copied;
    ++copied;
  }

  // |dst| is full and the last slot holds a character of |src|; overwrite it
  // with the terminator.
  if (dst_size > 0)
    dst[dst_size - 1] = 0;

  // Walk the part of |src| that was not copied to obtain its full length.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}
1093 
1094 }  // namespace
1095 
strlcpy(char * dst,const char * src,size_t dst_size)1096 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1097   return lcpyT<char>(dst, src, dst_size);
1098 }
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)1099 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1100   return lcpyT<wchar_t>(dst, src, dst_size);
1101 }
1102