1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/string_util.h"
6
7 #include "build/build_config.h"
8
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
19
20 #include <algorithm>
21 #include <vector>
22
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/third_party/dmg_fp/dmg_fp.h"
27 #include "base/utf_string_conversion_utils.h"
28 #include "base/utf_string_conversions.h"
29 #include "base/third_party/icu/icu_utf.h"
30
31 namespace {
32
33 // Force the singleton used by Empty[W]String[16] to be a unique type. This
34 // prevents other code that might accidentally use Singleton<string> from
35 // getting our internal one.
36 struct EmptyStrings {
EmptyStrings__anon687084bc0111::EmptyStrings37 EmptyStrings() {}
38 const std::string s;
39 const std::wstring ws;
40 const string16 s16;
41
GetInstance__anon687084bc0111::EmptyStrings42 static EmptyStrings* GetInstance() {
43 return Singleton<EmptyStrings>::get();
44 }
45 };
46
// Bookkeeping record used by ReplaceStringPlaceholders: remembers which
// parameter was substituted and where in the output string it was placed.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter), offset(offset) {}

  uintptr_t parameter;  // Index of the parameter.
  size_t offset;        // Starting position in the string.
};
60
CompareParameter(const ReplacementOffset & elem1,const ReplacementOffset & elem2)61 static bool CompareParameter(const ReplacementOffset& elem1,
62 const ReplacementOffset& elem2) {
63 return elem1.parameter < elem2.parameter;
64 }
65
66 } // namespace
67
68 namespace base {
69
// Scans a wprintf-style |format| and returns false as soon as it meets a
// conversion that this function treats as unportable: %s or %c without an 'l'
// modifier, or any of %S %C %F %D %O %U. Returns true otherwise, including
// when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that end a specification.
  static const wchar_t kConversions[] = L"diouxXeEfgGaAcspn%";
  // Characters that are rejected regardless of modifiers.
  static const wchar_t kNeverPortable[] = L"SCFDOU";

  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      ++cursor;
      continue;
    }

    // Eat up characters until reaching a known specifier.
    bool seen_l = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == '\0') {
        // The format string ended in the middle of a specification. Call it
        // portable because no unportable specifications were found. The
        // string is equally broken on all platforms.
        return true;
      }

      if (ch == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        seen_l = true;
      } else {
        const bool narrow_text = (ch == 's' || ch == 'c') && !seen_l;
        if (narrow_text || wcschr(kNeverPortable, ch))
          return false;
      }

      if (wcschr(kConversions, ch))
        break;  // Portable, keep scanning the rest of the format string.
    }

    // Step past the conversion character that closed the specification.
    ++cursor;
  }

  return true;
}
104
105 } // namespace base
106
107
EmptyString()108 const std::string& EmptyString() {
109 return EmptyStrings::GetInstance()->s;
110 }
111
EmptyWString()112 const std::wstring& EmptyWString() {
113 return EmptyStrings::GetInstance()->ws;
114 }
115
EmptyString16()116 const string16& EmptyString16() {
117 return EmptyStrings::GetInstance()->s16;
118 }
119
// Comma-separated list of the code points this file treats as whitespace,
// ending with a 0 so that an array initialized from it is null-terminated.
#define WHITESPACE_UNICODE \
  0x0009, /* Character Tabulation */ \
  0x000A, /* Line Feed */ \
  0x000B, /* Line Tabulation */ \
  0x000C, /* Form Feed */ \
  0x000D, /* Carriage Return */ \
  0x0020, /* Space */ \
  0x0085, /* Next Line */ \
  0x00A0, /* No-Break Space */ \
  0x1680, /* Ogham Space Mark */ \
  0x180E, /* Mongolian Vowel Separator */ \
  0x2000, /* En Quad */ \
  0x2001, /* Em Quad */ \
  0x2002, /* En Space */ \
  0x2003, /* Em Space */ \
  0x2004, /* Three-Per-Em Space */ \
  0x2005, /* Four-Per-Em Space */ \
  0x2006, /* Six-Per-Em Space */ \
  0x2007, /* Figure Space */ \
  0x2008, /* Punctuation Space */ \
  0x2009, /* Thin Space */ \
  0x200A, /* Hair Space */ \
  0x200C, /* Zero Width Non-Joiner */ \
  0x2028, /* Line Separator */ \
  0x2029, /* Paragraph Separator */ \
  0x202F, /* Narrow No-Break Space */ \
  0x205F, /* Medium Mathematical Space */ \
  0x3000, /* Ideographic Space */ \
  0
149
150 const wchar_t kWhitespaceWide[] = {
151 WHITESPACE_UNICODE
152 };
153 const char16 kWhitespaceUTF16[] = {
154 WHITESPACE_UNICODE
155 };
// ASCII whitespace: <control-0009> to <control-000D> followed by Space. The
// string literal supplies the terminating 0.
const char kWhitespaceASCII[] = "\t\n\v\f\r ";
165
// The three bytes EF BB BF, followed by a terminating 0.
const char kUtf8ByteOrderMark[] = { '\xEF', '\xBB', '\xBF', '\0' };
167
// Copies |input| into |output| with every character that appears in the
// null-terminated |remove_chars| dropped. Returns true if at least one
// character was removed. |output| may point at |input|.
//
// The result is built in one pass over |input| instead of erasing matches one
// at a time, so the cost no longer grows with the number of removed
// characters times the length of the remaining tail.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  typedef typename STR::traits_type Traits;
  const size_t remove_count = Traits::length(remove_chars);

  STR kept;
  kept.reserve(input.size());
  for (typename STR::const_iterator it = input.begin(); it != input.end();
       ++it) {
    // Traits::find returns NULL when *it is not one of |remove_chars|.
    if (!Traits::find(remove_chars, remove_count, *it))
      kept.push_back(*it);
  }

  // Compare sizes before the swap: |output| may alias |input|.
  const bool removed = kept.size() != input.size();
  output->swap(kept);
  return removed;
}
186
RemoveChars(const std::wstring & input,const wchar_t remove_chars[],std::wstring * output)187 bool RemoveChars(const std::wstring& input,
188 const wchar_t remove_chars[],
189 std::wstring* output) {
190 return RemoveCharsT(input, remove_chars, output);
191 }
192
193 #if !defined(WCHAR_T_IS_UTF16)
RemoveChars(const string16 & input,const char16 remove_chars[],string16 * output)194 bool RemoveChars(const string16& input,
195 const char16 remove_chars[],
196 string16* output) {
197 return RemoveCharsT(input, remove_chars, output);
198 }
199 #endif
200
RemoveChars(const std::string & input,const char remove_chars[],std::string * output)201 bool RemoveChars(const std::string& input,
202 const char remove_chars[],
203 std::string* output) {
204 return RemoveCharsT(input, remove_chars, output);
205 }
206
207 template<typename STR>
TrimStringT(const STR & input,const typename STR::value_type trim_chars[],TrimPositions positions,STR * output)208 TrimPositions TrimStringT(const STR& input,
209 const typename STR::value_type trim_chars[],
210 TrimPositions positions,
211 STR* output) {
212 // Find the edges of leading/trailing whitespace as desired.
213 const typename STR::size_type last_char = input.length() - 1;
214 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
215 input.find_first_not_of(trim_chars) : 0;
216 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
217 input.find_last_not_of(trim_chars) : last_char;
218
219 // When the string was all whitespace, report that we stripped off whitespace
220 // from whichever position the caller was interested in. For empty input, we
221 // stripped no whitespace, but we still need to clear |output|.
222 if (input.empty() ||
223 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
224 bool input_was_empty = input.empty(); // in case output == &input
225 output->clear();
226 return input_was_empty ? TRIM_NONE : positions;
227 }
228
229 // Trim the whitespace.
230 *output =
231 input.substr(first_good_char, last_good_char - first_good_char + 1);
232
233 // Return where we trimmed from.
234 return static_cast<TrimPositions>(
235 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
236 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
237 }
238
TrimString(const std::wstring & input,const wchar_t trim_chars[],std::wstring * output)239 bool TrimString(const std::wstring& input,
240 const wchar_t trim_chars[],
241 std::wstring* output) {
242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
243 }
244
245 #if !defined(WCHAR_T_IS_UTF16)
TrimString(const string16 & input,const char16 trim_chars[],string16 * output)246 bool TrimString(const string16& input,
247 const char16 trim_chars[],
248 string16* output) {
249 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
250 }
251 #endif
252
TrimString(const std::string & input,const char trim_chars[],std::string * output)253 bool TrimString(const std::string& input,
254 const char trim_chars[],
255 std::string* output) {
256 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
257 }
258
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)259 void TruncateUTF8ToByteSize(const std::string& input,
260 const size_t byte_size,
261 std::string* output) {
262 DCHECK(output);
263 if (byte_size > input.length()) {
264 *output = input;
265 return;
266 }
267 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
268 // Note: This cast is necessary because CBU8_NEXT uses int32s.
269 int32 truncation_length = static_cast<int32>(byte_size);
270 int32 char_index = truncation_length - 1;
271 const char* data = input.data();
272
273 // Using CBU8, we will move backwards from the truncation point
274 // to the beginning of the string looking for a valid UTF8
275 // character. Once a full UTF8 character is found, we will
276 // truncate the string to the end of that character.
277 while (char_index >= 0) {
278 int32 prev = char_index;
279 uint32 code_point = 0;
280 CBU8_NEXT(data, char_index, truncation_length, code_point);
281 if (!base::IsValidCharacter(code_point) ||
282 !base::IsValidCodepoint(code_point)) {
283 char_index = prev - 1;
284 } else {
285 break;
286 }
287 }
288
289 if (char_index >= 0 )
290 *output = input.substr(0, char_index);
291 else
292 output->clear();
293 }
294
TrimWhitespace(const std::wstring & input,TrimPositions positions,std::wstring * output)295 TrimPositions TrimWhitespace(const std::wstring& input,
296 TrimPositions positions,
297 std::wstring* output) {
298 return TrimStringT(input, kWhitespaceWide, positions, output);
299 }
300
301 #if !defined(WCHAR_T_IS_UTF16)
TrimWhitespace(const string16 & input,TrimPositions positions,string16 * output)302 TrimPositions TrimWhitespace(const string16& input,
303 TrimPositions positions,
304 string16* output) {
305 return TrimStringT(input, kWhitespaceUTF16, positions, output);
306 }
307 #endif
308
TrimWhitespaceASCII(const std::string & input,TrimPositions positions,std::string * output)309 TrimPositions TrimWhitespaceASCII(const std::string& input,
310 TrimPositions positions,
311 std::string* output) {
312 return TrimStringT(input, kWhitespaceASCII, positions, output);
313 }
314
315 // This function is only for backward-compatibility.
316 // To be removed when all callers are updated.
TrimWhitespace(const std::string & input,TrimPositions positions,std::string * output)317 TrimPositions TrimWhitespace(const std::string& input,
318 TrimPositions positions,
319 std::string* output) {
320 return TrimWhitespaceASCII(input, positions, output);
321 }
322
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every interior whitespace run is reduced to a single space.
// When |trim_sequences_with_line_breaks| is true, a run that contains '\n' or
// '\r' is removed entirely instead of being replaced by a space.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  typedef typename STR::value_type Char;
  const Char kSpace = L' ';

  STR collapsed;
  collapsed.reserve(text.size());

  // Start out as if we were already inside a whitespace run that has been
  // dropped, so that leading whitespace produces no output.
  bool inside_run = true;
  bool run_dropped = true;

  for (typename STR::const_iterator it = text.begin(); it != text.end();
       ++it) {
    const Char ch = *it;
    if (!IsWhitespace(ch)) {
      // Non-whitespace characters are copied straight across.
      inside_run = false;
      run_dropped = false;
      collapsed.push_back(ch);
      continue;
    }

    if (!inside_run) {
      // First whitespace character of a run: emit the single space.
      inside_run = true;
      collapsed.push_back(kSpace);
    }

    const bool is_line_break = (ch == '\n') || (ch == '\r');
    if (trim_sequences_with_line_breaks && !run_dropped && is_line_break) {
      // Whitespace sequences containing CR or LF are eliminated entirely:
      // take back the space emitted for this run.
      run_dropped = true;
      collapsed.resize(collapsed.size() - 1);
    }
  }

  if (inside_run && !run_dropped) {
    // The text ended inside a run whose space is still in the output; any
    // trailing whitespace is eliminated.
    collapsed.resize(collapsed.size() - 1);
  }

  return collapsed;
}
364
CollapseWhitespace(const std::wstring & text,bool trim_sequences_with_line_breaks)365 std::wstring CollapseWhitespace(const std::wstring& text,
366 bool trim_sequences_with_line_breaks) {
367 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
368 }
369
370 #if !defined(WCHAR_T_IS_UTF16)
// string16 front-end for CollapseWhitespaceT.
string16 CollapseWhitespace(const string16& text,
                            bool trim_sequences_with_line_breaks) {
  const bool drop_line_break_runs = trim_sequences_with_line_breaks;
  return CollapseWhitespaceT(text, drop_line_break_runs);
}
375 #endif
376
CollapseWhitespaceASCII(const std::string & text,bool trim_sequences_with_line_breaks)377 std::string CollapseWhitespaceASCII(const std::string& text,
378 bool trim_sequences_with_line_breaks) {
379 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
380 }
381
ContainsOnlyWhitespaceASCII(const std::string & str)382 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
383 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
384 if (!IsAsciiWhitespace(*i))
385 return false;
386 }
387 return true;
388 }
389
ContainsOnlyWhitespace(const string16 & str)390 bool ContainsOnlyWhitespace(const string16& str) {
391 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
392 if (!IsWhitespace(*i))
393 return false;
394 }
395 return true;
396 }
397
// Returns true if |input| has no character outside the set |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  return input.find_first_not_of(characters) == STR::npos;
}
407
ContainsOnlyChars(const std::wstring & input,const std::wstring & characters)408 bool ContainsOnlyChars(const std::wstring& input,
409 const std::wstring& characters) {
410 return ContainsOnlyCharsT(input, characters);
411 }
412
413 #if !defined(WCHAR_T_IS_UTF16)
ContainsOnlyChars(const string16 & input,const string16 & characters)414 bool ContainsOnlyChars(const string16& input, const string16& characters) {
415 return ContainsOnlyCharsT(input, characters);
416 }
417 #endif
418
ContainsOnlyChars(const std::string & input,const std::string & characters)419 bool ContainsOnlyChars(const std::string& input,
420 const std::string& characters) {
421 return ContainsOnlyCharsT(input, characters);
422 }
423
WideToASCII(const std::wstring & wide)424 std::string WideToASCII(const std::wstring& wide) {
425 DCHECK(IsStringASCII(wide)) << wide;
426 return std::string(wide.begin(), wide.end());
427 }
428
UTF16ToASCII(const string16 & utf16)429 std::string UTF16ToASCII(const string16& utf16) {
430 DCHECK(IsStringASCII(utf16)) << utf16;
431 return std::string(utf16.begin(), utf16.end());
432 }
433
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| into |latin1|. Returns false, leaving |latin1| empty, as
// soon as a code unit outside 0..255 is found; returns true otherwise.
//
// Each code unit is compared as an unsigned value. On platforms where wchar_t
// is a signed type, a plain "> 255" test would let negative code units
// through and truncate them into a char.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.reserve(wide.size());
  latin1->clear();
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    const unsigned long code_unit = static_cast<unsigned long>(*it);
    if (code_unit > 0xFF)
      return false;
    output.push_back(static_cast<char>(code_unit));
  }
  latin1->swap(output);
  return true;
}
447
448 template<class STR>
DoIsStringASCII(const STR & str)449 static bool DoIsStringASCII(const STR& str) {
450 for (size_t i = 0; i < str.length(); i++) {
451 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
452 if (c > 0x7F)
453 return false;
454 }
455 return true;
456 }
457
IsStringASCII(const std::wstring & str)458 bool IsStringASCII(const std::wstring& str) {
459 return DoIsStringASCII(str);
460 }
461
462 #if !defined(WCHAR_T_IS_UTF16)
IsStringASCII(const string16 & str)463 bool IsStringASCII(const string16& str) {
464 return DoIsStringASCII(str);
465 }
466 #endif
467
IsStringASCII(const base::StringPiece & str)468 bool IsStringASCII(const base::StringPiece& str) {
469 return DoIsStringASCII(str);
470 }
471
IsStringUTF8(const std::string & str)472 bool IsStringUTF8(const std::string& str) {
473 const char *src = str.data();
474 int32 src_len = static_cast<int32>(str.length());
475 int32 char_index = 0;
476
477 while (char_index < src_len) {
478 int32 code_point;
479 CBU8_NEXT(src, char_index, src_len, code_point);
480 if (!base::IsValidCharacter(code_point))
481 return false;
482 }
483 return true;
484 }
485
486 template<typename Iter>
DoLowerCaseEqualsASCII(Iter a_begin,Iter a_end,const char * b)487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
488 Iter a_end,
489 const char* b) {
490 for (Iter it = a_begin; it != a_end; ++it, ++b) {
491 if (!*b || base::ToLowerASCII(*it) != *b)
492 return false;
493 }
494 return *b == 0;
495 }
496
497 // Front-ends for LowerCaseEqualsASCII.
LowerCaseEqualsASCII(const std::string & a,const char * b)498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
499 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
500 }
501
LowerCaseEqualsASCII(const std::wstring & a,const char * b)502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
503 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
504 }
505
506 #if !defined(WCHAR_T_IS_UTF16)
LowerCaseEqualsASCII(const string16 & a,const char * b)507 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
508 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
509 }
510 #endif
511
LowerCaseEqualsASCII(std::string::const_iterator a_begin,std::string::const_iterator a_end,const char * b)512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
513 std::string::const_iterator a_end,
514 const char* b) {
515 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
516 }
517
LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,std::wstring::const_iterator a_end,const char * b)518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
519 std::wstring::const_iterator a_end,
520 const char* b) {
521 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
522 }
523
524 #if !defined(WCHAR_T_IS_UTF16)
LowerCaseEqualsASCII(string16::const_iterator a_begin,string16::const_iterator a_end,const char * b)525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
526 string16::const_iterator a_end,
527 const char* b) {
528 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
529 }
530 #endif
531
532 #if !defined(ANDROID)
LowerCaseEqualsASCII(const char * a_begin,const char * a_end,const char * b)533 bool LowerCaseEqualsASCII(const char* a_begin,
534 const char* a_end,
535 const char* b) {
536 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
537 }
538 #endif // !ANDROID
539
540 #if !defined(ANDROID)
LowerCaseEqualsASCII(const wchar_t * a_begin,const wchar_t * a_end,const char * b)541 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
542 const wchar_t* a_end,
543 const char* b) {
544 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
545 }
546 #endif // !ANDROID
547
548 #if !defined(WCHAR_T_IS_UTF16) && !defined(ANDROID)
LowerCaseEqualsASCII(const char16 * a_begin,const char16 * a_end,const char * b)549 bool LowerCaseEqualsASCII(const char16* a_begin,
550 const char16* a_end,
551 const char* b) {
552 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
553 }
554 #endif
555
EqualsASCII(const string16 & a,const base::StringPiece & b)556 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
557 if (a.length() != b.length())
558 return false;
559 return std::equal(b.begin(), b.end(), a.begin());
560 }
561
StartsWithASCII(const std::string & str,const std::string & search,bool case_sensitive)562 bool StartsWithASCII(const std::string& str,
563 const std::string& search,
564 bool case_sensitive) {
565 if (case_sensitive)
566 return str.compare(0, search.length(), search) == 0;
567 else
568 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
569 }
570
571 template <typename STR>
StartsWithT(const STR & str,const STR & search,bool case_sensitive)572 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
573 if (case_sensitive) {
574 return str.compare(0, search.length(), search) == 0;
575 } else {
576 if (search.size() > str.size())
577 return false;
578 return std::equal(search.begin(), search.end(), str.begin(),
579 base::CaseInsensitiveCompare<typename STR::value_type>());
580 }
581 }
582
StartsWith(const std::wstring & str,const std::wstring & search,bool case_sensitive)583 bool StartsWith(const std::wstring& str, const std::wstring& search,
584 bool case_sensitive) {
585 return StartsWithT(str, search, case_sensitive);
586 }
587
588 #if !defined(WCHAR_T_IS_UTF16)
StartsWith(const string16 & str,const string16 & search,bool case_sensitive)589 bool StartsWith(const string16& str, const string16& search,
590 bool case_sensitive) {
591 return StartsWithT(str, search, case_sensitive);
592 }
593 #endif
594
595 template <typename STR>
EndsWithT(const STR & str,const STR & search,bool case_sensitive)596 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
597 typename STR::size_type str_length = str.length();
598 typename STR::size_type search_length = search.length();
599 if (search_length > str_length)
600 return false;
601 if (case_sensitive) {
602 return str.compare(str_length - search_length, search_length, search) == 0;
603 } else {
604 return std::equal(search.begin(), search.end(),
605 str.begin() + (str_length - search_length),
606 base::CaseInsensitiveCompare<typename STR::value_type>());
607 }
608 }
609
EndsWith(const std::string & str,const std::string & search,bool case_sensitive)610 bool EndsWith(const std::string& str, const std::string& search,
611 bool case_sensitive) {
612 return EndsWithT(str, search, case_sensitive);
613 }
614
EndsWith(const std::wstring & str,const std::wstring & search,bool case_sensitive)615 bool EndsWith(const std::wstring& str, const std::wstring& search,
616 bool case_sensitive) {
617 return EndsWithT(str, search, case_sensitive);
618 }
619
620 #if !defined(WCHAR_T_IS_UTF16)
EndsWith(const string16 & str,const string16 & search,bool case_sensitive)621 bool EndsWith(const string16& str, const string16& search,
622 bool case_sensitive) {
623 return EndsWithT(str, search, case_sensitive);
624 }
625 #endif
626
GetByteDisplayUnits(int64 bytes)627 DataUnits GetByteDisplayUnits(int64 bytes) {
628 // The byte thresholds at which we display amounts. A byte count is displayed
629 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
630 // This must match the DataUnits enum.
631 static const int64 kUnitThresholds[] = {
632 0, // DATA_UNITS_BYTE,
633 3*1024, // DATA_UNITS_KIBIBYTE,
634 2*1024*1024, // DATA_UNITS_MEBIBYTE,
635 1024*1024*1024 // DATA_UNITS_GIBIBYTE,
636 };
637
638 if (bytes < 0) {
639 NOTREACHED() << "Negative bytes value";
640 return DATA_UNITS_BYTE;
641 }
642
643 int unit_index = arraysize(kUnitThresholds);
644 while (--unit_index > 0) {
645 if (bytes >= kUnitThresholds[unit_index])
646 break;
647 }
648
649 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);
650 return DataUnits(unit_index);
651 }
652
// TODO(mpcomplete): deal with locale
// Suffixes for byte amounts, indexed by DataUnits. This must match the
// DataUnits enum.
static const char* const kByteStrings[] = { "B", "kB", "MB", "GB" };
661
// Suffixes for transfer speeds, in the same order as kByteStrings.
static const char* const kSpeedStrings[] = { "B/s", "kB/s", "MB/s", "GB/s" };
668
FormatBytesInternal(int64 bytes,DataUnits units,bool show_units,const char * const * suffix)669 string16 FormatBytesInternal(int64 bytes,
670 DataUnits units,
671 bool show_units,
672 const char* const* suffix) {
673 if (bytes < 0) {
674 NOTREACHED() << "Negative bytes value";
675 return string16();
676 }
677
678 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);
679
680 // Put the quantity in the right units.
681 double unit_amount = static_cast<double>(bytes);
682 for (int i = 0; i < units; ++i)
683 unit_amount /= 1024.0;
684
685 char buf[64];
686 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100)
687 base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount);
688 else
689 base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount);
690
691 std::string ret(buf);
692 if (show_units) {
693 ret += " ";
694 ret += suffix[units];
695 }
696
697 return ASCIIToUTF16(ret);
698 }
699
FormatBytes(int64 bytes,DataUnits units,bool show_units)700 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) {
701 return FormatBytesInternal(bytes, units, show_units, kByteStrings);
702 }
703
FormatSpeed(int64 bytes,DataUnits units,bool show_units)704 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
705 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
706 }
707
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Text inserted by a replacement is never searched
// again. Does nothing when |start_offset| is npos or not inside the string.
//
// |find_this| must not be empty. This is DCHECKed; in addition the function
// returns without modifying |*str| in that case, because an empty needle
// matches at every position and, together with an empty |replace_with|, the
// search would never advance.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  if (find_this.empty())
    return;  // Guard for the case where the DCHECK does not stop execution.

  typename StringType::size_type offs = str->find(find_this, start_offset);
  while (offs != StringType::npos) {
    str->replace(offs, find_this.length(), replace_with);
    if (!replace_all)
      break;
    // Resume just past the text that was inserted.
    offs = str->find(find_this, offs + replace_with.length());
  }
}
727
ReplaceFirstSubstringAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)728 void ReplaceFirstSubstringAfterOffset(string16* str,
729 string16::size_type start_offset,
730 const string16& find_this,
731 const string16& replace_with) {
732 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
733 false); // replace first instance
734 }
735
ReplaceFirstSubstringAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)736 void ReplaceFirstSubstringAfterOffset(std::string* str,
737 std::string::size_type start_offset,
738 const std::string& find_this,
739 const std::string& replace_with) {
740 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
741 false); // replace first instance
742 }
743
ReplaceSubstringsAfterOffset(string16 * str,string16::size_type start_offset,const string16 & find_this,const string16 & replace_with)744 void ReplaceSubstringsAfterOffset(string16* str,
745 string16::size_type start_offset,
746 const string16& find_this,
747 const string16& replace_with) {
748 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
749 true); // replace all instances
750 }
751
ReplaceSubstringsAfterOffset(std::string * str,std::string::size_type start_offset,const std::string & find_this,const std::string & replace_with)752 void ReplaceSubstringsAfterOffset(std::string* str,
753 std::string::size_type start_offset,
754 const std::string& find_this,
755 const std::string& replace_with) {
756 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
757 true); // replace all instances
758 }
759
760
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens|, which is cleared first. Consecutive
// delimiters therefore produce no empty tokens. Returns the number of tokens.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Pos;

  tokens->clear();

  for (Pos token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos; ) {
    const Pos token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // Last token: it runs to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
781
Tokenize(const std::wstring & str,const std::wstring & delimiters,std::vector<std::wstring> * tokens)782 size_t Tokenize(const std::wstring& str,
783 const std::wstring& delimiters,
784 std::vector<std::wstring>* tokens) {
785 return TokenizeT(str, delimiters, tokens);
786 }
787
788 #if !defined(WCHAR_T_IS_UTF16)
Tokenize(const string16 & str,const string16 & delimiters,std::vector<string16> * tokens)789 size_t Tokenize(const string16& str,
790 const string16& delimiters,
791 std::vector<string16>* tokens) {
792 return TokenizeT(str, delimiters, tokens);
793 }
794 #endif
795
Tokenize(const std::string & str,const std::string & delimiters,std::vector<std::string> * tokens)796 size_t Tokenize(const std::string& str,
797 const std::string& delimiters,
798 std::vector<std::string>* tokens) {
799 return TokenizeT(str, delimiters, tokens);
800 }
801
Tokenize(const base::StringPiece & str,const base::StringPiece & delimiters,std::vector<base::StringPiece> * tokens)802 size_t Tokenize(const base::StringPiece& str,
803 const base::StringPiece& delimiters,
804 std::vector<base::StringPiece>* tokens) {
805 return TokenizeT(str, delimiters, tokens);
806 }
807
// Concatenates |parts| with |sep| placed between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  for (typename std::vector<STR>::const_iterator part = parts.begin();
       part != parts.end(); ++part) {
    // The separator goes before every element except the first.
    if (part != parts.begin())
      joined += sep;
    joined += *part;
  }
  return joined;
}
825
JoinString(const std::vector<std::string> & parts,char sep)826 std::string JoinString(const std::vector<std::string>& parts, char sep) {
827 return JoinStringT(parts, sep);
828 }
829
JoinString(const std::vector<string16> & parts,char16 sep)830 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
831 return JoinStringT(parts, sep);
832 }
833
834 template<class FormatStringType, class OutStringType>
DoReplaceStringPlaceholders(const FormatStringType & format_string,const std::vector<OutStringType> & subst,std::vector<size_t> * offsets)835 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
836 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
837 size_t substitutions = subst.size();
838 DCHECK(substitutions < 10);
839
840 size_t sub_length = 0;
841 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
842 iter != subst.end(); ++iter) {
843 sub_length += iter->length();
844 }
845
846 OutStringType formatted;
847 formatted.reserve(format_string.length() + sub_length);
848
849 std::vector<ReplacementOffset> r_offsets;
850 for (typename FormatStringType::const_iterator i = format_string.begin();
851 i != format_string.end(); ++i) {
852 if ('$' == *i) {
853 if (i + 1 != format_string.end()) {
854 ++i;
855 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
856 if ('$' == *i) {
857 while (i != format_string.end() && '$' == *i) {
858 formatted.push_back('$');
859 ++i;
860 }
861 --i;
862 } else {
863 uintptr_t index = *i - '1';
864 if (offsets) {
865 ReplacementOffset r_offset(index,
866 static_cast<int>(formatted.size()));
867 r_offsets.insert(std::lower_bound(r_offsets.begin(),
868 r_offsets.end(),
869 r_offset,
870 &CompareParameter),
871 r_offset);
872 }
873 if (index < substitutions)
874 formatted.append(subst.at(index));
875 }
876 }
877 } else {
878 formatted.push_back(*i);
879 }
880 }
881 if (offsets) {
882 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
883 i != r_offsets.end(); ++i) {
884 offsets->push_back(i->offset);
885 }
886 }
887 return formatted;
888 }
889
// string16 front-end for DoReplaceStringPlaceholders.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const std::vector<string16>& subst,
                                   std::vector<size_t>* offsets) {
  string16 formatted =
      DoReplaceStringPlaceholders(format_string, subst, offsets);
  return formatted;
}
895
ReplaceStringPlaceholders(const base::StringPiece & format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)896 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
897 const std::vector<std::string>& subst,
898 std::vector<size_t>* offsets) {
899 return DoReplaceStringPlaceholders(format_string, subst, offsets);
900 }
901
// Convenience overload for a format string that takes the single
// substitution |a|.
//
// Returns the expanded string. If |offset| is non-NULL it receives the
// position at which a placeholder was expanded in the result. The format
// string is expected to produce exactly one offset (checked with a DCHECK).
// If it produced none, |*offset| is set to string16::npos instead of reading
// past the end of an empty vector.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const string16& a,
                                   size_t* offset) {
  std::vector<size_t> offsets;
  std::vector<string16> subst;
  subst.push_back(a);
  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

  DCHECK(offsets.size() == 1);
  if (offset) {
    // Guard the element access: when the DCHECK above is not fatal, an empty
    // |offsets| would otherwise be an out-of-bounds read.
    *offset = offsets.empty() ? string16::npos : offsets[0];
  }
  return result;
}
916
IsWildcard(base_icu::UChar32 character)917 static bool IsWildcard(base_icu::UChar32 character) {
918 return character == '*' || character == '?';
919 }
920
921 // Move the strings pointers to the point where they start to differ.
922 template <typename CHAR, typename NEXT>
EatSameChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,NEXT next)923 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
924 const CHAR** string, const CHAR* string_end,
925 NEXT next) {
926 const CHAR* escape = NULL;
927 while (*pattern != pattern_end && *string != string_end) {
928 if (!escape && IsWildcard(**pattern)) {
929 // We don't want to match wildcard here, except if it's escaped.
930 return;
931 }
932
933 // Check if the escapement char is found. If so, skip it and move to the
934 // next character.
935 if (!escape && **pattern == '\\') {
936 escape = *pattern;
937 next(pattern, pattern_end);
938 continue;
939 }
940
941 // Check if the chars match, if so, increment the ptrs.
942 const CHAR* pattern_next = *pattern;
943 const CHAR* string_next = *string;
944 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
945 if (pattern_char == next(&string_next, string_end) &&
946 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
947 *pattern = pattern_next;
948 *string = string_next;
949 } else {
950 // Uh ho, it did not match, we are done. If the last char was an
951 // escapement, that means that it was an error to advance the ptr here,
952 // let's put it back where it was. This also mean that the MatchPattern
953 // function will return false because if we can't match an escape char
954 // here, then no one will.
955 if (escape) {
956 *pattern = escape;
957 }
958 return;
959 }
960
961 escape = NULL;
962 }
963 }
964
// Advances |*pattern| past the run of consecutive wildcard characters it
// currently points at, stopping at the first non-wildcard or at |end|.
// |next| is used to step from one pattern character to the following one.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
973
// Returns true if the string [eval, eval_end) matches the wildcard pattern
// [pattern, pattern_end).
//
//  - '*' matches any run of characters, including an empty one.
//  - '?' matches either nothing or exactly one character.
//  - A backslash escapes the pattern character that follows it.
//
// |next| reads one character from a range and advances the pointer it is
// given. |depth| is the current recursion depth; once it exceeds kMaxDepth
// the function gives up and reports "no match".
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is exhausted, the rest of the pattern must be empty or
  // consist of wildcards only.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| is the pattern with its first character removed.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // A question mark: try the rest of the pattern against the current string
  // (the '?' matched nothing) and against the string with one character
  // eaten (the '?' matched that character).
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // A star: try to match every possible suffix of the string against the
  // remainder of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Nothing but wildcards is left in the pattern: they match whatever
    // remains of the string.
    if (next_pattern == pattern_end)
      return true;

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // Step by one whole character, not one code unit, so that a candidate
      // match never starts in the middle of a multi-unit character.
      next(&eval, eval_end);
    }

    // The string is exhausted but a non-wildcard part of the pattern is
    // still unmatched.
    return false;
  }

  return false;
}
1038
1039 struct NextCharUTF8 {
operator ()NextCharUTF81040 base_icu::UChar32 operator()(const char** p, const char* end) {
1041 base_icu::UChar32 c;
1042 int offset = 0;
1043 CBU8_NEXT(*p, offset, end - *p, c);
1044 *p += offset;
1045 return c;
1046 }
1047 };
1048
1049 struct NextCharUTF16 {
operator ()NextCharUTF161050 base_icu::UChar32 operator()(const char16** p, const char16* end) {
1051 base_icu::UChar32 c;
1052 int offset = 0;
1053 CBU16_NEXT(*p, offset, end - *p, c);
1054 *p += offset;
1055 return c;
1056 }
1057 };
1058
MatchPattern(const base::StringPiece & eval,const base::StringPiece & pattern)1059 bool MatchPattern(const base::StringPiece& eval,
1060 const base::StringPiece& pattern) {
1061 return MatchPatternT(eval.data(), eval.data() + eval.size(),
1062 pattern.data(), pattern.data() + pattern.size(),
1063 0, NextCharUTF8());
1064 }
1065
MatchPattern(const string16 & eval,const string16 & pattern)1066 bool MatchPattern(const string16& eval, const string16& pattern) {
1067 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1068 pattern.c_str(), pattern.c_str() + pattern.size(),
1069 0, NextCharUTF16());
1070 }
1071
1072 // The following code is compatible with the OpenBSD lcpy interface. See:
1073 // http://www.gratisoft.us/todd/papers/strlcpy.html
1074 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1075
namespace {

// Copies the zero-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator. Whenever |dst_size| is
// non-zero the result in |dst| is zero-terminated, truncating |src| if it
// does not fit. With |dst_size| == 0 nothing is written to |dst|.
//
// Returns the length of |src| in characters (terminator excluded), so a
// return value >= |dst_size| tells the caller that truncation occurred.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t pos = 0;
  while (pos < dst_size) {
    const CHAR unit = src[pos];
    dst[pos] = unit;
    if (unit == 0)  // The terminator was reached and copied: all done.
      return pos;
    ++pos;
  }

  // |dst| is full and its last slot holds a character of |src| rather than
  // a terminator. Overwrite that slot to terminate the string.
  if (dst_size > 0)
    dst[dst_size - 1] = 0;

  // Walk the part of |src| that did not fit to find its full length.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace
1095
strlcpy(char * dst,const char * src,size_t dst_size)1096 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1097 return lcpyT<char>(dst, src, dst_size);
1098 }
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)1099 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1100 return lcpyT<wchar_t>(dst, src, dst_size);
1101 }
1102