1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19
20 using ::testing::ElementsAre;
21
22 namespace base {
23
// Wide-character test vectors consumed by the TrimWhitespace test. Each entry
// gives the input text, the TrimPositions to request, the text expected in the
// output string, and the TrimPositions value the call is expected to return.
static const struct trim_case {
  const wchar_t* input;
  const TrimPositions positions;
  const wchar_t* output;
  const TrimPositions return_value;
} trim_cases[] = {
  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  // Nothing to trim: TRIM_NONE is the expected return value.
  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  {L"", TRIM_ALL, L"", TRIM_NONE},
  // Whitespace-only input.
  {L" ", TRIM_LEADING, L"", TRIM_LEADING},
  {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
  {L" ", TRIM_ALL, L"", TRIM_ALL},
  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  // U+2002, U+00A0 and U+3000 are expected to be trimmed as well.
  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
};
41
// Narrow-character test vectors consumed by the TrimWhitespace test when it
// exercises TrimWhitespaceASCII(). The fields have the same meaning as in
// trim_case: input, requested positions, expected output, expected return.
static const struct trim_case_ascii {
  const char* input;
  const TrimPositions positions;
  const char* output;
  const TrimPositions return_value;
} trim_cases_ascii[] = {
  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  // Nothing to trim: TRIM_NONE is the expected return value.
  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  {"", TRIM_ALL, "", TRIM_NONE},
  // Whitespace-only input.
  {" ", TRIM_LEADING, "", TRIM_LEADING},
  {" ", TRIM_TRAILING, "", TRIM_TRAILING},
  {" ", TRIM_ALL, "", TRIM_ALL},
  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
};
58
59 namespace {
60
61 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)62 bool Truncated(const std::string& input,
63 const size_t byte_size,
64 std::string* output) {
65 size_t prev = input.length();
66 TruncateUTF8ToByteSize(input, byte_size, output);
67 return prev != output->length();
68 }
69
70 } // namespace
71
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output ends up with a different length than the input.
// |output| is reused by every statement, and near the end it is also fed back
// in as the input, so the statements below are order-dependent.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // static_cast<size_t>(-1) is the largest size_t value.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A byte_size of 3 would split the second two-byte sequence, so only the
  // first one is expected to survive.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  {
    // The inputs contain NUL bytes, so std::string is built with an explicit
    // length. arraysize(array) counts the literal's terminating NUL too, which
    // makes array_string one byte longer than the escapes written out here.
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
                         10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  // The literal is split around "a" so that the 'a' is not parsed as part of
  // the preceding \x escape.
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
                        10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    // Built with an explicit length of 4 (no terminating NUL), and no
    // truncation is expected for this input.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  // NOTE: these bytes are the overlong 4-byte form of U+FFFE; U+1FFFE would be
  // "\xf0\x9f\xbf\xbe".
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  // NOTE: these bytes encode U+FFFFF; U+10FFFF would be "\xf4\x8f\xbf\xbf".
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Strings in legacy encodings that are valid in UTF-8, but
  // are invalid as UTF-8 in real data.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
                        &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output.
  // These two calls depend on |output| still holding "\xa7\x41\xa6\x6e" from
  // the statement just above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
230
// Runs TrimWhitespace() over trim_cases, checks that it accepts the same
// string as both input and output, and then runs TrimWhitespaceASCII() over
// trim_cases_ascii.
TEST(StringUtilTest, TrimWhitespace) {
  // One buffer is shared by all table entries on purpose, so each case starts
  // with the previous case's result still stored in it.
  string16 trimmed16;
  for (const trim_case& entry : trim_cases) {
    const string16 input16 = WideToUTF16(entry.input);
    EXPECT_EQ(entry.return_value,
              TrimWhitespace(input16, entry.positions, &trimmed16));
    EXPECT_EQ(WideToUTF16(entry.output), trimmed16);
  }

  // Input and output are the same string object.
  trimmed16 = ASCIIToUTF16(" This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed16, TRIM_ALL, &trimmed16));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed16);

  // Same aliasing, but the string consists of whitespace only.
  trimmed16 = ASCIIToUTF16(" \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed16, TRIM_ALL, &trimmed16));
  EXPECT_EQ(string16(), trimmed16);

  // Narrow-string variant; again one output buffer for all entries.
  std::string trimmed_ascii;
  for (const trim_case_ascii& entry : trim_cases_ascii) {
    EXPECT_EQ(
        entry.return_value,
        TrimWhitespaceASCII(entry.input, entry.positions, &trimmed_ascii));
    EXPECT_EQ(entry.output, trimmed_ascii);
  }
}
259
// Wide-character test vectors consumed by the CollapseWhitespace test. Each
// entry gives the input text, the boolean passed as the second argument of
// CollapseWhitespace(), and the expected result.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L" ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  // Inputs containing non-ASCII code points (U+2002, U+00A0, U+3000, U+1680,
  // U+2028) that are expected to be handled like the ASCII whitespace above.
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // From here on the flag is true. In the last entry the expected output has
  // no separator where the whitespace run contained \r or \n, and a single
  // space where it did not (" \t ").
  {L"", true, L""},
  {L"\n", true, L""},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
285
// Checks CollapseWhitespace() against every entry of collapse_cases, after
// converting the wide-character inputs and expectations to UTF-16.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& entry : collapse_cases) {
    const string16 expected = WideToUTF16(entry.output);
    EXPECT_EQ(expected,
              CollapseWhitespace(WideToUTF16(entry.input), entry.trim));
  }
}
293
// Narrow-character test vectors consumed by the CollapseWhitespaceASCII test.
// Each entry gives the input text, the boolean passed as the second argument
// of CollapseWhitespaceASCII(), and the expected result.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {" ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {" Test \n \t String ", false, "Test String"},
  {" Test String", false, "Test String"},
  {"Test String ", false, "Test String"},
  {"Test String", false, "Test String"},
  // From here on the flag is true. In the last entry the expected output has
  // no separator where the whitespace run contained \r or \n, and a single
  // space where it did not (" \t ").
  {"", true, ""},
  {"\n", true, ""},
  {" \r ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r Foo ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {" \tFoo bar \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
317
// Checks CollapseWhitespaceASCII() against every entry of
// collapse_cases_ascii.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& entry : collapse_cases_ascii) {
    EXPECT_EQ(entry.output,
              CollapseWhitespaceASCII(entry.input, entry.trim));
  }
}
324
// Checks which byte sequences IsStringUTF8() accepts. The per-line comments
// give the code point that the bytes decode to when read as UTF-8 (for the
// overlong forms: the code point the over-long encoding would stand for).
TEST(StringUtilTest, IsStringUTF8) {
  EXPECT_TRUE(IsStringUTF8("abc"));
  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM

  // surrogate code points
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

  // overlong sequences
  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "@A" (U+0040 U+0041)
  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+0825

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
  // The UTF-32 BOMs contain NUL bytes, so they are passed with an explicit
  // length; as plain C strings they would be cut off at the first NUL.
  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
  EXPECT_FALSE(IsStringUTF8(std::string("\xff\xfe\x00\x00", 4)));

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE
  // Overlong 4-byte form of U+FFFE; kept from the original test, where it was
  // mislabelled as U+1FFFE.
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));
  EXPECT_FALSE(IsStringUTF8("\xf0\x9f\xbf\xbe"));  // U+1FFFE
  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+FFFFF
  EXPECT_FALSE(IsStringUTF8("\xf4\x8f\xbf\xbf"));  // U+10FFFF
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
  // Strings in legacy encodings. We can certainly make up strings
  // in a legacy encoding that are valid in UTF-8, but in real data,
  // most of them are invalid as UTF-8.
  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC01 in EUC-KR
  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
  // representation, and the second uses a 2-byte sequence. The second version
  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
  // given codepoint must be used.
  // sizeof(kEmbeddedNull) also counts the literal's terminating NUL, so the
  // string under test contains two NUL bytes.
  static const char kEmbeddedNull[] = "embedded\0null";
  EXPECT_TRUE(IsStringUTF8(
      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
}
390
// Checks IsStringASCII() on char, char16 and wchar_t data: every fragment of
// an all-ASCII buffer must be reported as ASCII, and the same fragment must be
// rejected once any single character inside it is made non-ASCII.
TEST(StringUtilTest, IsStringASCII) {
  // The buffers are mutable because the test sets bits in individual
  // characters and clears them again afterwards.
  static char char_ascii[] =
      "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  static char16 char16_ascii[] = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
      'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
      '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
  static std::wstring wchar_ascii(
      L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

  // Test a variety of the fragment start positions and lengths in order to make
  // sure that bit masking in IsStringASCII works correctly.
  // Also, test that a non-ASCII character will be detected regardless of its
  // position inside the string.
  {
    // arraysize() counts the terminating NUL, hence the - 1.
    const size_t string_length = arraysize(char_ascii) - 1;
    for (size_t offset = 0; offset < 8; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
        // The fragment under test spans [offset, offset + len), so that is
        // the index range to corrupt. (Stopping at |len| would leave the tail
        // of the fragment untested whenever |offset| is non-zero.)
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char_ascii[char_pos] |= '\x80';
          EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
          char_ascii[char_pos] &= ~'\x80';
        }
      }
    }
  }

  {
    const size_t string_length = arraysize(char16_ascii) - 1;
    for (size_t offset = 0; offset < 4; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
        // Same index range as above: every position inside the fragment.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char16_ascii[char_pos] |= 0x80;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x80;
          // Also test when the upper half is non-zero.
          char16_ascii[char_pos] |= 0x100;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x100;
        }
      }
    }
  }

  {
    // The wide-string fragments always start at index 0, so [0, len) already
    // covers the whole fragment.
    const size_t string_length = wchar_ascii.length();
    for (size_t len = 0; len < string_length; ++len) {
      EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
      for (size_t char_pos = 0; char_pos < len; ++char_pos) {
        wchar_ascii[char_pos] |= 0x80;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x80;
        wchar_ascii[char_pos] |= 0x100;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x100;
#if defined(WCHAR_T_IS_UTF32)
        // Only meaningful when wchar_t is wide enough to hold bit 16.
        wchar_ascii[char_pos] |= 0x10000;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x10000;
#endif  // WCHAR_T_IS_UTF32
      }
    }
  }
}
464
// Round-trips ASCII text between narrow strings and UTF-16 with
// ASCIIToUTF16() / UTF16ToASCII(), including empty strings and a string with
// an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // The same text written once as a narrow and once as a wide literal.
  static const struct {
    const char* narrow;
    const wchar_t* wide;
  } kSamples[] = {
    {"Google Video", L"Google Video"},
    {"Hello, world\n", L"Hello, world\n"},
    {"0123ABCDwxyz \a\b\t\r\n!+,.~", L"0123ABCDwxyz \a\b\t\r\n!+,.~"},
  };

  for (const auto& sample : kSamples) {
    EXPECT_TRUE(IsStringASCII(sample.narrow));

    // narrow -> UTF-16
    EXPECT_EQ(WideToUTF16(sample.wide), ASCIIToUTF16(sample.narrow));

    // UTF-16 -> narrow
    const std::string round_tripped = UTF16ToASCII(WideToUTF16(sample.wide));
    EXPECT_EQ(sample.narrow, round_tripped);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Empty strings convert to empty strings in both directions.
  const string16 empty_utf16;
  const std::string empty_narrow;
  EXPECT_EQ(empty_narrow, UTF16ToASCII(empty_utf16));
  EXPECT_EQ(empty_utf16, ASCIIToUTF16(empty_narrow));

  // An embedded NUL must not shorten the converted string. The length leaves
  // out the literal's terminating NUL but keeps the embedded one.
  const char kTextWithNul[] = "test\0string";
  const int kTextLength = arraysize(kTextWithNul) - 1;
  const std::string narrow_in(kTextWithNul, kTextLength);

  const string16 widened = ASCIIToUTF16(narrow_in);
  EXPECT_EQ(static_cast<string16::size_type>(kTextLength), widened.length());

  const std::string narrow_out = UTF16ToASCII(widened);
  EXPECT_EQ(static_cast<std::string::size_type>(kTextLength),
            narrow_out.length());
  EXPECT_EQ(0, narrow_in.compare(narrow_out));
}
507
// Checks ToLowerASCII() on single char and char16 values and on whole narrow
// and UTF-16 strings.
TEST(StringUtilTest, ToLowerASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three characters as char16.
  const char16 kUpperC = 'C';
  const char16 kLowerC = 'c';
  const char16 kDigit2 = '2';
  EXPECT_EQ(kLowerC, ToLowerASCII(kUpperC));
  EXPECT_EQ(kLowerC, ToLowerASCII(kLowerC));
  EXPECT_EQ(kDigit2, ToLowerASCII(kDigit2));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  const string16 expected16 = ASCIIToUTF16("cc2");
  EXPECT_EQ(expected16, ToLowerASCII(ASCIIToUTF16("Cc2")));
}
520
// Checks ToUpperASCII() on single char and char16 values and on whole narrow
// and UTF-16 strings.
TEST(StringUtilTest, ToUpperASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three characters as char16.
  const char16 kUpperC = 'C';
  const char16 kLowerC = 'c';
  const char16 kDigit2 = '2';
  EXPECT_EQ(kUpperC, ToUpperASCII(kUpperC));
  EXPECT_EQ(kUpperC, ToUpperASCII(kLowerC));
  EXPECT_EQ(kDigit2, ToUpperASCII(kDigit2));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  const string16 expected16 = ASCIIToUTF16("CC2");
  EXPECT_EQ(expected16, ToUpperASCII(ASCIIToUTF16("Cc2")));
}
533
// Checks that LowerCaseEqualsASCII() reports a match between differently
// cased spellings and their lower-case form, for both UTF-16 and narrow
// first arguments.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* any_case;
    const char* lower_case;
  } kSamples[] = {
    { "FoO", "foo" },
    { "foo", "foo" },
    { "FOO", "foo" },
  };

  for (const auto& sample : kSamples) {
    // UTF-16 first argument.
    const string16 any_case16 = ASCIIToUTF16(sample.any_case);
    EXPECT_TRUE(LowerCaseEqualsASCII(any_case16, sample.lower_case));
    // Narrow first argument.
    EXPECT_TRUE(LowerCaseEqualsASCII(sample.any_case, sample.lower_case));
  }
}
551
// Checks the text FormatBytesUnlocalized() produces for a range of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  // Binary multiples used to spell out the byte counts below.
  static const int64_t kKiB = 1024;
  static const int64_t kMiB = 1024 * kKiB;
  static const int64_t kGiB = 1024 * kMiB;

  static const struct {
    int64_t byte_count;
    const char* formatted;
  } kSamples[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, "0 B"},
    {512, "512 B"},
    {kMiB, "1.0 MB"},
    {kGiB, "1.0 GB"},
    {10 * kGiB, "10.0 GB"},
    {99 * kGiB, "99.0 GB"},
    {105 * kGiB, "105 GB"},
    {105 * kGiB + 500 * kMiB, "105 GB"},
    // All bits set except the sign bit.
    {~(1LL << 63), "8192 PB"},

    {99 * kKiB + 103, "99.1 kB"},
    {kMiB + 103, "1.0 MB"},
    {kMiB + 205 * kKiB, "1.2 MB"},
    {kGiB + 927 * kMiB, "1.9 GB"},
    {10 * kGiB, "10.0 GB"},
    {100 * kGiB, "100 GB"},
  };

  for (const auto& sample : kSamples) {
    const string16 expected = ASCIIToUTF16(sample.formatted);
    EXPECT_EQ(expected, FormatBytesUnlocalized(sample.byte_count));
  }
}
// Checks ReplaceSubstringsAfterOffset(), which edits the string in place, by
// comparing the edited string against the expected text for each sample.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    const char* original;
    string16::size_type offset;
    const char* needle;
    const char* replacement;
    const char* result;
  } kSamples[] = {
    {"aaa", 0, "a", "b", "bbb"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMakingFour score and seven years agoit"
     "Four score and seven years agomuchFour score and seven years agolonger"
     "Four score and seven years ago"},
    // Offset far past the end of the string: the text is expected unchanged.
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    // Matches that start before the offset are expected to be left alone.
    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    {"abababab", 2, "ab", "c", "abccc"},
  };

  for (const auto& sample : kSamples) {
    string16 text = ASCIIToUTF16(sample.original);
    const string16 needle = ASCIIToUTF16(sample.needle);
    const string16 replacement = ASCIIToUTF16(sample.replacement);
    ReplaceSubstringsAfterOffset(&text, sample.offset, needle, replacement);
    EXPECT_EQ(ASCIIToUTF16(sample.result), text);
  }
}
616
// Checks ReplaceFirstSubstringAfterOffset(), which edits the string in place,
// by comparing the edited string against the expected text for each sample.
// In every sample at most one occurrence is expected to be replaced.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* original;
    string16::size_type offset;
    const char* needle;
    const char* replacement;
    const char* result;
  } kSamples[] = {
    {"aaa", 0, "a", "b", "baa"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "",
     "Remov some substrings inging"},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMaking it much longer "},
    // Offset far past the end of the string: the text is expected unchanged.
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& sample : kSamples) {
    string16 text = ASCIIToUTF16(sample.original);
    const string16 needle = ASCIIToUTF16(sample.needle);
    const string16 replacement = ASCIIToUTF16(sample.replacement);
    ReplaceFirstSubstringAfterOffset(&text, sample.offset, needle,
                                     replacement);
    EXPECT_EQ(ASCIIToUTF16(sample.result), text);
  }
}
646
// Checks HexDigitToInt() for every hexadecimal digit: '0'-'9', 'A'-'F' and
// 'a'-'f'.
TEST(StringUtilTest, HexDigitToInt) {
  // The position of each character in this string is its expected value.
  const char kDigitsUpper[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kDigitsUpper[value]))
        << "digit '" << kDigitsUpper[value] << "'";
  }

  // Verify the lower case as well; 'a' is expected to map to 10.
  const char kLettersLower[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLettersLower[index]))
        << "digit '" << kLettersLower[index] << "'";
  }
}
673
// Checks JoinString() on narrow strings while the list of pieces grows from
// empty to five entries, including an empty piece and a whitespace piece.
TEST(StringUtilTest, JoinString) {
  const std::string kCommaSpace(", ");
  std::vector<std::string> pieces;

  // No pieces.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // A single piece: no separator expected.
  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  // Three pieces.
  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty piece still gets its separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  // A different separator, with a whitespace piece at the end.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
691
// Checks JoinString() on UTF-16 strings while the list of pieces grows from
// empty to five entries, including an empty piece and a whitespace piece.
TEST(StringUtilTest, JoinString16) {
  const string16 kCommaSpace = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // No pieces.
  EXPECT_EQ(string16(), JoinString(pieces, kCommaSpace));

  // A single piece: no separator expected.
  pieces.push_back(ASCIIToUTF16("a"));
  const string16 expected_one = ASCIIToUTF16("a");
  EXPECT_EQ(expected_one, JoinString(pieces, kCommaSpace));

  // Three pieces.
  pieces.push_back(ASCIIToUTF16("b"));
  pieces.push_back(ASCIIToUTF16("c"));
  const string16 expected_three = ASCIIToUTF16("a, b, c");
  EXPECT_EQ(expected_three, JoinString(pieces, kCommaSpace));

  // A trailing empty piece still gets its separator.
  pieces.push_back(ASCIIToUTF16(""));
  const string16 expected_four = ASCIIToUTF16("a, b, c, ");
  EXPECT_EQ(expected_four, JoinString(pieces, kCommaSpace));

  // A different separator, with a whitespace piece at the end.
  pieces.push_back(ASCIIToUTF16(" "));
  const string16 expected_five = ASCIIToUTF16("a|b|c|| ");
  EXPECT_EQ(expected_five, JoinString(pieces, ASCIIToUTF16("|")));
}
709
// Checks StartsWith() for narrow and UTF-16 strings under both
// CompareCase::SENSITIVE and CompareCase::INSENSITIVE_ASCII.
TEST(StringUtilTest, StartsWith) {
  // Each row is run four times: {narrow, UTF-16} x {sensitive, insensitive}.
  static const struct {
    const char* text;
    const char* prefix;
    bool matches_sensitive;
    bool matches_insensitive;
  } kSamples[] = {
    {"javascript:url", "javascript", true, true},
    {"JavaScript:url", "javascript", false, true},
    // Text shorter than the prefix.
    {"java", "javascript", false, false},
    // Empty text.
    {"", "javascript", false, false},
    // Empty prefix.
    {"java", "", true, true},
  };

  for (size_t i = 0; i < arraysize(kSamples); ++i) {
    const std::string text8(kSamples[i].text);
    const std::string prefix8(kSamples[i].prefix);
    EXPECT_EQ(kSamples[i].matches_sensitive,
              StartsWith(text8, prefix8, base::CompareCase::SENSITIVE))
        << "narrow, sample " << i;
    EXPECT_EQ(kSamples[i].matches_insensitive,
              StartsWith(text8, prefix8, base::CompareCase::INSENSITIVE_ASCII))
        << "narrow, sample " << i;

    const string16 text16 = ASCIIToUTF16(kSamples[i].text);
    const string16 prefix16 = ASCIIToUTF16(kSamples[i].prefix);
    EXPECT_EQ(kSamples[i].matches_sensitive,
              StartsWith(text16, prefix16, base::CompareCase::SENSITIVE))
        << "UTF-16, sample " << i;
    EXPECT_EQ(
        kSamples[i].matches_insensitive,
        StartsWith(text16, prefix16, base::CompareCase::INSENSITIVE_ASCII))
        << "UTF-16, sample " << i;
  }
}
755
// Checks EndsWith() for UTF-16 strings under both CompareCase::SENSITIVE and
// CompareCase::INSENSITIVE_ASCII.
TEST(StringUtilTest, EndsWith) {
  // Each row is run twice: once case-sensitively, once case-insensitively.
  static const struct {
    const char* text;
    const char* suffix;
    bool matches_sensitive;
    bool matches_insensitive;
  } kSamples[] = {
    {"Foo.plugin", ".plugin", true, true},
    {"Foo.Plugin", ".plugin", false, true},
    // Text shorter than the suffix.
    {".plug", ".plugin", false, false},
    // Suffix occurs, but not at the end.
    {"Foo.plugin Bar", ".plugin", false, false},
    // Empty text.
    {"", ".plugin", false, false},
    // Empty suffix.
    {"Foo.plugin", "", true, true},
    // Text equal to the suffix.
    {".plugin", ".plugin", true, true},
    // Both empty.
    {"", "", true, true},
  };

  for (size_t i = 0; i < arraysize(kSamples); ++i) {
    const string16 text16 = ASCIIToUTF16(kSamples[i].text);
    const string16 suffix16 = ASCIIToUTF16(kSamples[i].suffix);
    EXPECT_EQ(kSamples[i].matches_sensitive,
              EndsWith(text16, suffix16, base::CompareCase::SENSITIVE))
        << "sample " << i;
    EXPECT_EQ(kSamples[i].matches_insensitive,
              EndsWith(text16, suffix16, base::CompareCase::INSENSITIVE_ASCII))
        << "sample " << i;
  }
}
789
// Checks the offsets that ReplaceStringPlaceholders() reports for two
// substitutions. The expectations below index the offsets by substitution
// ($1, then $2) rather than by the order the placeholders appear in.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> replacements;
  replacements.push_back(ASCIIToUTF16("1"));
  replacements.push_back(ASCIIToUTF16("2"));

  // Placeholders appear in ascending order.
  {
    std::vector<size_t> offsets;
    ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                              replacements, &offsets);
    EXPECT_EQ(2U, offsets.size());
    EXPECT_EQ(7U, offsets[0]);
    EXPECT_EQ(25U, offsets[1]);
  }

  // Placeholders appear in descending order: $1 now lands at offset 25.
  {
    std::vector<size_t> offsets;
    ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                              replacements, &offsets);
    EXPECT_EQ(2U, offsets.size());
    EXPECT_EQ(25U, offsets[0]);
    EXPECT_EQ(7U, offsets[1]);
  }
}
812
// Checks ReplaceStringPlaceholders() when the format string references more
// placeholders ($1..$6) than there are substitutions (three). The expected
// output shows $4..$6 expanding to nothing.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const char* const kSubstitutions[] = {"9a", "8b", "7c"};
  std::vector<string16> subst;
  for (const char* text : kSubstitutions)
    subst.push_back(ASCIIToUTF16(text));

  const string16 format = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");
  const string16 formatted = ReplaceStringPlaceholders(format, subst, nullptr);

  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
}
827
// Checks ReplaceStringPlaceholders() with nine substitutions, each
// referenced exactly once through $1..$9.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const char* const kSubstitutions[] = {"9a", "8b", "7c", "6d", "5e",
                                        "4f", "3g", "2h", "1i"};
  std::vector<string16> subst;
  for (const char* text : kSubstitutions)
    subst.push_back(ASCIIToUTF16(text));

  const string16 format = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  const string16 formatted = ReplaceStringPlaceholders(format, subst, nullptr);

  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
}
846
// Checks ReplaceStringPlaceholders() with fourteen substitutions, so that
// two-digit placeholders ($10..$14) are exercised, followed by a trailing $1.
TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
  const char* const kSubstitutions[] = {"9a", "8b",  "7c",  "6d",  "5e",
                                        "4f", "3g",  "2h",  "1i",  "0j",
                                        "-1k", "-2l", "-3m", "-4n"};
  std::vector<string16> subst;
  for (const char* text : kSubstitutions)
    subst.push_back(ASCIIToUTF16(text));

  const string16 format = ASCIIToUTF16(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
      "$10j,$11k,$12l,$13m,$14n,$1");
  const string16 formatted = ReplaceStringPlaceholders(format, subst, nullptr);

  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
                                    "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
}
872
// Same nine-substitution scenario as the string16 test, but going through
// the std::string flavour of ReplaceStringPlaceholders().
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const char* const kSubstitutions[] = {"9a", "8b", "7c", "6d", "5e",
                                        "4f", "3g", "2h", "1i"};
  const std::vector<std::string> subst(
      kSubstitutions, kSubstitutions + arraysize(kSubstitutions));

  const std::string formatted = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);

  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
}
891
// Checks how runs of '$' are handled: per the expectation below, each run
// loses one '$' and the digit that follows it is left as literal text, so
// none of the substitutions is inserted.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  const char* const kSubstitutions[] = {"a", "b", "c"};
  const std::vector<std::string> subst(
      kSubstitutions, kSubstitutions + arraysize(kSubstitutions));

  const std::string formatted =
      ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr);
  EXPECT_EQ(formatted, "$1 $$2 $$$3");
}
900
// Exercises strlcpy() and wcslcpy() against destination buffers of various
// sizes. In every case the return value is expected to be the length of the
// source (7), whether or not the copy was truncated.
TEST(StringUtilTest, LcpyTest) {
  // Seven characters plus the terminating null: eight elements each.
  const char kSrc[] = "abcdefg";
  const wchar_t kWideSrc[] = L"abcdefg";

  // Destination comfortably larger than the source.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSrc, sizeof(kSrc)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSrc, sizeof(kWideSrc)));
  }

  // A destination size of zero: the destination must be left untouched and
  // the result must still be the equivalent of strlen(src).
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow, kSrc, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, wcslcpy(wide, kWideSrc, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // Destination exactly large enough for the source and its null.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSrc, sizeof(kSrc)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSrc, sizeof(kWideSrc)));
  }

  // Destination one element too small: the last character is dropped to make
  // room for the null.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // Destination far too small: only two characters and the null fit.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
955
// Table-driven check of IsWprintfFormatPortable(): each wide format string
// is paired with the verdict the function is expected to return.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  struct FormatCase {
    const wchar_t* format;
    bool expect_portable;
  };
  static const FormatCase kCases[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const FormatCase& test_case : kCases) {
    EXPECT_EQ(test_case.expect_portable,
              IsWprintfFormatPortable(test_case.format));
  }
}
986
// Checks RemoveChars() when used in place (input and output are the same
// string) and that its return value reports whether anything was removed.
TEST(StringUtilTest, RemoveChars) {
  const char kUnwanted[] = "-/+*";
  std::string text("A-+bc/d!*");

  // Some characters match and are stripped.
  const bool removed_some = RemoveChars(text, kUnwanted, &text);
  EXPECT_TRUE(removed_some);
  EXPECT_EQ("Abcd!", text);

  // A second pass finds nothing left to strip.
  const bool removed_again = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_again);
  EXPECT_EQ("Abcd!", text);

  // Empty input stays empty and reports no removal.
  text.clear();
  const bool removed_from_empty = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_from_empty);
  EXPECT_EQ(std::string(), text);
}
1002
// Table-driven check of ReplaceChars(): every occurrence of any character
// from |replace_chars| is expected to be replaced by the whole
// |replace_with| string, and the return value tells whether a replacement
// took place.
TEST(StringUtilTest, ReplaceChars) {
  struct TestData {
    const char* input;
    const char* replace_chars;
    const char* replace_with;
    const char* output;
    bool result;
  };
  const TestData kCases[] = {
      {"", "", "", "", false},
      {"test", "", "", "test", false},
      {"test", "", "!", "test", false},
      {"test", "z", "!", "test", false},
      {"test", "e", "!", "t!st", true},
      {"test", "e", "!?", "t!?st", true},
      {"test", "ez", "!", "t!st", true},
      {"test", "zed", "!?", "t!?st", true},
      {"test", "t", "!?", "!?es!?", true},
      {"test", "et", "!>", "!>!>s!>", true},
      {"test", "zest", "!", "!!!!", true},
      {"test", "szt", "!", "!e!!", true},
      {"test", "t", "test", "testestest", true},
  };

  for (const TestData& test_case : kCases) {
    std::string actual;
    const bool replaced =
        ReplaceChars(test_case.input, test_case.replace_chars,
                     test_case.replace_with, &actual);
    EXPECT_EQ(test_case.result, replaced);
    EXPECT_EQ(test_case.output, actual);
  }
}
1036
// Checks ContainsOnlyChars() for 8-bit and UTF-16 input against explicit
// character sets and the predefined whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string empty;
  const string16 empty16;

  // With no permitted characters at all, only the empty string qualifies.
  EXPECT_TRUE(ContainsOnlyChars(empty, empty));
  EXPECT_FALSE(ContainsOnlyChars("Hello", empty));

  // Explicit digit sets; the order of the permitted characters is
  // irrelevant.
  EXPECT_TRUE(ContainsOnlyChars(empty, "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // ASCII whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(empty, kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));

  // The same inputs, widened, against the UTF-16 whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(empty16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
  EXPECT_FALSE(
      ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "), kWhitespaceUTF16));
}
1064
// Table-driven check of CompareCaseInsensitiveASCII(), which is expected to
// yield exactly -1, 0 or 1.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  struct CompareCaseData {
    const char* lhs;
    const char* rhs;
    int expected;
  };
  const CompareCaseData kCases[] = {
      // Equal once case is ignored.
      {"", "", 0},
      {"Asdf", "aSDf", 0},
      // One string is a proper prefix of the other.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},
      // Same length, different final character.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},
  };

  for (const CompareCaseData& test_case : kCases) {
    EXPECT_EQ(test_case.expected,
              CompareCaseInsensitiveASCII(test_case.lhs, test_case.rhs));
  }
}
1077
// Table-driven check of EqualsCaseInsensitiveASCII(): equal apart from case,
// differing in one character, and differing in length.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  struct EqualsCaseData {
    const char* lhs;
    const char* rhs;
    bool expected;
  };
  const EqualsCaseData kCases[] = {
      {"", "", true},
      {"Asdf", "aSDF", true},
      {"bsdf", "aSDF", false},
      {"Asdf", "aSDFz", false},
  };

  for (const EqualsCaseData& test_case : kCases) {
    EXPECT_EQ(test_case.expected,
              EqualsCaseInsensitiveASCII(test_case.lhs, test_case.rhs));
  }
}
1084
// Checks IsUnicodeWhitespace() against a set of characters that must be
// rejected and a set that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // Characters that are not Unicode whitespace.
  const wchar_t kNonWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (wchar_t c : kNonWhitespace)
    EXPECT_FALSE(IsUnicodeWhitespace(c));

  // Characters that are Unicode whitespace, ASCII and non-ASCII alike.
  const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (wchar_t c : kWhitespace)
    EXPECT_TRUE(IsUnicodeWhitespace(c));
}
1104
1105 class WriteIntoTest : public testing::Test {
1106 protected:
WritesCorrectly(size_t num_chars)1107 static void WritesCorrectly(size_t num_chars) {
1108 std::string buffer;
1109 char kOriginal[] = "supercali";
1110 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1111 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1112 // string at the first \0.
1113 EXPECT_EQ(std::string(kOriginal,
1114 std::min(num_chars, arraysize(kOriginal) - 1)),
1115 std::string(buffer.c_str()));
1116 EXPECT_EQ(num_chars, buffer.size());
1117 }
1118 };
1119
// Checks that WriteInto() sizes the target string correctly and that writing
// through the returned pointer does not leak into a string the target was
// copied from.
TEST_F(WriteIntoTest, WriteInto) {
  // Sizing: shorter than, and much longer than, the helper's sample text.
  WritesCorrectly(1);
  WritesCorrectly(2);
  WritesCorrectly(5000);

  // Isolation: with a copy-on-write std::string implementation |dead| could
  // share storage with |live|; overwriting |dead| must leave |live| intact.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string live = kLive;
  std::string dead = live;

  char* destination = WriteInto(&dead, arraysize(kDead));
  strncpy(destination, kDead, arraysize(kDead) - 1);

  EXPECT_EQ(kDead, dead);
  EXPECT_EQ(4u, dead.size());
  EXPECT_EQ(kLive, live);
  EXPECT_EQ(4u, live.size());
}
1139
1140 } // namespace base
1141