1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19
20 using ::testing::ElementsAre;
21
22 namespace base {
23
24 static const struct trim_case {
25 const wchar_t* input;
26 const TrimPositions positions;
27 const wchar_t* output;
28 const TrimPositions return_value;
29 } trim_cases[] = {
30 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
31 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
32 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
33 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
34 {L"", TRIM_ALL, L"", TRIM_NONE},
35 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
36 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
37 {L" ", TRIM_ALL, L"", TRIM_ALL},
38 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
39 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 };
41
42 static const struct trim_case_ascii {
43 const char* input;
44 const TrimPositions positions;
45 const char* output;
46 const TrimPositions return_value;
47 } trim_cases_ascii[] = {
48 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
49 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
50 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
51 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
52 {"", TRIM_ALL, "", TRIM_NONE},
53 {" ", TRIM_LEADING, "", TRIM_LEADING},
54 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
55 {" ", TRIM_ALL, "", TRIM_ALL},
56 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 };
58
59 namespace {
60
61 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)62 bool Truncated(const std::string& input,
63 const size_t byte_size,
64 std::string* output) {
65 size_t prev = input.length();
66 TruncateUTF8ToByteSize(input, byte_size, output);
67 return prev != output->length();
68 }
69
70 } // namespace
71
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output length differs from the input length.
//
// Expected strings are compared with EXPECT_EQ(expected, output) rather than
// output.compare(...) == 0 so that a failure prints both strings instead of a
// bare integer. |output| is reused across assertions; the "same string as
// input and output" section depends on the value left by the preceding case.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and boundary byte_size arguments.
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ("", output);
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Truncation of valid UTF-8 never splits a multi-byte sequence.
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ("ab", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ("\xc2\x81\xc2\x81", output);

  // Embedded NULs. arraysize() counts the literal's terminating NUL, so the
  // input strings below are one byte longer than the visible escapes.
  {
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(std::string("\x00\x00\xc2\x81", 4), output);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(std::string("\x00\xc2\x81", 3), output);
  }

  // Surrogate code points are invalid in UTF-8.
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ("", output);

  // Valid multi-byte sequences that fit exactly, followed by one input whose
  // tail is not a valid sequence.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ("\xe1\x80\xbf", output);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ("\xf1\x80\xa0\xbf", output);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
                         10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", output);
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
                        10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1""a", output);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ("\xef\xbb\xbf" "abc", output);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ("", output);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ("", output);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ("", output);

  {
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(std::string("\x00\x00", 2), output);
  }

  // Variants on the previous test
  {
    // Note the explicit length of 4: no terminating NUL is included here.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(std::string("\xff\xfe\x00\x00", 4), output);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(std::string("\xff\x00\x00", 3), output);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ("", output);

  // Strings in legacy encodings that are valid in UTF-8, but
  // are invalid as UTF-8 in real data.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ("caf", output);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
                        &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);

  // Testing using the same string as input and output. This relies on
  // |output| still holding "\xa7\x41\xa6\x6e" from the assertion above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ("\xa7\x41", output);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ("\x93" "abc", output);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ("", output);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ("", output);
}
230
// Runs every wide and narrow trimming scenario and checks that trimming also
// works when the source and destination are the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // Shared by all iterations on purpose: contents carry over from one case to
  // the next.
  string16 trimmed;
  for (const trim_case& scenario : trim_cases) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespace(WideToUTF16(scenario.input), scenario.positions,
                             &trimmed));
    EXPECT_EQ(WideToUTF16(scenario.output), trimmed);
  }

  // In-place trimming: the same string is passed as input and output.
  trimmed = ASCIIToUTF16(" This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);

  // In-place trimming of a string made up only of whitespace.
  trimmed = ASCIIToUTF16(" \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(string16(), trimmed);

  // Narrow-string variant; the destination is likewise reused across cases.
  std::string trimmed_ascii;
  for (const trim_case_ascii& scenario : trim_cases_ascii) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespaceASCII(scenario.input, scenario.positions,
                                  &trimmed_ascii));
    EXPECT_EQ(scenario.output, trimmed_ascii);
  }
}
259
// One CollapseWhitespace() scenario over wide-character text: |input| is
// collapsed with |trim| as the second argument and must equal |output|.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

// Wide-character scenarios iterated by the CollapseWhitespace test.
static const collapse_case collapse_cases[] = {
    // Second argument false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    // Second argument true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
285
// Checks CollapseWhitespace() against every entry of |collapse_cases|.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& scenario : collapse_cases) {
    const string16 expected = WideToUTF16(scenario.output);
    EXPECT_EQ(expected,
              CollapseWhitespace(WideToUTF16(scenario.input), scenario.trim));
  }
}
293
// One CollapseWhitespaceASCII() scenario: |input| is collapsed with |trim| as
// the second argument and must equal |output|.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

// Narrow-string scenarios iterated by the CollapseWhitespaceASCII test.
static const collapse_case_ascii collapse_cases_ascii[] = {
    // Second argument false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    // Second argument true.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
317
// Checks CollapseWhitespaceASCII() against every entry of
// |collapse_cases_ascii|.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& scenario : collapse_cases_ascii) {
    EXPECT_EQ(scenario.output,
              CollapseWhitespaceASCII(scenario.input, scenario.trim));
  }
}
324
// Checks IsStringUTF8() on valid sequences and on the classes of input it is
// expected to reject: surrogates, overlong forms, values beyond U+10FFFF,
// UTF-16/UTF-32 BOMs, non-characters and text in legacy encodings.
TEST(StringUtilTest, IsStringUTF8) {
  EXPECT_TRUE(IsStringUTF8("abc"));
  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM

  // surrogate code points
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

  // overlong sequences
  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "@A" (U+0040 U+0041)
  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+0825

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
  // The UTF-32 BOMs contain NUL bytes, so they must be passed with an explicit
  // length; as a bare C string the UTF-32LE BOM would stop at the first NUL
  // and merely repeat the UTF-16LE case above.
  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
  EXPECT_FALSE(IsStringUTF8(std::string("\xff\xfe\x00\x00", 4)));

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE
  // These bytes decode to U+FFFE in an overlong 4-byte form (U+1FFFE would be
  // "\xf0\x9f\xbf\xbe").
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));
  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+FFFFF
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
  // Strings in legacy encodings. We can certainly make up strings
  // in a legacy encoding that are valid in UTF-8, but in real data,
  // most of them are invalid as UTF-8.
  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC01 in EUC-KR
  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
  // representation, and the second uses a 2-byte sequence. The second version
  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
  // given codepoint must be used.
  static const char kEmbeddedNull[] = "embedded\0null";
  EXPECT_TRUE(IsStringUTF8(
      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
}
390
// Checks IsStringASCII() for char, char16 and wchar_t input over a range of
// fragment start positions and lengths, and verifies that a single non-ASCII
// character is detected at every position inside the fragment.
TEST(StringUtilTest, IsStringASCII) {
  static char char_ascii[] =
      "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  static char16 char16_ascii[] = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
      'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
      '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
  static std::wstring wchar_ascii(
      L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

  // Test a variety of the fragment start positions and lengths in order to make
  // sure that bit masking in IsStringASCII works correctly.
  // Also, test that a non-ASCII character will be detected regardless of its
  // position inside the string.
  {
    const size_t string_length = arraysize(char_ascii) - 1;
    for (size_t offset = 0; offset < 8; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
        // The fragment under test is [offset, offset + len). The bound must
        // include |offset|, otherwise the tail of the fragment is never
        // corrupted when offset > 0. offset + len < string_length, so every
        // index stays inside the buffer.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char_ascii[char_pos] |= '\x80';
          EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
          char_ascii[char_pos] &= ~'\x80';
        }
      }
    }
  }

  {
    const size_t string_length = arraysize(char16_ascii) - 1;
    for (size_t offset = 0; offset < 4; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
        // Same fragment bounds as in the char case above.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char16_ascii[char_pos] |= 0x80;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x80;
          // Also test when the upper half is non-zero.
          char16_ascii[char_pos] |= 0x100;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x100;
        }
      }
    }
  }

  {
    // The wide-string fragments always start at index 0, so |len| alone bounds
    // the positions to corrupt.
    const size_t string_length = wchar_ascii.length();
    for (size_t len = 0; len < string_length; ++len) {
      EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
      for (size_t char_pos = 0; char_pos < len; ++char_pos) {
        wchar_ascii[char_pos] |= 0x80;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x80;
        wchar_ascii[char_pos] |= 0x100;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x100;
#if defined(WCHAR_T_IS_UTF32)
        wchar_ascii[char_pos] |= 0x10000;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x10000;
#endif  // WCHAR_T_IS_UTF32
      }
    }
  }
}
464
// Round-trips ASCII text through ASCIIToUTF16() and UTF16ToASCII(), including
// empty strings and a string with an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // |kNarrowCases[i]| and |kWideCases[i]| spell the same text.
  static const char* const kNarrowCases[] = {
      "Google Video",
      "Hello, world\n",
      "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const kWideCases[] = {
      L"Google Video",
      L"Hello, world\n",
      L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t index = 0; index < arraysize(kNarrowCases); ++index) {
    const char* const narrow = kNarrowCases[index];
    const wchar_t* const wide = kWideCases[index];

    EXPECT_TRUE(IsStringASCII(narrow));

    // Narrow -> UTF-16.
    string16 widened = ASCIIToUTF16(narrow);
    EXPECT_EQ(WideToUTF16(wide), widened);

    // UTF-16 -> narrow.
    std::string narrowed = UTF16ToASCII(WideToUTF16(wide));
    EXPECT_EQ(narrow, narrowed);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Empty strings convert to empty strings in both directions.
  string16 no_text16;
  std::string no_text;
  EXPECT_EQ(no_text, UTF16ToASCII(no_text16));
  EXPECT_EQ(no_text16, ASCIIToUTF16(no_text));

  // An embedded NUL must survive the round trip. The terminating NUL of the
  // literal is excluded from the length.
  const char kWithNul[] = "test\0string";
  const int kLengthWithNul = arraysize(kWithNul) - 1;
  std::string narrow_source(kWithNul, kLengthWithNul);

  string16 widened_with_nul = ASCIIToUTF16(narrow_source);
  EXPECT_EQ(static_cast<string16::size_type>(kLengthWithNul),
            widened_with_nul.length());

  std::string round_tripped = UTF16ToASCII(widened_with_nul);
  EXPECT_EQ(static_cast<std::string::size_type>(kLengthWithNul),
            round_tripped.length());
  EXPECT_EQ(0, narrow_source.compare(round_tripped));
}
507
// Checks ToLowerASCII() on single narrow characters, single char16 characters
// and whole strings.
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: upper case, already lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16.
  const char16 kUpperC16 = static_cast<char16>('C');
  const char16 kLowerC16 = static_cast<char16>('c');
  const char16 kTwo16 = static_cast<char16>('2');
  EXPECT_EQ(kLowerC16, ToLowerASCII(kUpperC16));
  EXPECT_EQ(kLowerC16, ToLowerASCII(kLowerC16));
  EXPECT_EQ(kTwo16, ToLowerASCII(kTwo16));

  // Whole strings, narrow and UTF-16.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
520
// Checks ToUpperASCII() on single narrow characters, single char16 characters
// and whole strings.
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: already upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16.
  const char16 kUpperC16 = static_cast<char16>('C');
  const char16 kLowerC16 = static_cast<char16>('c');
  const char16 kTwo16 = static_cast<char16>('2');
  EXPECT_EQ(kUpperC16, ToUpperASCII(kUpperC16));
  EXPECT_EQ(kUpperC16, ToUpperASCII(kLowerC16));
  EXPECT_EQ(kTwo16, ToUpperASCII(kTwo16));

  // Whole strings, narrow and UTF-16.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
533
// Checks that LowerCaseEqualsASCII() reports a match for each source string
// against its lower-case spelling, for both UTF-16 and narrow sources.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* source;
    const char* lowercase;
  } kCases[] = {
      {"FoO", "foo"},
      {"foo", "foo"},
      {"FOO", "foo"},
  };

  for (const auto& entry : kCases) {
    // UTF-16 source compared with a narrow lower-case string.
    EXPECT_TRUE(
        LowerCaseEqualsASCII(ASCIIToUTF16(entry.source), entry.lowercase));
    // Narrow source compared with a narrow lower-case string.
    EXPECT_TRUE(LowerCaseEqualsASCII(entry.source, entry.lowercase));
  }
}
551
// Checks the text produced by FormatBytesUnlocalized() for a range of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    int64_t byte_count;
    const char* formatted;
  } kCases[] = {
      // Expected behavior: we show one post-decimal digit when we have
      // under two pre-decimal digits, except in cases where it makes no
      // sense (zero or bytes).
      // Since we switch units once we cross the 1000 mark, this keeps
      // the display of file sizes or bytes consistently around three
      // digits.
      {0, "0 B"},
      {512, "512 B"},
      {1024*1024, "1.0 MB"},
      {1024*1024*1024, "1.0 GB"},
      {10LL*1024*1024*1024, "10.0 GB"},
      {99LL*1024*1024*1024, "99.0 GB"},
      {105LL*1024*1024*1024, "105 GB"},
      {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
      // All bits set except bit 63.
      {~(1LL << 63), "8192 PB"},

      {99*1024 + 103, "99.1 kB"},
      {1024*1024 + 103, "1.0 MB"},
      {1024*1024 + 205 * 1024, "1.2 MB"},
      {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
      {10LL*1024*1024*1024, "10.0 GB"},
      {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& entry : kCases) {
    const string16 expected = ASCIIToUTF16(entry.formatted);
    EXPECT_EQ(expected, FormatBytesUnlocalized(entry.byte_count));
  }
}
// Checks ReplaceSubstringsAfterOffset() on UTF-16 strings: each case supplies
// a subject string, a start offset, the text to find and its replacement, and
// the expected contents of the subject string after the call.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    const char* subject;
    string16::size_type start_offset;
    const char* needle;
    const char* replacement;
    const char* expected;
  } kCases[] = {
      {"aaa", 0, "a", "b", "bbb"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMakingFour score and seven years agoit"
       "Four score and seven years agomuchFour score and seven years agolonger"
       "Four score and seven years ago"},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 9, "me ", "", "Replace me only once"},
      {"abababab", 2, "ab", "c", "abccc"},
  };

  for (const auto& entry : kCases) {
    // The subject string is modified in place through the pointer argument.
    string16 subject = ASCIIToUTF16(entry.subject);
    ReplaceSubstringsAfterOffset(&subject, entry.start_offset,
                                 ASCIIToUTF16(entry.needle),
                                 ASCIIToUTF16(entry.replacement));
    EXPECT_EQ(ASCIIToUTF16(entry.expected), subject);
  }
}
616
// Checks ReplaceFirstSubstringAfterOffset() on UTF-16 strings: each case
// supplies a subject string, a start offset, the text to find and its
// replacement, and the expected contents of the subject string after the call.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* subject;
    string16::size_type start_offset;
    const char* needle;
    const char* replacement;
    const char* expected;
  } kCases[] = {
      {"aaa", 0, "a", "b", "baa"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "",
       "Remov some substrings inging"},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMaking it much longer "},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 4, "me ", "", "Replace only me once"},
      {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& entry : kCases) {
    // The subject string is modified in place through the pointer argument.
    string16 subject = ASCIIToUTF16(entry.subject);
    ReplaceFirstSubstringAfterOffset(&subject, entry.start_offset,
                                     ASCIIToUTF16(entry.needle),
                                     ASCIIToUTF16(entry.replacement));
    EXPECT_EQ(ASCIIToUTF16(entry.expected), subject);
  }
}
646
// Checks HexDigitToInt() for every decimal digit and for the letters A-F in
// both upper and lower case.
TEST(StringUtilTest, HexDigitToInt) {
  // The index of each character in this table is its expected value (0-15).
  static const char kUpperDigits[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kUpperDigits[value]));
  }

  // Verify the lower case as well; 'a' maps to 10.
  static const char kLowerLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerLetters[index]));
  }
}
673
// Checks JoinString() over a vector of std::string, growing the vector step by
// step and including empty elements.
TEST(StringUtilTest, JoinString) {
  const std::string comma_space(", ");
  std::vector<std::string> pieces;

  // No elements.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // A single empty element.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element, then three.
  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing empty element still gets a separator in front of it.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));

  // A different separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
695
// Checks JoinString() over a vector of string16, growing the vector step by
// step and including empty elements.
TEST(StringUtilTest, JoinString16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // No elements.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // A single empty element.
  pieces.push_back(string16());
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element, then three.
  pieces.push_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  pieces.push_back(ASCIIToUTF16("b"));
  pieces.push_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing empty element still gets a separator in front of it.
  pieces.push_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));

  // A different separator.
  pieces.push_back(ASCIIToUTF16(" "));
  const string16 pipe = ASCIIToUTF16("|");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, pipe));
}
717
// Checks JoinString() over a vector of StringPiece, growing the vector step by
// step and including empty elements.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string comma_space(", ");
  std::vector<StringPiece> pieces;

  // No elements.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // Test empty first part (https://crbug.com/698073).
  pieces.push_back(StringPiece());
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element, then three. The pieces refer to string literals.
  pieces.push_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  pieces.push_back("b");
  pieces.push_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing empty piece still gets a separator in front of it.
  pieces.push_back(StringPiece());
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));

  // A different separator.
  pieces.push_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
740
// Checks JoinString() over a vector of StringPiece16, growing the vector step
// by step and including empty elements.
TEST(StringUtilTest, JoinStringPiece16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<StringPiece16> pieces;

  // No elements.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // Test empty first part (https://crbug.com/698073).
  pieces.push_back(StringPiece16());
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
  pieces.clear();

  // Each piece is created from a named string16 that stays in scope for the
  // rest of the test.
  const string16 letter_a = ASCIIToUTF16("a");
  pieces.push_back(letter_a);
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  const string16 letter_b = ASCIIToUTF16("b");
  pieces.push_back(letter_b);
  const string16 letter_c = ASCIIToUTF16("c");
  pieces.push_back(letter_c);
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing empty piece still gets a separator in front of it.
  pieces.push_back(StringPiece16());
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));

  // A different separator.
  const string16 blank = ASCIIToUTF16(" ");
  pieces.push_back(blank);
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
767
// Checks JoinString() when the parts are given as a braced initializer list of
// string literals, std::strings or StringPieces.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string comma_space(", ");

  // Empty list.
  EXPECT_EQ(std::string(), JoinString({}, comma_space));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(std::string(), JoinString({StringPiece()}, comma_space));

  // With const char*s.
  EXPECT_EQ("a", JoinString({"a"}, comma_space));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, comma_space));
  EXPECT_EQ("a, b, c, ",
            JoinString({"a", "b", "c", StringPiece()}, comma_space));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // With std::strings.
  const std::string first = "a";
  const std::string second = "b";
  EXPECT_EQ("a, b", JoinString({first, second}, comma_space));

  // With StringPieces created from those std::strings.
  const StringPiece first_piece = first;
  const StringPiece second_piece = second;
  EXPECT_EQ("a, b", JoinString({first_piece, second_piece}, comma_space));
}
791
// Checks JoinString() when the parts are given as a braced initializer list of
// string16s or StringPiece16s.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const string16 comma_space = ASCIIToUTF16(", ");

  // Empty list.
  EXPECT_EQ(string16(), JoinString({}, comma_space));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(string16(), JoinString({StringPiece16()}, comma_space));

  // With string16s.
  const string16 first = ASCIIToUTF16("a");
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString({first}, comma_space));

  const string16 second = ASCIIToUTF16("b");
  const string16 third = ASCIIToUTF16("c");
  EXPECT_EQ(ASCIIToUTF16("a, b, c"),
            JoinString({first, second, third}, comma_space));

  // A trailing empty piece still gets a separator in front of it.
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
            JoinString({first, second, third, StringPiece16()}, comma_space));

  // A different separator.
  const string16 blank = ASCIIToUTF16(" ");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "),
            JoinString({first, second, third, StringPiece16(), blank},
                       ASCIIToUTF16("|")));

  // With StringPiece16s created from those string16s.
  const StringPiece16 first_piece = first;
  const StringPiece16 second_piece = second;
  EXPECT_EQ(ASCIIToUTF16("a, b"),
            JoinString({first_piece, second_piece}, comma_space));
}
819
// Checks StartsWith() for 8-bit and 16-bit strings in both the case-sensitive
// and the ASCII-case-insensitive comparison modes.
TEST(StringUtilTest, StartsWith) {
  const CompareCase kSensitive = CompareCase::SENSITIVE;
  const CompareCase kInsensitive = CompareCase::INSENSITIVE_ASCII;

  // 8-bit strings.
  {
    const std::string kNoText;

    // A prefix written in the same case is found in either mode.
    EXPECT_TRUE(StartsWith("javascript:url", "javascript", kSensitive));
    EXPECT_TRUE(StartsWith("javascript:url", "javascript", kInsensitive));

    // A prefix differing only in ASCII case needs the insensitive mode.
    EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", kSensitive));
    EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", kInsensitive));

    // The searched string is shorter than the prefix.
    EXPECT_FALSE(StartsWith("java", "javascript", kSensitive));
    EXPECT_FALSE(StartsWith("java", "javascript", kInsensitive));

    // An empty string does not start with a non-empty prefix.
    EXPECT_FALSE(StartsWith(kNoText, "javascript", kInsensitive));
    EXPECT_FALSE(StartsWith(kNoText, "javascript", kSensitive));

    // An empty prefix matches.
    EXPECT_TRUE(StartsWith("java", kNoText, kInsensitive));
    EXPECT_TRUE(StartsWith("java", kNoText, kSensitive));
  }

  // 16-bit strings: the same scenarios as above.
  {
    const string16 kNoText;
    const string16 kPrefix = ASCIIToUTF16("javascript");
    const string16 kLowerUrl = ASCIIToUTF16("javascript:url");
    const string16 kMixedUrl = ASCIIToUTF16("JavaScript:url");
    const string16 kTooShort = ASCIIToUTF16("java");

    EXPECT_TRUE(StartsWith(kLowerUrl, kPrefix, kSensitive));
    EXPECT_TRUE(StartsWith(kLowerUrl, kPrefix, kInsensitive));

    EXPECT_FALSE(StartsWith(kMixedUrl, kPrefix, kSensitive));
    EXPECT_TRUE(StartsWith(kMixedUrl, kPrefix, kInsensitive));

    EXPECT_FALSE(StartsWith(kTooShort, kPrefix, kSensitive));
    EXPECT_FALSE(StartsWith(kTooShort, kPrefix, kInsensitive));

    EXPECT_FALSE(StartsWith(kNoText, kPrefix, kInsensitive));
    EXPECT_FALSE(StartsWith(kNoText, kPrefix, kSensitive));

    EXPECT_TRUE(StartsWith(kTooShort, kNoText, kInsensitive));
    EXPECT_TRUE(StartsWith(kTooShort, kNoText, kSensitive));
  }
}
865
// Checks EndsWith() on 16-bit strings in both the case-sensitive and the
// ASCII-case-insensitive comparison modes.
TEST(StringUtilTest, EndsWith) {
  const CompareCase kSensitive = CompareCase::SENSITIVE;
  const CompareCase kInsensitive = CompareCase::INSENSITIVE_ASCII;

  const string16 kNoText;
  const string16 kSuffix = ASCIIToUTF16(".plugin");
  const string16 kLowerName = ASCIIToUTF16("Foo.plugin");
  const string16 kMixedName = ASCIIToUTF16("Foo.Plugin");
  const string16 kTruncatedSuffix = ASCIIToUTF16(".plug");
  const string16 kSuffixInMiddle = ASCIIToUTF16("Foo.plugin Bar");

  // A suffix written in the same case is found in either mode.
  EXPECT_TRUE(EndsWith(kLowerName, kSuffix, kSensitive));
  EXPECT_TRUE(EndsWith(kLowerName, kSuffix, kInsensitive));

  // A suffix differing only in ASCII case needs the insensitive mode.
  EXPECT_FALSE(EndsWith(kMixedName, kSuffix, kSensitive));
  EXPECT_TRUE(EndsWith(kMixedName, kSuffix, kInsensitive));

  // The searched string is shorter than the suffix.
  EXPECT_FALSE(EndsWith(kTruncatedSuffix, kSuffix, kSensitive));
  EXPECT_FALSE(EndsWith(kTruncatedSuffix, kSuffix, kInsensitive));

  // The suffix text occurs in the string, but not at its end.
  EXPECT_FALSE(EndsWith(kSuffixInMiddle, kSuffix, kSensitive));
  EXPECT_FALSE(EndsWith(kSuffixInMiddle, kSuffix, kInsensitive));

  // An empty string does not end with a non-empty suffix.
  EXPECT_FALSE(EndsWith(kNoText, kSuffix, kInsensitive));
  EXPECT_FALSE(EndsWith(kNoText, kSuffix, kSensitive));

  // An empty suffix matches.
  EXPECT_TRUE(EndsWith(kLowerName, kNoText, kInsensitive));
  EXPECT_TRUE(EndsWith(kLowerName, kNoText, kSensitive));

  // The string and the suffix are identical.
  EXPECT_TRUE(EndsWith(kSuffix, kSuffix, kInsensitive));
  EXPECT_TRUE(EndsWith(kSuffix, kSuffix, kSensitive));

  // Both the string and the suffix are empty.
  EXPECT_TRUE(EndsWith(kNoText, kNoText, kInsensitive));
  EXPECT_TRUE(EndsWith(kNoText, kNoText, kSensitive));
}
899
// Verifies the offsets that ReplaceStringPlaceholders() reports through its
// |offsets| out-parameter. The expectations below show that offsets[i] is the
// position in the output at which subst[i] was inserted, regardless of the
// order in which the placeholders appear in the format string.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  // ElementsAre() checks the size and the contents in one step, so a result
  // with too few offsets fails cleanly instead of indexing out of bounds.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst, &offsets);
  EXPECT_THAT(offsets, ElementsAre(7U, 25U));

  // Start from an empty vector again before the second call.
  offsets.clear();

  // Placeholders in reverse order: $2 comes first in the format string.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst, &offsets);
  EXPECT_THAT(offsets, ElementsAre(25U, 7U));
}
922
// Supplies fewer substitutions than the format string has distinct
// placeholders. $1-$3 are replaced at every occurrence, while $4-$6, which
// have no matching substitution, expand to nothing.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  std::vector<string16> substitutions;
  for (const char* value : {"9a", "8b", "7c"})
    substitutions.push_back(ASCIIToUTF16(value));

  const string16 result = ReplaceStringPlaceholders(
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), substitutions,
      nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), result);
}
937
// Replaces all nine single-digit placeholders ($1 through $9) in a 16-bit
// format string, each with its own substitution.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  std::vector<string16> substitutions;
  for (const char* value :
       {"9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"}) {
    substitutions.push_back(ASCIIToUTF16(value));
  }

  const string16 result = ReplaceStringPlaceholders(
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), substitutions,
      nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), result);
}
956
// A placeholder consists of a single digit: "$16" is read as "$1" followed by
// a literal '6', not as placeholder number sixteen.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> kSubstitutions(1, ASCIIToUTF16("1a"));

  const string16 result =
      ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), kSubstitutions, nullptr);

  EXPECT_EQ(ASCIIToUTF16(" 1a6 "), result);
}
964
// A '$' followed by a character that is not a digit ("$-" and "$A" here) is
// dropped from the output together with that character, while the valid "$1"
// is still substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> kSubstitutions(1, ASCIIToUTF16("1a"));

  const string16 result = ReplaceStringPlaceholders(
      ASCIIToUTF16("+$-+$A+$1+"), kSubstitutions, nullptr);

  EXPECT_EQ(ASCIIToUTF16("+++1a+"), result);
}
972
// Same scenario as ReplaceStringPlaceholders, but using std::string for the
// format string and the substitutions.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  std::vector<std::string> substitutions;
  for (const char* value :
       {"9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"}) {
    substitutions.push_back(value);
  }

  const std::string result = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", substitutions, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
991
// A run of several '$' characters in front of a digit is not a placeholder:
// per the expectation below, the run is emitted with one '$' fewer, the digit
// is kept verbatim and none of the substitutions is inserted.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  std::vector<std::string> subst;
  subst.push_back("a");
  subst.push_back("b");
  subst.push_back("c");
  // Expected value first, matching the argument order used by the other
  // tests in this file.
  EXPECT_EQ("$1 $$2 $$$3",
            ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr));
}
1000
// Checks strlcpy() and wcslcpy() with destination buffers of various sizes.
// In every case the return value is the length of the source string (7), and
// the destination holds as much of the source as fits, NUL-terminated.
TEST(StringUtilTest, LcpyTest) {
  const char kNarrowSource[] = "abcdefg";
  const wchar_t kWideSource[] = L"abcdefg";

  // Destination with room to spare: the whole source and its terminator are
  // copied.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, strlcpy(narrow, kNarrowSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // Destination size of zero: nothing may be written to the destination, and
  // the return value is still the equivalent of strlen(src).
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow, kNarrowSource, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // Destination that fits the source exactly, terminator included.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, strlcpy(narrow, kNarrowSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // Destination one element too small for the terminator: the last source
  // character is dropped to make room for it.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, strlcpy(narrow, kNarrowSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // Destination far too small: only the first two characters are kept.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, strlcpy(narrow, kNarrowSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
1055
// Table-driven check of IsWprintfFormatPortable(): each row pairs a wide
// format string with the verdict the function is expected to return for it.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct {
    const wchar_t* format;
    bool expected_portable;
  } kFormatCases[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const auto& format_case : kFormatCases) {
    EXPECT_EQ(format_case.expected_portable,
              IsWprintfFormatPortable(format_case.format));
  }
}
1086
// Checks RemoveChars() when the input and the output are the same string
// object, including the return value that reports whether anything was
// removed.
TEST(StringUtilTest, RemoveChars) {
  const char kCharsToStrip[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Several characters from the set are present and get removed.
  EXPECT_TRUE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // A second pass finds nothing left to remove and leaves the text alone.
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // An empty input has nothing to remove either.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ(std::string(), text);
}
1102
// Table-driven check of ReplaceChars(): every character of |text| that occurs
// in |chars_to_replace| is replaced by the whole |replacement| string. Each
// row lists the expected output and the expected return value.
TEST(StringUtilTest, ReplaceChars) {
  static const struct {
    const char* text;
    const char* chars_to_replace;
    const char* replacement;
    const char* expected_output;
    bool expected_result;
  } kReplaceCases[] = {
      {"", "", "", "", false},
      {"test", "", "", "test", false},
      {"test", "", "!", "test", false},
      {"test", "z", "!", "test", false},
      {"test", "e", "!", "t!st", true},
      {"test", "e", "!?", "t!?st", true},
      {"test", "ez", "!", "t!st", true},
      {"test", "zed", "!?", "t!?st", true},
      {"test", "t", "!?", "!?es!?", true},
      {"test", "et", "!>", "!>!>s!>", true},
      {"test", "zest", "!", "!!!!", true},
      {"test", "szt", "!", "!e!!", true},
      {"test", "t", "test", "testestest", true},
  };

  for (const auto& replace_case : kReplaceCases) {
    std::string replaced;
    const bool changed =
        ReplaceChars(replace_case.text, replace_case.chars_to_replace,
                     replace_case.replacement, &replaced);
    EXPECT_EQ(replace_case.expected_result, changed);
    EXPECT_EQ(replace_case.expected_output, replaced);
  }
}
1136
// Checks ContainsOnlyChars() against explicit character sets and against the
// ASCII and UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string kNoChars;

  // With an empty character set, only the empty string passes.
  EXPECT_TRUE(ContainsOnlyChars(kNoChars, kNoChars));
  EXPECT_FALSE(ContainsOnlyChars("Hello", kNoChars));

  // Explicit sets of digits; the order of the characters in the set does not
  // matter.
  EXPECT_TRUE(ContainsOnlyChars(kNoChars, "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // Whitespace sets: the same inputs are checked as 8-bit strings against
  // kWhitespaceASCII and as 16-bit strings against kWhitespaceUTF16.
  const char* const kWhitespaceOnly[] = {"", " ", "\t", "\t \r \n "};
  const char* const kNotWhitespaceOnly[] = {"a", "\thello\r \n "};

  for (const char* text : kWhitespaceOnly) {
    EXPECT_TRUE(ContainsOnlyChars(text, kWhitespaceASCII))
        << "input: [" << text << "]";
    EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(text), kWhitespaceUTF16))
        << "input: [" << text << "]";
  }
  for (const char* text : kNotWhitespaceOnly) {
    EXPECT_FALSE(ContainsOnlyChars(text, kWhitespaceASCII))
        << "input: [" << text << "]";
    EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16(text), kWhitespaceUTF16))
        << "input: [" << text << "]";
  }
}
1164
// Table-driven check of CompareCaseInsensitiveASCII(): the expected result is
// 0 for strings equal up to ASCII case, and -1 or 1 when the left operand
// orders before or after the right one.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    int expected;
  } kCompareCases[] = {
      // Equal, ignoring ASCII case.
      {"", "", 0},
      {"Asdf", "aSDf", 0},
      // One operand is a proper prefix of the other.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},
      // Same length, differing in the last character.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},
  };

  for (const auto& compare_case : kCompareCases) {
    EXPECT_EQ(compare_case.expected,
              CompareCaseInsensitiveASCII(compare_case.lhs, compare_case.rhs))
        << "lhs: [" << compare_case.lhs << "] rhs: [" << compare_case.rhs
        << "]";
  }
}
1177
// Table-driven check of EqualsCaseInsensitiveASCII(): strings that differ only
// in ASCII case compare equal; a differing character or length does not.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    bool expected_equal;
  } kEqualityCases[] = {
      {"", "", true},
      {"Asdf", "aSDF", true},
      {"bsdf", "aSDF", false},
      {"Asdf", "aSDFz", false},
  };

  for (const auto& equality_case : kEqualityCases) {
    EXPECT_EQ(equality_case.expected_equal,
              EqualsCaseInsensitiveASCII(equality_case.lhs, equality_case.rhs))
        << "lhs: [" << equality_case.lhs << "] rhs: [" << equality_case.rhs
        << "]";
  }
}
1184
// Checks IsUnicodeWhitespace() against a list of characters that must be
// rejected and a list that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // Characters that are not Unicode whitespace.
  const wchar_t kNotWhitespace[] = {L'\0', L'A', L'0',
                                    L'.',  L';', L'\x4100'};
  for (wchar_t character : kNotWhitespace) {
    EXPECT_FALSE(IsUnicodeWhitespace(character))
        << "character value: " << static_cast<int>(character);
  }

  // Characters that are Unicode whitespace.
  const wchar_t kWhitespace[] = {L' ',  L'\xa0', L'\x3000', L'\t',
                                 L'\r', L'\v',   L'\f',     L'\n'};
  for (wchar_t character : kWhitespace) {
    EXPECT_TRUE(IsUnicodeWhitespace(character))
        << "character value: " << static_cast<int>(character);
  }
}
1204
1205 class WriteIntoTest : public testing::Test {
1206 protected:
WritesCorrectly(size_t num_chars)1207 static void WritesCorrectly(size_t num_chars) {
1208 std::string buffer;
1209 char kOriginal[] = "supercali";
1210 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1211 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1212 // string at the first \0.
1213 EXPECT_EQ(std::string(kOriginal,
1214 std::min(num_chars, arraysize(kOriginal) - 1)),
1215 std::string(buffer.c_str()));
1216 EXPECT_EQ(num_chars, buffer.size());
1217 }
1218 };
1219
// Exercises WriteInto() for several buffer lengths and checks that writing
// through it does not disturb a string the target was copied from.
TEST_F(WriteIntoTest, WriteInto) {
  // WriteInto() has to reserve enough space and size the string correctly,
  // for short and for long lengths.
  const size_t kLengths[] = {1, 2, 5000};
  for (size_t length : kLengths)
    WritesCorrectly(length);

  // With a Copy-on-Write string implementation, writing into a copy must not
  // modify the string it was copied from.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string source = kLive;
  std::string copy = source;
  strncpy(WriteInto(&copy, arraysize(kDead)), kDead, arraysize(kDead) - 1);

  // The copy now holds the new text...
  EXPECT_EQ(kDead, copy);
  EXPECT_EQ(4u, copy.size());
  // ...while the source still holds the old one.
  EXPECT_EQ(kLive, source);
  EXPECT_EQ(4u, source.size());
}
1239
1240 } // namespace base
1241