1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19
20 using ::testing::ElementsAre;
21
22 namespace base {
23
24 static const struct trim_case {
25 const wchar_t* input;
26 const TrimPositions positions;
27 const wchar_t* output;
28 const TrimPositions return_value;
29 } trim_cases[] = {
30 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
31 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
32 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
33 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
34 {L"", TRIM_ALL, L"", TRIM_NONE},
35 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
36 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
37 {L" ", TRIM_ALL, L"", TRIM_ALL},
38 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
39 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 };
41
42 static const struct trim_case_ascii {
43 const char* input;
44 const TrimPositions positions;
45 const char* output;
46 const TrimPositions return_value;
47 } trim_cases_ascii[] = {
48 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
49 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
50 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
51 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
52 {"", TRIM_ALL, "", TRIM_NONE},
53 {" ", TRIM_LEADING, "", TRIM_LEADING},
54 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
55 {" ", TRIM_ALL, "", TRIM_ALL},
56 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 };
58
59 namespace {
60
61 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)62 bool Truncated(const std::string& input,
63 const size_t byte_size,
64 std::string* output) {
65 size_t prev = input.length();
66 TruncateUTF8ToByteSize(input, byte_size, output);
67 return prev != output->length();
68 }
69
70 } // namespace
71
// Drives TruncateUTF8ToByteSize() through the Truncated() helper. Each check
// states whether the result is expected to be shorter than the input and,
// where given, the exact bytes expected in |output|.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty input, a zero limit, and limits larger than the input.
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ("", output);
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Well-formed UTF-8 cut at and between character boundaries.
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ("ab", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ("\xc2\x81\xc2\x81", output);

  // Inputs with embedded NULs. sizeof() also counts the literal's
  // terminating NUL, so it is part of the input string.
  {
    const char raw[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string input(raw, sizeof(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\x00\xc2\x81", 4), output);
  }

  {
    const char raw[] = "\x00\xc2\x81\xc2\x81";
    const std::string input(raw, sizeof(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\xc2\x81", 3), output);
  }

  // Surrogate code points: expected to yield an empty result.
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ("", output);

  // Multi-byte sequences that fit the limit exactly, plus one input with
  // stray bytes mixed in.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ("\xe1\x80\xbf", output);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ("\xf1\x80\xa0\xbf", output);
  EXPECT_FALSE(
      Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", output);
  EXPECT_TRUE(
      Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1""a", output);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ("\xef\xbb\xbf" "abc", output);

  // Overlong sequences.
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ("", output);

  // Values past U+10FFFF, the upper limit of the Unicode codespace.
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ("", output);

  // Byte-order marks of UTF-16(BE|LE) and UTF-32(BE|LE).
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ("", output);

  {
    const char raw[] = "\x00\x00\xfe\xff";
    const std::string input(raw, sizeof(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\x00", 2), output);
  }

  // Variations of the previous case with the bytes in other orders.
  {
    const char raw[] = "\xff\xfe\x00\x00";
    // Exactly four bytes here: the terminating NUL is left out.
    const std::string input(raw, 4);
    EXPECT_FALSE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\xff\xfe\x00\x00", 4), output);
  }
  {
    const char raw[] = "\xff\x00\x00\xfe";
    const std::string input(raw, sizeof(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\xff\x00\x00", 3), output);
  }

  // Non-characters: U+xxFFF[EF] for xx in 0x00..0x10, and <FDD0,FDEF>.
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ("", output);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ("", output);

  // Byte strings taken from legacy (non-UTF-8) encodings.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ("caf", output);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ("", output);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);

  // The same string object serves as both input and output. These two checks
  // rely on |output| still holding the result of the previous call.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ("\xa7\x41", output);

  // "abc" wrapped in U+201[CD] as encoded in windows-125[0-8].
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ("\x93" "abc", output);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6.
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ("", output);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7.
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ("", output);
}
230
// Runs every row of |trim_cases| through TrimWhitespace() and every row of
// |trim_cases_ascii| through TrimWhitespaceASCII(), and checks that
// TrimWhitespace() works when input and output are the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // One buffer is reused for all rows on purpose, so the previous row's
  // result is still in it when the next row runs.
  string16 trimmed;
  for (const trim_case& test_case : trim_cases) {
    EXPECT_EQ(test_case.return_value,
              TrimWhitespace(WideToUTF16(test_case.input),
                             test_case.positions, &trimmed));
    EXPECT_EQ(WideToUTF16(test_case.output), trimmed);
  }

  // In-place trimming: the same string is passed as input and output.
  trimmed = ASCIIToUTF16(" This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);

  // In-place trimming again, on a string holding nothing but whitespace.
  trimmed = ASCIIToUTF16(" \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(string16(), trimmed);

  // Narrow-string variant, also with a buffer shared across rows.
  std::string trimmed_ascii;
  for (const trim_case_ascii& test_case : trim_cases_ascii) {
    EXPECT_EQ(test_case.return_value,
              TrimWhitespaceASCII(test_case.input, test_case.positions,
                                  &trimmed_ascii));
    EXPECT_EQ(test_case.output, trimmed_ascii);
  }
}
259
// Expectations for CollapseWhitespace() on wide-character input. The
// CollapseWhitespace test converts both strings with WideToUTF16().
static const struct collapse_case {
  // Text to collapse.
  const wchar_t* input;
  // Passed through as the second argument of CollapseWhitespace().
  const bool trim;
  // Text expected back.
  const wchar_t* output;
} collapse_cases[] = {
    // Rows with |trim| == false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},

    // Rows with |trim| == true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
285
// Checks CollapseWhitespace() against every row of |collapse_cases|.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& test_case : collapse_cases) {
    const string16 input = WideToUTF16(test_case.input);
    EXPECT_EQ(WideToUTF16(test_case.output),
              CollapseWhitespace(input, test_case.trim));
  }
}
293
// Expectations for CollapseWhitespaceASCII() on narrow (char) input.
static const struct collapse_case_ascii {
  // Text to collapse.
  const char* input;
  // Passed through as the second argument of CollapseWhitespaceASCII().
  const bool trim;
  // Text expected back.
  const char* output;
} collapse_cases_ascii[] = {
    // Rows with |trim| == false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},

    // Rows with |trim| == true.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
317
// Checks CollapseWhitespaceASCII() against every row of
// |collapse_cases_ascii|.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& test_case : collapse_cases_ascii) {
    EXPECT_EQ(test_case.output,
              CollapseWhitespaceASCII(test_case.input, test_case.trim));
  }
}
324
// Checks which byte strings IsStringUTF8() accepts. Well-formed UTF-8 is
// expected to pass; surrogates, overlong forms, out-of-range values,
// non-characters and strings from legacy encodings are expected to fail.
TEST(StringUtilTest, IsStringUTF8) {
  EXPECT_TRUE(IsStringUTF8("abc"));
  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM

  // Surrogate code points.
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

  // Overlong sequences.
  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5

  // Beyond U+10FFFF (the upper limit of Unicode codespace).
  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE). The UTF-32 BOMs contain NUL
  // bytes, so they are built with an explicit length: a bare string literal
  // would be cut off at its first NUL and the NUL bytes would never be
  // tested.
  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
  EXPECT_FALSE(IsStringUTF8(std::string("\xff\xfe\x00\x00", 4)));

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+FFFE, overlong 4-byte form
  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+FFFFF
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF

  // Strings in legacy encodings. We can certainly make up strings
  // in a legacy encoding that are valid in UTF-8, but in real data,
  // most of them are invalid as UTF-8.
  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC01 in EUC-KR
  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

  // Check that we support embedded NULs. The first uses the canonical UTF-8
  // representation, and the second uses a 2-byte sequence. The second version
  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
  // given codepoint must be used.
  static const char kEmbeddedNull[] = "embedded\0null";
  EXPECT_TRUE(IsStringUTF8(
      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
}
390
// Checks IsStringASCII() on char, char16 and wchar_t data. Each section
// takes fragments of an all-ASCII buffer, expects them to be reported as
// ASCII, then makes one character of the fragment non-ASCII at a time and
// expects the fragment to be rejected.
TEST(StringUtilTest, IsStringASCII) {
  // The buffers are non-const because the loops below flip bits in them
  // temporarily; every modification is undone before the next check.
  static char char_ascii[] =
      "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  static char16 char16_ascii[] = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
      'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
      '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
  static std::wstring wchar_ascii(
      L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

  // Test a variety of the fragment start positions and lengths in order to make
  // sure that bit masking in IsStringASCII works correctly.
  // Also, test that a non-ASCII character will be detected regardless of its
  // position inside the string.
  {
    const size_t string_length = arraysize(char_ascii) - 1;
    for (size_t offset = 0; offset < 8; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
        // The fragment covers buffer indices [offset, offset + len), so the
        // upper bound has to include |offset|; otherwise the tail of the
        // fragment is never exercised.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char_ascii[char_pos] |= '\x80';
          EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
          char_ascii[char_pos] &= ~'\x80';
        }
      }
    }
  }

  {
    const size_t string_length = arraysize(char16_ascii) - 1;
    for (size_t offset = 0; offset < 4; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
        // Same bound as above: walk every index inside the fragment.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char16_ascii[char_pos] |= 0x80;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x80;
          // Also test when the upper half is non-zero.
          char16_ascii[char_pos] |= 0x100;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x100;
        }
      }
    }
  }

  {
    // Wide strings are only tested as prefixes (offset 0), so |len| alone
    // bounds the positions inside the fragment.
    const size_t string_length = wchar_ascii.length();
    for (size_t len = 0; len < string_length; ++len) {
      EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
      for (size_t char_pos = 0; char_pos < len; ++char_pos) {
        wchar_ascii[char_pos] |= 0x80;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x80;
        wchar_ascii[char_pos] |= 0x100;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x100;
#if defined(WCHAR_T_IS_UTF32)
        // With a 32-bit wchar_t, also set a bit above the low 16 bits.
        wchar_ascii[char_pos] |= 0x10000;
        EXPECT_FALSE(
            IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x10000;
#endif  // WCHAR_T_IS_UTF32
      }
    }
  }
}
464
// Round-trips ASCII text through ASCIIToUTF16() and UTF16ToASCII(),
// including empty strings and a string with an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // |narrow_cases[i]| and |wide_cases[i]| hold the same text.
  static const char* const narrow_cases[] = {
      "Google Video",
      "Hello, world\n",
      "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };
  static const wchar_t* const wide_cases[] = {
      L"Google Video",
      L"Hello, world\n",
      L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t index = 0; index < arraysize(narrow_cases); ++index) {
    const char* const narrow = narrow_cases[index];
    const string16 wide_as_utf16 = WideToUTF16(wide_cases[index]);

    EXPECT_TRUE(IsStringASCII(narrow));

    // Narrow -> UTF-16.
    string16 widened = ASCIIToUTF16(narrow);
    EXPECT_EQ(wide_as_utf16, widened);

    // UTF-16 -> narrow.
    std::string narrowed = UTF16ToASCII(wide_as_utf16);
    EXPECT_EQ(narrow, narrowed);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Empty strings convert to empty strings in both directions.
  const string16 empty16;
  const std::string empty;
  EXPECT_EQ(empty, UTF16ToASCII(empty16));
  EXPECT_EQ(empty16, ASCIIToUTF16(empty));

  // A string with a NUL in the middle: the length is taken from the array
  // size (minus the terminator) so the text after the NUL is included.
  const char chars_with_nul[] = "test\0string";
  const size_t length_with_nul = arraysize(chars_with_nul) - 1;
  const std::string string_with_nul(chars_with_nul, length_with_nul);

  const string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
  EXPECT_EQ(length_with_nul, string16_with_nul.length());

  const std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
}
507
// Checks ToLowerASCII() on single char and char16 values and on whole
// strings.
TEST(StringUtilTest, ToLowerASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16 values.
  const char16 upper_c16 = static_cast<char16>('C');
  const char16 lower_c16 = static_cast<char16>('c');
  const char16 digit_two16 = static_cast<char16>('2');
  EXPECT_EQ(lower_c16, ToLowerASCII(upper_c16));
  EXPECT_EQ(lower_c16, ToLowerASCII(lower_c16));
  EXPECT_EQ(digit_two16, ToLowerASCII(digit_two16));

  // Whole strings, narrow and UTF-16.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
520
// Checks ToUpperASCII() on single char and char16 values and on whole
// strings.
TEST(StringUtilTest, ToUpperASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16 values.
  const char16 upper_c16 = static_cast<char16>('C');
  const char16 lower_c16 = static_cast<char16>('c');
  const char16 digit_two16 = static_cast<char16>('2');
  EXPECT_EQ(upper_c16, ToUpperASCII(upper_c16));
  EXPECT_EQ(upper_c16, ToUpperASCII(lower_c16));
  EXPECT_EQ(digit_two16, ToUpperASCII(digit_two16));

  // Whole strings, narrow and UTF-16.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
533
// Checks LowerCaseEqualsASCII() with both UTF-16 and narrow first arguments;
// every row is expected to compare equal.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* src_a;  // Mixed-case text to compare.
    const char* dst;    // Lower-case text it is compared against.
  } lowercase_cases[] = {
      {"FoO", "foo"},
      {"foo", "foo"},
      {"FOO", "foo"},
  };

  for (const auto& test_case : lowercase_cases) {
    // UTF-16 first argument.
    EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(test_case.src_a),
                                     test_case.dst));
    // Narrow first argument.
    EXPECT_TRUE(LowerCaseEqualsASCII(test_case.src_a, test_case.dst));
  }
}
551
// Checks the text FormatBytesUnlocalized() produces for a range of byte
// counts, from zero up to the largest int64_t value.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    int64_t bytes;         // Byte count passed to FormatBytesUnlocalized().
    const char* expected;  // Expected text (converted to UTF-16 below).
  } cases[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, "0 B"},
    {512, "512 B"},
    {1024*1024, "1.0 MB"},
    {1024*1024*1024, "1.0 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {99LL*1024*1024*1024, "99.0 GB"},
    {105LL*1024*1024*1024, "105 GB"},
    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    // Largest representable value. INT64_MAX is used instead of
    // ~(1LL << 63), which shifts a 1 into the sign bit of a signed type.
    {INT64_MAX, "8192 PB"},

    {99*1024 + 103, "99.1 kB"},
    {1024*1024 + 103, "1.0 MB"},
    {1024*1024 + 205 * 1024, "1.2 MB"},
    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& test_case : cases) {
    EXPECT_EQ(ASCIIToUTF16(test_case.expected),
              FormatBytesUnlocalized(test_case.bytes));
  }
}
// Checks ReplaceSubstringsAfterOffset(): each row is converted to UTF-16,
// modified in place, and compared with the expected text.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    const char* str;                   // Text to modify.
    string16::size_type start_offset;  // Offset argument passed to the call.
    const char* find_this;             // Substring to look for.
    const char* replace_with;          // Replacement text.
    const char* expected;              // Expected text after the call.
  } cases[] = {
      {"aaa", 0, "a", "b", "bbb"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMakingFour score and seven years agoit"
       "Four score and seven years agomuchFour score and seven years agolonger"
       "Four score and seven years ago"},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 9, "me ", "", "Replace me only once"},
      {"abababab", 2, "ab", "c", "abccc"},
  };

  for (const auto& test_case : cases) {
    string16 text = ASCIIToUTF16(test_case.str);
    const string16 needle = ASCIIToUTF16(test_case.find_this);
    const string16 replacement = ASCIIToUTF16(test_case.replace_with);
    ReplaceSubstringsAfterOffset(&text, test_case.start_offset, needle,
                                 replacement);
    EXPECT_EQ(ASCIIToUTF16(test_case.expected), text);
  }
}
616
// Checks ReplaceFirstSubstringAfterOffset(): each row is converted to
// UTF-16, modified in place, and compared with the expected text.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* str;                   // Text to modify.
    string16::size_type start_offset;  // Offset argument passed to the call.
    const char* find_this;             // Substring to look for.
    const char* replace_with;          // Replacement text.
    const char* expected;              // Expected text after the call.
  } cases[] = {
      {"aaa", 0, "a", "b", "baa"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "",
       "Remov some substrings inging"},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMaking it much longer "},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 4, "me ", "", "Replace only me once"},
      {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& test_case : cases) {
    string16 text = ASCIIToUTF16(test_case.str);
    const string16 needle = ASCIIToUTF16(test_case.find_this);
    const string16 replacement = ASCIIToUTF16(test_case.replace_with);
    ReplaceFirstSubstringAfterOffset(&text, test_case.start_offset, needle,
                                     replacement);
    EXPECT_EQ(ASCIIToUTF16(test_case.expected), text);
  }
}
646
// Checks HexDigitToInt() for every hexadecimal digit, with the letters in
// both upper and lower case.
TEST(StringUtilTest, HexDigitToInt) {
  // The index of each character in this string is the value it stands for.
  static const char kUpperCaseDigits[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kUpperCaseDigits[value]));
  }

  // Verify the lower case as well: 'a'..'f' map to 10..15.
  static const char kLowerCaseLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerCaseLetters[index]));
  }
}
673
// Checks JoinString() on std::string pieces as the list grows from empty to
// five entries, including an empty piece and a piece holding one space.
TEST(StringUtilTest, JoinString) {
  const std::string comma_space(", ");
  std::vector<std::string> pieces;

  // No pieces at all.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // One piece.
  pieces.push_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  // Three pieces.
  pieces.push_back("b");
  pieces.push_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing empty piece.
  pieces.push_back(std::string());
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));

  // A piece holding a single space, joined with a different separator.
  pieces.push_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
691
// Checks JoinString() on string16 pieces as the list grows from empty to
// five entries, including an empty piece and a piece holding one space.
TEST(StringUtilTest, JoinString16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // No pieces at all.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // One piece.
  pieces.push_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  // Three pieces.
  pieces.push_back(ASCIIToUTF16("b"));
  pieces.push_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing empty piece.
  pieces.push_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));

  // A piece holding a single space, joined with a different separator.
  pieces.push_back(ASCIIToUTF16(" "));
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
709
// Checks StartsWith() for narrow and UTF-16 strings under both
// CompareCase::SENSITIVE and CompareCase::INSENSITIVE_ASCII.
TEST(StringUtilTest, StartsWith) {
  const auto sensitive = base::CompareCase::SENSITIVE;
  const auto insensitive = base::CompareCase::INSENSITIVE_ASCII;

  // Narrow strings.
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", sensitive));
  EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", sensitive));
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", insensitive));
  EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", insensitive));
  // Text shorter than the prefix.
  EXPECT_FALSE(StartsWith("java", "javascript", sensitive));
  EXPECT_FALSE(StartsWith("java", "javascript", insensitive));
  // Empty text.
  EXPECT_FALSE(StartsWith(std::string(), "javascript", insensitive));
  EXPECT_FALSE(StartsWith(std::string(), "javascript", sensitive));
  // Empty prefix.
  EXPECT_TRUE(StartsWith("java", std::string(), insensitive));
  EXPECT_TRUE(StartsWith("java", std::string(), sensitive));

  // The same checks on UTF-16 strings.
  const string16 lower_url = ASCIIToUTF16("javascript:url");
  const string16 mixed_url = ASCIIToUTF16("JavaScript:url");
  const string16 javascript = ASCIIToUTF16("javascript");
  const string16 java = ASCIIToUTF16("java");

  EXPECT_TRUE(StartsWith(lower_url, javascript, sensitive));
  EXPECT_FALSE(StartsWith(mixed_url, javascript, sensitive));
  EXPECT_TRUE(StartsWith(lower_url, javascript, insensitive));
  EXPECT_TRUE(StartsWith(mixed_url, javascript, insensitive));
  EXPECT_FALSE(StartsWith(java, javascript, sensitive));
  EXPECT_FALSE(StartsWith(java, javascript, insensitive));
  EXPECT_FALSE(StartsWith(string16(), javascript, insensitive));
  EXPECT_FALSE(StartsWith(string16(), javascript, sensitive));
  EXPECT_TRUE(StartsWith(java, string16(), insensitive));
  EXPECT_TRUE(StartsWith(java, string16(), sensitive));
}
755
// Checks EndsWith() for UTF-16 strings under both CompareCase::SENSITIVE and
// CompareCase::INSENSITIVE_ASCII.
TEST(StringUtilTest, EndsWith) {
  const auto sensitive = base::CompareCase::SENSITIVE;
  const auto insensitive = base::CompareCase::INSENSITIVE_ASCII;

  const string16 suffix = ASCIIToUTF16(".plugin");
  const string16 lower_name = ASCIIToUTF16("Foo.plugin");
  const string16 mixed_name = ASCIIToUTF16("Foo.Plugin");
  const string16 too_short = ASCIIToUTF16(".plug");
  const string16 suffix_in_middle = ASCIIToUTF16("Foo.plugin Bar");

  // Matching suffix, with and without a case difference.
  EXPECT_TRUE(EndsWith(lower_name, suffix, sensitive));
  EXPECT_FALSE(EndsWith(mixed_name, suffix, sensitive));
  EXPECT_TRUE(EndsWith(lower_name, suffix, insensitive));
  EXPECT_TRUE(EndsWith(mixed_name, suffix, insensitive));

  // Text shorter than the suffix.
  EXPECT_FALSE(EndsWith(too_short, suffix, sensitive));
  EXPECT_FALSE(EndsWith(too_short, suffix, insensitive));

  // Suffix present, but not at the end.
  EXPECT_FALSE(EndsWith(suffix_in_middle, suffix, sensitive));
  EXPECT_FALSE(EndsWith(suffix_in_middle, suffix, insensitive));

  // Empty text.
  EXPECT_FALSE(EndsWith(string16(), suffix, insensitive));
  EXPECT_FALSE(EndsWith(string16(), suffix, sensitive));

  // Empty suffix.
  EXPECT_TRUE(EndsWith(lower_name, string16(), insensitive));
  EXPECT_TRUE(EndsWith(lower_name, string16(), sensitive));

  // Text equal to the suffix.
  EXPECT_TRUE(EndsWith(suffix, suffix, insensitive));
  EXPECT_TRUE(EndsWith(suffix, suffix, sensitive));

  // Both empty.
  EXPECT_TRUE(EndsWith(string16(), string16(), insensitive));
  EXPECT_TRUE(EndsWith(string16(), string16(), sensitive));
}
789
// Checks the offsets that ReplaceStringPlaceholders() reports through its
// out-parameter. As the second case shows, the expected offsets follow the
// substitution index (offsets[i] belongs to subst[i]), not the order in which
// the placeholders appear in the format string.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst, &offsets);
  // Fatal assertion: the element checks below index into |offsets| and must
  // not run if it has an unexpected size.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);

  // Same substitutions with the placeholders swapped in the format string.
  offsets.clear();
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst, &offsets);
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
}
812
// The format string refers to $1..$6 but only three substitutions are
// supplied. The expected output shows $4..$6 expanding to nothing while
// $1..$3 are substituted every time they occur.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<string16> subst = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c")};
  const string16 format_string =
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");

  const string16 formatted =
      ReplaceStringPlaceholders(format_string, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
}
827
// Substitutes all nine single-digit placeholders ($1..$9) in a UTF-16 format
// string, passing nullptr for the optional offsets output.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  static const char* const kSubstitutions[] = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"};

  std::vector<string16> subst;
  for (const char* substitution : kSubstitutions)
    subst.push_back(ASCIIToUTF16(substitution));

  const string16 format_string =
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  const string16 formatted =
      ReplaceStringPlaceholders(format_string, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
}
846
// "$16" is expected to be read as placeholder $1 followed by a literal '6',
// i.e. only one digit after the '$' belongs to the placeholder.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> subst(1, ASCIIToUTF16("1a"));
  const string16 format_string = ASCIIToUTF16(" $16 ");

  const string16 formatted =
      ReplaceStringPlaceholders(format_string, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted);
}
854
// A '$' followed by a non-digit ("$-", "$A") is expected to vanish from the
// output together with that character, while the valid "$1" is substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> subst(1, ASCIIToUTF16("1a"));
  const string16 format_string = ASCIIToUTF16("+$-+$A+$1+");

  const string16 formatted =
      ReplaceStringPlaceholders(format_string, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted);
}
862
// Same scenario as the UTF-16 nine-placeholder test, but exercising the
// std::string flavour of ReplaceStringPlaceholders().
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> subst = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"};

  const std::string formatted = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
}
881
// Runs of two or more '$' before a digit are not placeholders: the expected
// output keeps the digit and one fewer '$' than the input had.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  const std::vector<std::string> subst = {"a", "b", "c"};

  const std::string formatted =
      ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr);

  EXPECT_EQ(formatted, "$1 $$2 $$$3");
}
890
// Exercises strlcpy() and wcslcpy() with a 7-character source and destination
// buffers of various sizes. In every case the return value is expected to be
// the source length (7), whether or not the copy was truncated.
TEST(StringUtilTest, LcpyTest) {
  // Destination larger than needed: the whole string plus its terminator is
  // copied.
  {
    char narrow_buf[10];
    wchar_t wide_buf[10];
    EXPECT_EQ(7U, strlcpy(narrow_buf, "abcdefg", arraysize(narrow_buf)));
    EXPECT_EQ(0, memcmp(narrow_buf, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide_buf, L"abcdefg", arraysize(wide_buf)));
    EXPECT_EQ(0, memcmp(wide_buf, L"abcdefg", 8 * sizeof(wchar_t)));
  }

  // A destination size of zero: the buffers must be left untouched and the
  // return value is still the length of the source.
  {
    char narrow_buf[2] = {1, 2};
    wchar_t wide_buf[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow_buf, "abcdefg", 0));
    EXPECT_EQ(1, narrow_buf[0]);
    EXPECT_EQ(2, narrow_buf[1]);
    EXPECT_EQ(7U, wcslcpy(wide_buf, L"abcdefg", 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide_buf[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide_buf[1]);
  }

  // Destination exactly large enough for the string and its terminator.
  {
    char narrow_buf[8];
    wchar_t wide_buf[8];
    EXPECT_EQ(7U, strlcpy(narrow_buf, "abcdefg", arraysize(narrow_buf)));
    EXPECT_EQ(0, memcmp(narrow_buf, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide_buf, L"abcdefg", arraysize(wide_buf)));
    EXPECT_EQ(0, memcmp(wide_buf, L"abcdefg", 8 * sizeof(wchar_t)));
  }

  // Destination one element too small: the last character is dropped so that
  // the terminator still fits.
  {
    char narrow_buf[7];
    wchar_t wide_buf[7];
    EXPECT_EQ(7U, strlcpy(narrow_buf, "abcdefg", arraysize(narrow_buf)));
    EXPECT_EQ(0, memcmp(narrow_buf, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide_buf, L"abcdefg", arraysize(wide_buf)));
    EXPECT_EQ(0, memcmp(wide_buf, L"abcdef", 7 * sizeof(wchar_t)));
  }

  // Destination far too small: only a terminated two-character prefix fits.
  {
    char narrow_buf[3];
    wchar_t wide_buf[3];
    EXPECT_EQ(7U, strlcpy(narrow_buf, "abcdefg", arraysize(narrow_buf)));
    EXPECT_EQ(0, memcmp(narrow_buf, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide_buf, L"abcdefg", arraysize(wide_buf)));
    EXPECT_EQ(0, memcmp(wide_buf, L"ab", 3 * sizeof(wchar_t)));
  }
}
945
// Table-driven check of IsWprintfFormatPortable(): each row pairs a wide
// format string with whether it is expected to be reported as portable.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct {
    const wchar_t* input;
    bool portable;
  } cases[] = {
    { L"%ls", true },
    { L"%s", false },
    { L"%S", false },
    { L"%lS", false },
    { L"Hello, %s", false },
    { L"%lc", true },
    { L"%c", false },
    { L"%C", false },
    { L"%lC", false },
    { L"%ls %s", false },
    { L"%s %ls", false },
    { L"%s %ls %s", false },
    { L"%f", true },
    { L"%f %F", false },
    { L"%d %D", false },
    { L"%o %O", false },
    { L"%u %U", false },
    { L"%f %d %o %u", true },
    { L"%-8d (%02.1f%)", true },
    { L"% 10s", false },
    { L"% 10ls", true }
  };
  for (size_t i = 0; i < arraysize(cases); ++i) {
    // The assertion text is the same for every row, so report the row index
    // to make a failure attributable to a specific format string.
    EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input))
        << "case index: " << i;
  }
}
976
// Exercises RemoveChars() with the same string as both input and output, and
// checks that the return value reports whether anything was removed.
TEST(StringUtilTest, RemoveChars) {
  const char kCharsToStrip[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Several characters from the set are present and get stripped.
  EXPECT_TRUE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // A second pass finds nothing left to strip and leaves the string alone.
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // An empty input stays empty and reports no removal.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ(std::string(), text);
}
992
// Table-driven check of ReplaceChars(): every occurrence in |input| of any
// character from |replace_chars| is expected to be replaced by the whole
// |replace_with| string. |result| is the expected return value, which the
// table shows to be true exactly when at least one replacement happened.
TEST(StringUtilTest, ReplaceChars) {
  static const struct TestData {
    const char* input;
    const char* replace_chars;
    const char* replace_with;
    const char* output;
    bool result;
  } cases[] = {
    { "", "", "", "", false },
    { "test", "", "", "test", false },
    { "test", "", "!", "test", false },
    { "test", "z", "!", "test", false },
    { "test", "e", "!", "t!st", true },
    { "test", "e", "!?", "t!?st", true },
    { "test", "ez", "!", "t!st", true },
    { "test", "zed", "!?", "t!?st", true },
    { "test", "t", "!?", "!?es!?", true },
    { "test", "et", "!>", "!>!>s!>", true },
    { "test", "zest", "!", "!!!!", true },
    { "test", "szt", "!", "!e!!", true },
    { "test", "t", "test", "testestest", true },
  };

  for (const TestData& test_case : cases) {
    std::string output;
    const bool result = ReplaceChars(test_case.input, test_case.replace_chars,
                                     test_case.replace_with, &output);
    // The assertions are identical for every row, so spell out the row's
    // arguments to make a failure attributable to a specific case.
    EXPECT_EQ(test_case.result, result)
        << "input=\"" << test_case.input << "\" replace_chars=\""
        << test_case.replace_chars << "\" replace_with=\""
        << test_case.replace_with << "\"";
    EXPECT_EQ(test_case.output, output)
        << "input=\"" << test_case.input << "\" replace_chars=\""
        << test_case.replace_chars << "\" replace_with=\""
        << test_case.replace_with << "\"";
  }
}
1026
// Exercises ContainsOnlyChars() with an empty character set, a small digit
// set, and the ASCII / UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string empty;
  const string16 empty16;

  // With no allowed characters, only the empty string qualifies.
  EXPECT_TRUE(ContainsOnlyChars(empty, empty));
  EXPECT_FALSE(ContainsOnlyChars("Hello", empty));

  // Digit sets: the order of the allowed characters does not matter, and a
  // single foreign character makes the check fail.
  EXPECT_TRUE(ContainsOnlyChars(empty, "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // 8-bit strings against the ASCII whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(empty, kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));

  // The same inputs as UTF-16 strings against the UTF-16 whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(empty16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
  EXPECT_TRUE(
      ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
  EXPECT_FALSE(
      ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "), kWhitespaceUTF16));
}
1054
// CompareCaseInsensitiveASCII() is expected to return exactly -1, 0 or 1,
// ignoring ASCII case differences between its operands.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  const int kLess = -1;
  const int kEqual = 0;
  const int kGreater = 1;

  // Equal once case is ignored (including two empty strings).
  EXPECT_EQ(kEqual, CompareCaseInsensitiveASCII("", ""));
  EXPECT_EQ(kEqual, CompareCaseInsensitiveASCII("Asdf", "aSDf"));

  // One operand is a proper prefix of the other: the shorter sorts first.
  EXPECT_EQ(kLess, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
  EXPECT_EQ(kGreater, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));

  // Same length, differing in the last character ('a' vs. 'b' ignoring case).
  EXPECT_EQ(kLess, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
  EXPECT_EQ(kGreater, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
}
1067
// EqualsCaseInsensitiveASCII() must ignore ASCII case but still reject
// strings that differ in content or in length.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  const char kReference[] = "aSDF";

  // Two empty strings are equal.
  EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
  // Differs from the reference only by case.
  EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", kReference));
  // Differs from the reference in its first letter.
  EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", kReference));
  // The right-hand side has one extra trailing character.
  EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
}
1074
// Checks IsUnicodeWhitespace() against a handful of code points that must be
// rejected and a handful that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // NUL, ASCII letters/digits/punctuation and U+4100 are not white space.
  static const wchar_t kNotWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100'};
  for (wchar_t code_point : kNotWhitespace) {
    EXPECT_FALSE(IsUnicodeWhitespace(code_point))
        << "code point: " << static_cast<unsigned long>(code_point);
  }

  // Space, U+00A0, U+3000 and the ASCII control white space characters.
  static const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n'};
  for (wchar_t code_point : kWhitespace) {
    EXPECT_TRUE(IsUnicodeWhitespace(code_point))
        << "code point: " << static_cast<unsigned long>(code_point);
  }
}
1094
1095 class WriteIntoTest : public testing::Test {
1096 protected:
WritesCorrectly(size_t num_chars)1097 static void WritesCorrectly(size_t num_chars) {
1098 std::string buffer;
1099 char kOriginal[] = "supercali";
1100 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1101 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1102 // string at the first \0.
1103 EXPECT_EQ(std::string(kOriginal,
1104 std::min(num_chars, arraysize(kOriginal) - 1)),
1105 std::string(buffer.c_str()));
1106 EXPECT_EQ(num_chars, buffer.size());
1107 }
1108 };
1109
// Verifies that WriteInto() sizes the target string correctly and that writing
// through the returned buffer does not disturb a string it was copied from.
TEST_F(WriteIntoTest, WriteInto) {
  // Sizing: WriteInto() must reserve enough room and leave the string with
  // the requested length.
  static const size_t kSizes[] = {1, 2, 5000};
  for (size_t size : kSizes)
    WritesCorrectly(size);

  // Isolation: if std::string is implemented with copy-on-write, writing into
  // |dead| must not leak into |live|, the string it was copied from.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string live(kLive);
  std::string dead(live);

  char* const dead_buffer = WriteInto(&dead, 5);
  strncpy(dead_buffer, kDead, 4);

  EXPECT_EQ(kDead, dead);
  EXPECT_EQ(4u, dead.size());
  EXPECT_EQ(kLive, live);
  EXPECT_EQ(4u, live.size());
}
1129
1130 } // namespace base
1131