1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <math.h>
6 #include <stdarg.h>
7
8 #include <limits>
9 #include <sstream>
10
11 #include "base/basictypes.h"
12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h"
14 #include "testing/gmock/include/gmock/gmock.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16
17 using ::testing::ElementsAre;
18
19 namespace base {
20
21 static const struct trim_case {
22 const wchar_t* input;
23 const TrimPositions positions;
24 const wchar_t* output;
25 const TrimPositions return_value;
26 } trim_cases[] = {
27 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
28 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
29 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
30 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
31 {L"", TRIM_ALL, L"", TRIM_NONE},
32 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
33 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
34 {L" ", TRIM_ALL, L"", TRIM_ALL},
35 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
36 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
37 };
38
39 static const struct trim_case_ascii {
40 const char* input;
41 const TrimPositions positions;
42 const char* output;
43 const TrimPositions return_value;
44 } trim_cases_ascii[] = {
45 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
46 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
47 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
48 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
49 {"", TRIM_ALL, "", TRIM_NONE},
50 {" ", TRIM_LEADING, "", TRIM_LEADING},
51 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
52 {" ", TRIM_ALL, "", TRIM_ALL},
53 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
54 };
55
56 namespace {
57
58 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)59 bool Truncated(const std::string& input, const size_t byte_size,
60 std::string* output) {
61 size_t prev = input.length();
62 TruncateUTF8ToByteSize(input, byte_size, output);
63 return prev != output->length();
64 }
65
66 } // namespace
67
// Drives TruncateUTF8ToByteSize() through the Truncated() helper. Each pair of
// lines checks whether the length changed and what was left in |truncated|.
// The statements are order-dependent: |truncated| is reused throughout and is
// fed back in as input near the end.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string truncated;

  // Empty input, a zero byte budget, and budgets larger than the input.
  EXPECT_FALSE(Truncated("", 0, &truncated));
  EXPECT_EQ(truncated, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &truncated));
  EXPECT_EQ(truncated, "");
  // -1 is converted to the helper's size_t parameter.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &truncated));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &truncated));

  // Well-formed UTF-8 cut at and between character boundaries.
  EXPECT_TRUE(Truncated("abc", 2, &truncated));
  EXPECT_EQ(truncated, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &truncated));
  EXPECT_EQ(0, truncated.compare("\xc2\x81"));
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &truncated));
  EXPECT_EQ(0, truncated.compare("\xc2\x81"));
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &truncated));
  EXPECT_EQ(0, truncated.compare("\xc2\x81\xc2\x81"));

  // Leading NUL bytes. arraysize() counts the literal's terminating NUL, so
  // the constructed string is one byte longer than the escapes shown.
  {
    const char raw[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string with_nuls(raw, arraysize(raw));
    EXPECT_TRUE(Truncated(with_nuls, 4, &truncated));
    EXPECT_EQ(0, truncated.compare(std::string("\x00\x00\xc2\x81", 4)));
  }

  {
    const char raw[] = "\x00\xc2\x81\xc2\x81";
    const std::string with_nuls(raw, arraysize(raw));
    EXPECT_TRUE(Truncated(with_nuls, 4, &truncated));
    EXPECT_EQ(0, truncated.compare(std::string("\x00\xc2\x81", 3)));
  }

  // Invalid UTF-8: nothing is expected to survive.
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  // Invalid UTF-8 mixed with valid UTF-8.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &truncated));
  EXPECT_EQ(0, truncated.compare("\xe1\x80\xbf"));
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &truncated));
  EXPECT_EQ(0, truncated.compare("\xf1\x80\xa0\xbf"));
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
                         10, &truncated));
  EXPECT_EQ(0, truncated.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
                        10, &truncated));
  EXPECT_EQ(0, truncated.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"));
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &truncated));
  EXPECT_EQ(0, truncated.compare("\xef\xbb\xbf" "abc"));

  // Overlong sequences.
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  // Beyond U+10FFFF (the upper limit of the Unicode codespace).
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  // Byte-order marks of UTF-16(BE|LE) and UTF-32(BE|LE).
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  {
    // Built with arraysize(), so the terminating NUL is part of the input.
    const char raw[] = "\x00\x00\xfe\xff";
    const std::string with_nuls(raw, arraysize(raw));
    EXPECT_TRUE(Truncated(with_nuls, 4, &truncated));
    EXPECT_EQ(0, truncated.compare(std::string("\x00\x00", 2)));
  }

  // Variants on the previous case.
  {
    // Exactly four bytes: the terminating NUL is not included here.
    const char raw[] = "\xff\xfe\x00\x00";
    const std::string with_nuls(raw, 4);
    EXPECT_FALSE(Truncated(with_nuls, 4, &truncated));
    EXPECT_EQ(0, truncated.compare(std::string("\xff\xfe\x00\x00", 4)));
  }
  {
    const char raw[] = "\xff\x00\x00\xfe";
    const std::string with_nuls(raw, arraysize(raw));
    EXPECT_TRUE(Truncated(with_nuls, 4, &truncated));
    EXPECT_EQ(0, truncated.compare(std::string("\xff\x00\x00", 3)));
  }

  // Non-characters: U+xxFFF[EF] where xx is 0x00 through 0x10, and
  // <FDD0,FDEF>.
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  // Byte strings taken from legacy encodings. Such strings can happen to be
  // valid UTF-8, but real data usually is not.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &truncated));
  EXPECT_EQ(0, truncated.compare("caf"));
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &truncated));
  EXPECT_EQ(0, truncated.compare("\xa7\x41\xa6\x6e"));
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
                        &truncated));
  EXPECT_EQ(0, truncated.compare("\xa7\x41\xa6\x6e"));

  // The same string object as both input and output; relies on the value left
  // in |truncated| by the previous expectation.
  EXPECT_FALSE(Truncated(truncated, 4, &truncated));
  EXPECT_EQ(0, truncated.compare("\xa7\x41\xa6\x6e"));
  EXPECT_TRUE(Truncated(truncated, 3, &truncated));
  EXPECT_EQ(0, truncated.compare("\xa7\x41"));

  // "abc" with U+201[CD] in windows-125[0-8].
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &truncated));
  EXPECT_EQ(0, truncated.compare("\x93" "abc"));

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6.
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7.
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &truncated));
  EXPECT_EQ(0, truncated.compare(""));
}
226
// Walks trim_cases and trim_cases_ascii through TrimWhitespace(), and also
// checks calls where the input string doubles as the output string.
TEST(StringUtilTest, TrimWhitespace) {
  // Declared outside the loop on purpose: whatever one case leaves behind is
  // still in the buffer when the next case runs.
  std::wstring trimmed;
  for (size_t index = 0; index < arraysize(trim_cases); ++index) {
    const trim_case& expectation = trim_cases[index];
    EXPECT_EQ(expectation.return_value,
              TrimWhitespace(expectation.input, expectation.positions,
                             &trimmed));
    EXPECT_EQ(expectation.output, trimmed);
  }

  // Input and output may be the same string object.
  trimmed = L" This is a test \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(L"This is a test", trimmed);

  // Same aliasing, this time with nothing but whitespace.
  trimmed = L" \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(L"", trimmed);

  // Narrow-string table.
  std::string trimmed_ascii;
  for (size_t index = 0; index < arraysize(trim_cases_ascii); ++index) {
    const trim_case_ascii& expectation = trim_cases_ascii[index];
    EXPECT_EQ(expectation.return_value,
              TrimWhitespace(expectation.input, expectation.positions,
                             &trimmed_ascii));
    EXPECT_EQ(expectation.output, trimmed_ascii);
  }
}
254
// A single wide-string expectation for the CollapseWhitespace test.
struct collapse_case {
  const wchar_t* input;   // First argument passed to CollapseWhitespace().
  const bool trim;        // Second argument passed to CollapseWhitespace().
  const wchar_t* output;  // String the call is expected to return.
};

// Wide-string cases walked by the CollapseWhitespace test.
static const collapse_case collapse_cases[] = {
  // Flag cleared.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L" ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Flag set.
  {L"", true, L""},
  {L"\n", true, L""},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
280
// Checks CollapseWhitespace() against every entry of collapse_cases.
TEST(StringUtilTest, CollapseWhitespace) {
  const size_t case_count = arraysize(collapse_cases);
  for (size_t index = 0; index < case_count; ++index) {
    const collapse_case& expectation = collapse_cases[index];
    EXPECT_EQ(expectation.output,
              CollapseWhitespace(expectation.input, expectation.trim));
  }
}
287
// A single narrow-string expectation for the CollapseWhitespaceASCII test.
struct collapse_case_ascii {
  const char* input;   // First argument passed to CollapseWhitespaceASCII().
  const bool trim;     // Second argument passed to CollapseWhitespaceASCII().
  const char* output;  // String the call is expected to return.
};

// Narrow-string cases walked by the CollapseWhitespaceASCII test.
static const collapse_case_ascii collapse_cases_ascii[] = {
  // Flag cleared.
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {" ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {" Test \n \t String ", false, "Test String"},
  {" Test String", false, "Test String"},
  {"Test String ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Flag set.
  {"", true, ""},
  {"\n", true, ""},
  {" \r ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r Foo ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {" \tFoo bar \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
311
// Checks CollapseWhitespaceASCII() against every entry of
// collapse_cases_ascii.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  const size_t case_count = arraysize(collapse_cases_ascii);
  for (size_t index = 0; index < case_count; ++index) {
    const collapse_case_ascii& expectation = collapse_cases_ascii[index];
    EXPECT_EQ(expectation.output,
              CollapseWhitespaceASCII(expectation.input, expectation.trim));
  }
}
318
// ContainsOnlyWhitespaceASCII() must accept the empty string and
// whitespace-only strings, and reject anything holding another character.
TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
  static const char* const kWhitespaceOnly[] = {
    "",
    " ",
    "\t",
    "\t \r \n ",
  };
  for (size_t i = 0; i < arraysize(kWhitespaceOnly); ++i)
    EXPECT_TRUE(ContainsOnlyWhitespaceASCII(kWhitespaceOnly[i]));

  static const char* const kWithOtherCharacters[] = {
    "a",
    "\thello\r \n ",
  };
  for (size_t i = 0; i < arraysize(kWithOtherCharacters); ++i)
    EXPECT_FALSE(ContainsOnlyWhitespaceASCII(kWithOtherCharacters[i]));
}
327
// Same expectations as the ASCII variant, with each input converted through
// ASCIIToUTF16() before it reaches ContainsOnlyWhitespace().
TEST(StringUtilTest, ContainsOnlyWhitespace) {
  static const char* const kWhitespaceOnly[] = {
    "",
    " ",
    "\t",
    "\t \r \n ",
  };
  for (size_t i = 0; i < arraysize(kWhitespaceOnly); ++i)
    EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(kWhitespaceOnly[i])));

  static const char* const kWithOtherCharacters[] = {
    "a",
    "\thello\r \n ",
  };
  for (size_t i = 0; i < arraysize(kWithOtherCharacters); ++i)
    EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16(kWithOtherCharacters[i])));
}
336
// Checks IsStringUTF8() against well-formed UTF-8, several classes of
// malformed or disallowed sequences, and byte strings taken from legacy
// encodings.
TEST(StringUtilTest, IsStringUTF8) {
  // Well-formed input: ASCII, 2-, 3- and 4-byte sequences, and a mix.
  EXPECT_TRUE(IsStringUTF8("abc"));
  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM

  // Surrogate code points.
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

  // Overlong sequences.
  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5

  // Beyond U+10FFFF (the upper limit of the Unicode codespace).
  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE). The UTF-32 forms contain NUL
  // bytes, so they are built with an explicit length; a bare literal would be
  // cut at the first NUL when converted to std::string.
  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
  EXPECT_FALSE(IsStringUTF8(std::string("\xff\xfe\x00\x00", 4)));

  // Non-characters: U+xxFFF[EF] where xx is 0x00 through 0x10, and
  // <FDD0,FDEF>.
  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE
  // NOTE(review): these bytes decode to an overlong 4-byte form of U+FFFE;
  // U+1FFFE would be "\xf0\x9f\xbf\xbe". Confirm which case was intended.
  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));
  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+FFFFF
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF

  // Strings in legacy encodings. We can certainly make up strings
  // in a legacy encoding that are valid in UTF-8, but in real data,
  // most of them are invalid as UTF-8.
  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC01 in EUC-KR
  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

  // Check that we support embedded NULs. The first uses the canonical UTF-8
  // representation, and the second uses a 2-byte sequence. The second version
  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
  // given code point must be used.
  static const char kEmbeddedNull[] = "embedded\0null";
  EXPECT_TRUE(IsStringUTF8(
      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
}
402
// Round-trips ASCII text between narrow and wide strings with ASCIIToWide()
// and WideToASCII(), and checks IsStringASCII() on both string widths.
TEST(StringUtilTest, ConvertASCII) {
  // kNarrowCases[i] and kWideCases[i] spell the same text.
  static const char* const kNarrowCases[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const kWideCases[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t index = 0; index < arraysize(kNarrowCases); ++index) {
    // Narrow to wide.
    EXPECT_TRUE(IsStringASCII(kNarrowCases[index]));
    std::wstring widened = ASCIIToWide(kNarrowCases[index]);
    EXPECT_EQ(kWideCases[index], widened);

    // Wide to narrow.
    EXPECT_TRUE(IsStringASCII(kWideCases[index]));
    std::string narrowed = WideToASCII(kWideCases[index]);
    EXPECT_EQ(kNarrowCases[index], narrowed);
  }

  // A 0x80 character makes the string non-ASCII in either width.
  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));

  // Empty strings convert to empty strings.
  std::wstring empty_wide;
  std::string empty_narrow;
  EXPECT_EQ(empty_narrow, WideToASCII(empty_wide));
  EXPECT_EQ(empty_wide, ASCIIToWide(empty_narrow));

  // An embedded NUL must survive a round trip. The literal's terminating NUL
  // is excluded from the length; the embedded one is kept.
  const char kTextWithNul[] = "test\0string";
  const size_t kLengthWithNul = arraysize(kTextWithNul) - 1;
  std::string narrow_original(kTextWithNul, kLengthWithNul);
  std::wstring widened_with_nul = ASCIIToWide(narrow_original);
  EXPECT_EQ(kLengthWithNul, widened_with_nul.length());
  std::string narrowed_with_nul = WideToASCII(widened_with_nul);
  EXPECT_EQ(kLengthWithNul, narrowed_with_nul.length());
  EXPECT_EQ(0, narrow_original.compare(narrowed_with_nul));
}
447
// Covers ToUpperASCII() on single characters and StringToUpperASCII() in its
// in-place and value-returning forms, for narrow and wide strings.
TEST(StringUtilTest, ToUpperASCII) {
  // Single narrow characters: upper case, lower case, digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // Single wide characters.
  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
  EXPECT_EQ(L'2', ToUpperASCII(L'2'));

  // In-place conversion through a pointer.
  std::string narrow_in_place("Cc2");
  StringToUpperASCII(&narrow_in_place);
  EXPECT_EQ("CC2", narrow_in_place);

  std::wstring wide_in_place(L"Cc2");
  StringToUpperASCII(&wide_in_place);
  EXPECT_EQ(L"CC2", wide_in_place);

  // Conversion that returns the upper-cased string.
  std::string narrow_source("Cc2");
  std::string narrow_upper = StringToUpperASCII(narrow_source);
  EXPECT_EQ("CC2", narrow_upper);

  std::wstring wide_source(L"Cc2");
  std::wstring wide_upper = StringToUpperASCII(wide_source);
  EXPECT_EQ(L"CC2", wide_upper);
}
473
// Inputs for the LowerCaseEqualsASCII test. Each row gives the same text as a
// wide and a narrow string, plus the lower-case string both are compared to.
static const struct {
  const wchar_t* src_w;  // Wide source text.
  const char* src_a;     // Narrow source text.
  const char* dst;       // Lower-case string passed as the second argument.
} lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},  // Mixed case.
  {L"foo", "foo", "foo"},  // Already lower case.
  {L"FOO", "FOO", "foo"},  // All upper case.
};
483
// Every lowercase_cases row must compare equal to its lower-case form,
// whether the source is given as a wide or as a narrow string.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  const size_t row_count = ARRAYSIZE_UNSAFE(lowercase_cases);
  for (size_t row = 0; row < row_count; ++row) {
    const char* const lowered = lowercase_cases[row].dst;
    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[row].src_w, lowered));
    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[row].src_a, lowered));
  }
}
492
// Checks which DataUnits value GetByteDisplayUnits() selects for byte counts
// ranging from zero up to the largest int64.
TEST(StringUtilTest, GetByteDisplayUnits) {
  static const struct {
    int64 bytes;         // Byte count passed to GetByteDisplayUnits().
    DataUnits expected;  // Unit the call is expected to return.
  } cases[] = {
    {0, DATA_UNITS_BYTE},
    {512, DATA_UNITS_BYTE},
    {10*1024, DATA_UNITS_KIBIBYTE},
    {10*1024*1024, DATA_UNITS_MEBIBYTE},
    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
    // Largest int64. Written with numeric_limits instead of ~(1LL<<63), which
    // shifts a 1 into the sign bit of a signed value.
    {std::numeric_limits<int64>::max(), DATA_UNITS_GIBIBYTE},
#ifdef NDEBUG
    // Negative input is only exercised when NDEBUG is defined.
    {-1, DATA_UNITS_BYTE},
#endif
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
}
512
// Checks the text FormatBytes() produces for a byte count and a unit, both
// without and with the unit suffix.
TEST(StringUtilTest, FormatBytes) {
  static const struct {
    int64 bytes;                      // Byte count passed to FormatBytes().
    DataUnits units;                  // Unit passed to FormatBytes().
    const char* expected;             // Expected text without the suffix.
    const char* expected_with_units;  // Expected text with the suffix.
  } cases[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, DATA_UNITS_BYTE, "0", "0 B"},
    {512, DATA_UNITS_BYTE, "512", "512 B"},
    {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
    {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
    {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
    {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
    {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
    {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
    {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
     "105", "105 GB"},
    // Largest int64. Written with numeric_limits instead of ~(1LL<<63), which
    // shifts a 1 into the sign bit of a signed value.
    {std::numeric_limits<int64>::max(), DATA_UNITS_GIBIBYTE,
     "8589934592", "8589934592 GB"},

    // Byte counts that are not whole multiples of the unit.
    {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
    {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
    {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
     "1.9", "1.9 GB"},
    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
    {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
#ifdef NDEBUG
    // Negative input is only exercised when NDEBUG is defined.
    {-1, DATA_UNITS_BYTE, "", ""},
#endif
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
              FormatBytes(cases[i].bytes, cases[i].units, false));
    EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
              FormatBytes(cases[i].bytes, cases[i].units, true));
  }
}
558
// Applies ReplaceSubstringsAfterOffset() to each row below and compares the
// modified string with the expected text.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    const char* str;                   // Text to modify.
    string16::size_type start_offset;  // Offset passed to the call.
    const char* find_this;             // Substring to look for.
    const char* replace_with;          // Replacement text.
    const char* expected;              // Text expected after the call.
  } cases[] = {
    {"aaa", 0, "a", "b", "bbb"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMakingFour score and seven years agoit"
     "Four score and seven years agomuchFour score and seven years agolonger"
     "Four score and seven years ago"},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    {"abababab", 2, "ab", "c", "abccc"},
  };

  const size_t case_count = ARRAYSIZE_UNSAFE(cases);
  for (size_t index = 0; index < case_count; index++) {
    string16 subject = ASCIIToUTF16(cases[index].str);
    ReplaceSubstringsAfterOffset(&subject, cases[index].start_offset,
                                 ASCIIToUTF16(cases[index].find_this),
                                 ASCIIToUTF16(cases[index].replace_with));
    EXPECT_EQ(ASCIIToUTF16(cases[index].expected), subject);
  }
}
589
// Applies ReplaceFirstSubstringAfterOffset() to each row below and compares
// the modified string with the expected text.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* str;                   // Text to modify.
    string16::size_type start_offset;  // Offset passed to the call.
    const char* find_this;             // Substring to look for.
    const char* replace_with;          // Replacement text.
    const char* expected;              // Text expected after the call.
  } cases[] = {
    {"aaa", 0, "a", "b", "baa"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "",
     "Remov some substrings inging"},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMaking it much longer "},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    {"abababab", 2, "ab", "c", "abcabab"},
  };

  const size_t case_count = ARRAYSIZE_UNSAFE(cases);
  for (size_t index = 0; index < case_count; index++) {
    string16 subject = ASCIIToUTF16(cases[index].str);
    ReplaceFirstSubstringAfterOffset(&subject, cases[index].start_offset,
                                     ASCIIToUTF16(cases[index].find_this),
                                     ASCIIToUTF16(cases[index].replace_with));
    EXPECT_EQ(ASCIIToUTF16(cases[index].expected), subject);
  }
}
619
// HexDigitToInt() must map '0'-'9' to 0-9 and both 'A'-'F' and 'a'-'f' to
// 10-15.
TEST(StringUtilTest, HexDigitToInt) {
  // A digit's position in this table is the value expected for it.
  static const char kDigitsUpper[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value)
    EXPECT_EQ(value, HexDigitToInt(kDigitsUpper[value]));

  // Verify the lower case as well; 'a' corresponds to 10.
  static const char kLettersLower[] = "abcdef";
  for (int offset = 0; offset < 6; ++offset)
    EXPECT_EQ(10 + offset, HexDigitToInt(kLettersLower[offset]));
}
646
647 // This checks where we can use the assignment operator for a va_list. We need
648 // a way to do this since Visual C doesn't support va_copy, but assignment on
649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
650 // capability.
VariableArgsFunc(const char * format,...)651 static void VariableArgsFunc(const char* format, ...) {
652 va_list org;
653 va_start(org, format);
654
655 va_list dup;
656 GG_VA_COPY(dup, org);
657 int i1 = va_arg(org, int);
658 int j1 = va_arg(org, int);
659 char* s1 = va_arg(org, char*);
660 double d1 = va_arg(org, double);
661 va_end(org);
662
663 int i2 = va_arg(dup, int);
664 int j2 = va_arg(dup, int);
665 char* s2 = va_arg(dup, char*);
666 double d2 = va_arg(dup, double);
667
668 EXPECT_EQ(i1, i2);
669 EXPECT_EQ(j1, j2);
670 EXPECT_STREQ(s1, s2);
671 EXPECT_EQ(d1, d2);
672
673 va_end(dup);
674 }
675
// Calls VariableArgsFunc() with the argument types it reads back: two ints, a
// string and a double.
TEST(StringUtilTest, VAList) {
  const int first_number = 45;
  const int second_number = 92;
  const char* const text = "This is interesting";
  const double real_number = 9.21;
  VariableArgsFunc("%d %d %s %lf", first_number, second_number, text,
                   real_number);
}
679
680 // Test for Tokenize
681 template <typename STR>
TokenizeTest()682 void TokenizeTest() {
683 std::vector<STR> r;
684 size_t size;
685
686 size = Tokenize(STR("This is a string"), STR(" "), &r);
687 EXPECT_EQ(4U, size);
688 ASSERT_EQ(4U, r.size());
689 EXPECT_EQ(r[0], STR("This"));
690 EXPECT_EQ(r[1], STR("is"));
691 EXPECT_EQ(r[2], STR("a"));
692 EXPECT_EQ(r[3], STR("string"));
693 r.clear();
694
695 size = Tokenize(STR("one,two,three"), STR(","), &r);
696 EXPECT_EQ(3U, size);
697 ASSERT_EQ(3U, r.size());
698 EXPECT_EQ(r[0], STR("one"));
699 EXPECT_EQ(r[1], STR("two"));
700 EXPECT_EQ(r[2], STR("three"));
701 r.clear();
702
703 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
704 EXPECT_EQ(3U, size);
705 ASSERT_EQ(3U, r.size());
706 EXPECT_EQ(r[0], STR("one"));
707 EXPECT_EQ(r[1], STR("two"));
708 EXPECT_EQ(r[2], STR("three;four"));
709 r.clear();
710
711 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
712 EXPECT_EQ(4U, size);
713 ASSERT_EQ(4U, r.size());
714 EXPECT_EQ(r[0], STR("one"));
715 EXPECT_EQ(r[1], STR("two"));
716 EXPECT_EQ(r[2], STR("three"));
717 EXPECT_EQ(r[3], STR("four"));
718 r.clear();
719
720 size = Tokenize(STR("one, two, three"), STR(","), &r);
721 EXPECT_EQ(3U, size);
722 ASSERT_EQ(3U, r.size());
723 EXPECT_EQ(r[0], STR("one"));
724 EXPECT_EQ(r[1], STR(" two"));
725 EXPECT_EQ(r[2], STR(" three"));
726 r.clear();
727
728 size = Tokenize(STR("one, two, three, "), STR(","), &r);
729 EXPECT_EQ(4U, size);
730 ASSERT_EQ(4U, r.size());
731 EXPECT_EQ(r[0], STR("one"));
732 EXPECT_EQ(r[1], STR(" two"));
733 EXPECT_EQ(r[2], STR(" three"));
734 EXPECT_EQ(r[3], STR(" "));
735 r.clear();
736
737 size = Tokenize(STR("one, two, three,"), STR(","), &r);
738 EXPECT_EQ(3U, size);
739 ASSERT_EQ(3U, r.size());
740 EXPECT_EQ(r[0], STR("one"));
741 EXPECT_EQ(r[1], STR(" two"));
742 EXPECT_EQ(r[2], STR(" three"));
743 r.clear();
744
745 size = Tokenize(STR(""), STR(","), &r);
746 EXPECT_EQ(0U, size);
747 ASSERT_EQ(0U, r.size());
748 r.clear();
749
750 size = Tokenize(STR(","), STR(","), &r);
751 EXPECT_EQ(0U, size);
752 ASSERT_EQ(0U, r.size());
753 r.clear();
754
755 size = Tokenize(STR(",;:."), STR(".:;,"), &r);
756 EXPECT_EQ(0U, size);
757 ASSERT_EQ(0U, r.size());
758 r.clear();
759
760 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
761 EXPECT_EQ(1U, size);
762 ASSERT_EQ(1U, r.size());
763 EXPECT_EQ(r[0], STR("a"));
764 r.clear();
765
766 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
767 EXPECT_EQ(2U, size);
768 ASSERT_EQ(2U, r.size());
769 EXPECT_EQ(r[0], STR("\ta\t"));
770 EXPECT_EQ(r[1], STR("b\tcc"));
771 r.clear();
772 }
773
// Runs the shared Tokenize cases with std::string as the string type.
TEST(StringUtilTest, TokenizeStdString) {
  typedef std::string StringType;
  TokenizeTest<StringType>();
}
777
// Runs the shared Tokenize cases with base::StringPiece as the string type.
TEST(StringUtilTest, TokenizeStringPiece) {
  typedef base::StringPiece StringType;
  TokenizeTest<StringType>();
}
781
782 // Test for JoinString
TEST(StringUtilTest,JoinString)783 TEST(StringUtilTest, JoinString) {
784 std::vector<std::string> in;
785 EXPECT_EQ("", JoinString(in, ','));
786
787 in.push_back("a");
788 EXPECT_EQ("a", JoinString(in, ','));
789
790 in.push_back("b");
791 in.push_back("c");
792 EXPECT_EQ("a,b,c", JoinString(in, ','));
793
794 in.push_back("");
795 EXPECT_EQ("a,b,c,", JoinString(in, ','));
796 in.push_back(" ");
797 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
798 }
799
// Checks StartsWithASCII() on narrow strings and StartsWith() on wide strings
// against one shared table of expectations.
TEST(StringUtilTest, StartsWith) {
  static const struct {
    const char* str;             // Narrow string to examine.
    const wchar_t* wide_str;     // The same string, wide.
    const char* search;          // Narrow prefix to look for.
    const wchar_t* wide_search;  // The same prefix, wide.
    bool case_sensitive;         // Third argument passed to both functions.
    bool expected;               // Result both functions must return.
  } kCases[] = {
    {"javascript:url", L"javascript:url", "javascript", L"javascript",
     true, true},
    {"JavaScript:url", L"JavaScript:url", "javascript", L"javascript",
     true, false},
    {"javascript:url", L"javascript:url", "javascript", L"javascript",
     false, true},
    {"JavaScript:url", L"JavaScript:url", "javascript", L"javascript",
     false, true},
    {"java", L"java", "javascript", L"javascript", true, false},
    {"java", L"java", "javascript", L"javascript", false, false},
    {"", L"", "javascript", L"javascript", false, false},
    {"", L"", "javascript", L"javascript", true, false},
    {"java", L"java", "", L"", false, true},
    {"java", L"java", "", L"", true, true},
  };
  const size_t case_count = ARRAYSIZE_UNSAFE(kCases);

  // Narrow strings first.
  for (size_t i = 0; i < case_count; ++i) {
    EXPECT_EQ(kCases[i].expected,
              StartsWithASCII(kCases[i].str, kCases[i].search,
                              kCases[i].case_sensitive))
        << "case " << i;
  }

  // Then the wide counterparts.
  for (size_t i = 0; i < case_count; ++i) {
    EXPECT_EQ(kCases[i].expected,
              StartsWith(kCases[i].wide_str, kCases[i].wide_search,
                         kCases[i].case_sensitive))
        << "case " << i;
  }
}
823
// Checks EndsWith() on wide strings against a table of expectations.
TEST(StringUtilTest, EndsWith) {
  static const struct {
    const wchar_t* str;     // String to examine.
    const wchar_t* search;  // Suffix to look for.
    bool case_sensitive;    // Third argument passed to EndsWith().
    bool expected;          // Result EndsWith() must return.
  } kCases[] = {
    {L"Foo.plugin", L".plugin", true, true},
    {L"Foo.Plugin", L".plugin", true, false},
    {L"Foo.plugin", L".plugin", false, true},
    {L"Foo.Plugin", L".plugin", false, true},
    // String shorter than the suffix.
    {L".plug", L".plugin", true, false},
    {L".plug", L".plugin", false, false},
    // Suffix text present, but not at the end.
    {L"Foo.plugin Bar", L".plugin", true, false},
    {L"Foo.plugin Bar", L".plugin", false, false},
    // Empty string and/or empty suffix.
    {L"", L".plugin", false, false},
    {L"", L".plugin", true, false},
    {L"Foo.plugin", L"", false, true},
    {L"Foo.plugin", L"", true, true},
    // String identical to the suffix.
    {L".plugin", L".plugin", false, true},
    {L".plugin", L".plugin", true, true},
    {L"", L"", false, true},
    {L"", L"", true, true},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kCases); ++i) {
    EXPECT_EQ(kCases[i].expected,
              EndsWith(kCases[i].str, kCases[i].search,
                       kCases[i].case_sensitive))
        << "case " << i;
  }
}
842
// Verifies the offsets that ReplaceStringPlaceholders() reports through its
// out-parameter. The expected values are the positions of the substituted
// text in the result, stored in placeholder-number order ($1 first), as the
// second scenario below demonstrates.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  // Placeholders appear in ascending order in the format string.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst,
                            &offsets);
  // Fatal assertion: indexing |offsets| below is only safe once we know it
  // holds exactly two entries.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);
  offsets.clear();

  // Placeholders appear in descending order in the format string.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst,
                            &offsets);
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
}
865
// Checks that each of the nine single-digit placeholders ($1..$9) is replaced
// by the corresponding entry of |subst| (string16 flavour).
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("9a"));
  subst.push_back(ASCIIToUTF16("8b"));
  subst.push_back(ASCIIToUTF16("7c"));
  subst.push_back(ASCIIToUTF16("6d"));
  subst.push_back(ASCIIToUTF16("5e"));
  subst.push_back(ASCIIToUTF16("4f"));
  subst.push_back(ASCIIToUTF16("3g"));
  subst.push_back(ASCIIToUTF16("2h"));
  subst.push_back(ASCIIToUTF16("1i"));

  // The offsets out-parameter is not needed here, hence NULL.
  string16 formatted =
      ReplaceStringPlaceholders(
          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);

  // Expected value goes first so gtest labels the values correctly on failure.
  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
}
884
// Checks ReplaceStringPlaceholders() when the format string references more
// placeholders than there are substitutions: per the expectation below,
// $4..$6 (which have no matching entry in |subst|) expand to nothing, while
// $1..$3 are still substituted every time they occur.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("9a"));
  subst.push_back(ASCIIToUTF16("8b"));
  subst.push_back(ASCIIToUTF16("7c"));

  string16 formatted =
      ReplaceStringPlaceholders(
          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);

  // Expected value goes first so gtest labels the values correctly on failure.
  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
}
899
// Checks that each of the nine single-digit placeholders ($1..$9) is replaced
// by the corresponding entry of |subst| (std::string flavour).
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  std::vector<std::string> subst;
  subst.push_back("9a");
  subst.push_back("8b");
  subst.push_back("7c");
  subst.push_back("6d");
  subst.push_back("5e");
  subst.push_back("4f");
  subst.push_back("3g");
  subst.push_back("2h");
  subst.push_back("1i");

  // The offsets out-parameter is not needed here, hence NULL.
  std::string formatted =
      ReplaceStringPlaceholders(
          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);

  // Expected value goes first so gtest labels the values correctly on failure.
  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
}
918
// Checks how runs of consecutive '$' characters are treated: per the
// expectation below, each run comes out one '$' shorter, the digit after it is
// kept literally, and none of the entries in |subst| is inserted.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  std::vector<std::string> subst;
  subst.push_back("a");
  subst.push_back("b");
  subst.push_back("c");

  // Expected value goes first so gtest labels the values correctly on failure.
  EXPECT_EQ("$1 $$2 $$$3",
            ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL));
}
927
// Exercises MatchPattern() wildcard matching ('*', '?' and backslash escapes)
// on narrow (ASCII / UTF-8) strings via a table, then on UTF-16 strings.
TEST(StringUtilTest, MatchPatternTest) {
  static const struct NarrowCase {
    const char* text;
    const char* pattern;
    bool matches;
  } kNarrowCases[] = {
    // Plain ASCII matching.
    { "www.google.com", "*.com", true },
    { "www.google.com", "*", true },
    { "www.google.com", "www*.g*.org", false },
    { "Hello", "H?l?o", true },
    { "www.google.com", "http://*)", false },
    { "www.msn.com", "*.COM", false },
    { "Hello*1234", "He??o\\*1*", true },
    { "", "*.*", false },
    { "", "*", true },
    { "", "?", true },
    { "", "", true },
    { "Hello", "", false },
    { "Hello*", "Hello*", true },
    // Stop after a certain recursion depth.
    { "123456789012345678", "?????????????????*", false },
    // UTF-8 matching.
    { "heart: \xe2\x99\xa0", "*\xe2\x99\xa0", true },
    { "heart: \xe2\x99\xa0.", "heart: ?.", true },
    { "hearts: \xe2\x99\xa0\xe2\x99\xa0", "*", true },
    // Invalid sequences should be handled as a single invalid character.
    { "invalid: \xef\xbf\xbe", "invalid: ?", true },
    // If the pattern has invalid characters, it shouldn't match anything.
    { "\xf4\x90\x80\x80", "\xf4\x90\x80\x80", false },
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kNarrowCases); ++i) {
    const NarrowCase& test_case = kNarrowCases[i];
    EXPECT_EQ(test_case.matches,
              MatchPattern(test_case.text, test_case.pattern))
        << "case " << i << ": MatchPattern(\"" << test_case.text << "\", \""
        << test_case.pattern << "\")";
  }

  // UTF-16 character matching.
  const string16 kHost = UTF8ToUTF16("www.google.com");
  const string16 kDotComPattern = UTF8ToUTF16("*.com");
  EXPECT_TRUE(MatchPattern(kHost, kDotComPattern));

  const string16 kStarredText = UTF8ToUTF16("Hello*1234");
  const string16 kEscapedStarPattern = UTF8ToUTF16("He??o\\*1*");
  EXPECT_TRUE(MatchPattern(kStarredText, kEscapedStarPattern));

  // Consecutive wildcards must be collapsed into a single wildcard; when that
  // does not happen, MatchPattern reaches its maximum recursion depth and this
  // match fails.
  const string16 kHello = UTF8ToUTF16("Hello");
  const string16 kManyStarsPattern =
      UTF8ToUTF16("He********************************o");
  EXPECT_TRUE(MatchPattern(kHello, kManyStarsPattern));
}
966
// Exercises base::strlcpy() and base::wcslcpy() against destination buffers of
// different sizes. Every call copies the 7-character string "abcdefg" and is
// expected to return 7 no matter how much of it fits.
TEST(StringUtilTest, LcpyTest) {
  // Destination comfortably larger than the source: the whole string plus its
  // terminator is copied.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // A destination size of zero: the buffer must be left untouched, and the
  // return value is still the source length.
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", 0));
    // Compare against literals whose signedness matches wchar_t.
#if defined(WCHAR_T_IS_UNSIGNED)
    EXPECT_EQ(1U, wide[0]);
    EXPECT_EQ(2U, wide[1]);
#else
    EXPECT_EQ(1, wide[0]);
    EXPECT_EQ(2, wide[1]);
#endif
  }

  // Destination that fits the string exactly, terminator included.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // Destination one element too small: the last character is dropped so that
  // the terminator still fits.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // Destination far too small: only the first two characters and the
  // terminator are written.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
1026
// Table-driven check of base::IsWprintfFormatPortable(). Per the table, the
// 'l'-qualified lowercase string/char conversions (%ls, %lc) are treated as
// portable while bare %s/%c and the uppercase S, C, F, D, O and U conversions
// are not.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct TestData {
    const wchar_t* input;  // Format string handed to the function under test.
    bool portable;         // Expected verdict.
  } cases[] = {
    { L"%ls", true },
    { L"%s", false },
    { L"%S", false },
    { L"%lS", false },
    { L"Hello, %s", false },
    { L"%lc", true },
    { L"%c", false },
    { L"%C", false },
    { L"%lC", false },
    { L"%ls %s", false },
    { L"%s %ls", false },
    { L"%s %ls %s", false },
    { L"%f", true },
    { L"%f %F", false },
    { L"%d %D", false },
    { L"%o %O", false },
    { L"%u %U", false },
    { L"%f %d %o %u", true },
    { L"%-8d (%02.1f%)", true },
    { L"% 10s", false },
    { L"% 10ls", true }
  };
  // ARRAYSIZE_UNSAFE rather than arraysize: TestData is a function-local type.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    // Name the failing row; the assertion's line number alone is the same for
    // every entry of the table.
    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input))
        << "case " << i << ": " << cases[i].input;
  }
}
1058
// Checks RemoveChars() with the input and output arguments aliasing the same
// string; the expectations show that it returns true only when at least one
// character was removed.
TEST(StringUtilTest, RemoveChars) {
  const char kStripSet[] = "-/+*";
  std::string text("A-+bc/d!*");

  // Every character from the strip set is dropped; the rest keep their order.
  EXPECT_TRUE(RemoveChars(text, kStripSet, &text));
  EXPECT_EQ("Abcd!", text);

  // A second pass has nothing left to strip and leaves the string as it was.
  EXPECT_FALSE(RemoveChars(text, kStripSet, &text));
  EXPECT_EQ("Abcd!", text);

  // Empty input stays empty and reports that nothing was removed.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kStripSet, &text));
  EXPECT_EQ(std::string(), text);
}
1074
// Checks ContainsOnlyChars(), which reports whether every character of the
// input belongs to the given set of characters.
TEST(StringUtilTest, ContainsOnlyChars) {
  const char kNoChars[] = "";
  const char kDigits[] = "1234";
  const char kDigitsReversed[] = "4321";

  // With an empty character set, only the empty input qualifies.
  EXPECT_TRUE(ContainsOnlyChars("", kNoChars));
  EXPECT_FALSE(ContainsOnlyChars("Hello", kNoChars));

  // The empty input is accepted for a non-empty set as well.
  EXPECT_TRUE(ContainsOnlyChars("", kDigits));

  // The order of the characters within the set is irrelevant.
  EXPECT_TRUE(ContainsOnlyChars("1", kDigits));
  EXPECT_TRUE(ContainsOnlyChars("1", kDigitsReversed));
  EXPECT_TRUE(ContainsOnlyChars("123", kDigitsReversed));

  // A single character outside the set makes the whole check fail.
  EXPECT_FALSE(ContainsOnlyChars("123a", kDigitsReversed));
}
1087
1088 } // namespace base
1089