• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 
12 #include <algorithm>
13 
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 
20 using ::testing::ElementsAre;
21 
22 namespace base {
23 
24 static const struct trim_case {
25   const wchar_t* input;
26   const TrimPositions positions;
27   const wchar_t* output;
28   const TrimPositions return_value;
29 } trim_cases[] = {
30   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
31   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
32   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
33   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
34   {L"", TRIM_ALL, L"", TRIM_NONE},
35   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
36   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
37   {L"  ", TRIM_ALL, L"", TRIM_ALL},
38   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
39   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 };
41 
42 static const struct trim_case_ascii {
43   const char* input;
44   const TrimPositions positions;
45   const char* output;
46   const TrimPositions return_value;
47 } trim_cases_ascii[] = {
48   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
49   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
50   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
51   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
52   {"", TRIM_ALL, "", TRIM_NONE},
53   {"  ", TRIM_LEADING, "", TRIM_LEADING},
54   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
55   {"  ", TRIM_ALL, "", TRIM_ALL},
56   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 };
58 
59 namespace {
60 
61 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)62 bool Truncated(const std::string& input,
63                const size_t byte_size,
64                std::string* output) {
65     size_t prev = input.length();
66     TruncateUTF8ToByteSize(input, byte_size, output);
67     return prev != output->length();
68 }
69 
70 }  // namespace
71 
TEST(StringUtilTest,TruncateUTF8ToByteSize)72 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
73   std::string output;
74 
75   // Empty strings and invalid byte_size arguments
76   EXPECT_FALSE(Truncated(std::string(), 0, &output));
77   EXPECT_EQ(output, "");
78   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
79   EXPECT_EQ(output, "");
80   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
81   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
82 
83   // Testing the truncation of valid UTF8 correctly
84   EXPECT_TRUE(Truncated("abc", 2, &output));
85   EXPECT_EQ(output, "ab");
86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
88   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
89   EXPECT_EQ(output.compare("\xc2\x81"), 0);
90   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
91   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
92 
93   {
94     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
95     const std::string array_string(array, arraysize(array));
96     EXPECT_TRUE(Truncated(array_string, 4, &output));
97     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
98   }
99 
100   {
101     const char array[] = "\x00\xc2\x81\xc2\x81";
102     const std::string array_string(array, arraysize(array));
103     EXPECT_TRUE(Truncated(array_string, 4, &output));
104     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
105   }
106 
107   // Testing invalid UTF8
108   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
109   EXPECT_EQ(output.compare(""), 0);
110   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
111   EXPECT_EQ(output.compare(""), 0);
112   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
113   EXPECT_EQ(output.compare(""), 0);
114 
115   // Testing invalid UTF8 mixed with valid UTF8
116   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
117   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
118   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
119   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
120   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
121               10, &output));
122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
123   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
124               10, &output));
125   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
126   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
127   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
128 
129   // Overlong sequences
130   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
131   EXPECT_EQ(output.compare(""), 0);
132   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
133   EXPECT_EQ(output.compare(""), 0);
134   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
135   EXPECT_EQ(output.compare(""), 0);
136   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
137   EXPECT_EQ(output.compare(""), 0);
138   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
139   EXPECT_EQ(output.compare(""), 0);
140   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
141   EXPECT_EQ(output.compare(""), 0);
142   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
143   EXPECT_EQ(output.compare(""), 0);
144   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
145   EXPECT_EQ(output.compare(""), 0);
146   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
147   EXPECT_EQ(output.compare(""), 0);
148   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
149   EXPECT_EQ(output.compare(""), 0);
150   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
151   EXPECT_EQ(output.compare(""), 0);
152 
153   // Beyond U+10FFFF (the upper limit of Unicode codespace)
154   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
155   EXPECT_EQ(output.compare(""), 0);
156   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
157   EXPECT_EQ(output.compare(""), 0);
158   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
159   EXPECT_EQ(output.compare(""), 0);
160 
161   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
162   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
163   EXPECT_EQ(output.compare(""), 0);
164   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
165   EXPECT_EQ(output.compare(""), 0);
166 
167   {
168     const char array[] = "\x00\x00\xfe\xff";
169     const std::string array_string(array, arraysize(array));
170     EXPECT_TRUE(Truncated(array_string, 4, &output));
171     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
172   }
173 
174   // Variants on the previous test
175   {
176     const char array[] = "\xff\xfe\x00\x00";
177     const std::string array_string(array, 4);
178     EXPECT_FALSE(Truncated(array_string, 4, &output));
179     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
180   }
181   {
182     const char array[] = "\xff\x00\x00\xfe";
183     const std::string array_string(array, arraysize(array));
184     EXPECT_TRUE(Truncated(array_string, 4, &output));
185     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
186   }
187 
188   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
189   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
190   EXPECT_EQ(output.compare(""), 0);
191   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
192   EXPECT_EQ(output.compare(""), 0);
193   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
194   EXPECT_EQ(output.compare(""), 0);
195   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
196   EXPECT_EQ(output.compare(""), 0);
197   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
198   EXPECT_EQ(output.compare(""), 0);
199 
200   // Strings in legacy encodings that are valid in UTF-8, but
201   // are invalid as UTF-8 in real data.
202   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
203   EXPECT_EQ(output.compare("caf"), 0);
204   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
205   EXPECT_EQ(output.compare(""), 0);
206   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
208   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
209               &output));
210   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
211 
212   // Testing using the same string as input and output.
213   EXPECT_FALSE(Truncated(output, 4, &output));
214   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
215   EXPECT_TRUE(Truncated(output, 3, &output));
216   EXPECT_EQ(output.compare("\xa7\x41"), 0);
217 
218   // "abc" with U+201[CD] in windows-125[0-8]
219   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
220   EXPECT_EQ(output.compare("\x93" "abc"), 0);
221 
222   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
223   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
224   EXPECT_EQ(output.compare(""), 0);
225 
226   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
227   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
228   EXPECT_EQ(output.compare(""), 0);
229 }
230 
// Runs the wide and narrow trim tables through TrimWhitespace() and
// TrimWhitespaceASCII(), and checks that TrimWhitespace() tolerates the same
// string being used as both input and output.
TEST(StringUtilTest, TrimWhitespace) {
  // Shared across iterations on purpose, so contents left by one case are
  // still present when the next case runs.
  string16 trimmed;
  for (const trim_case& scenario : trim_cases) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespace(WideToUTF16(scenario.input), scenario.positions,
                             &trimmed));
    EXPECT_EQ(WideToUTF16(scenario.output), trimmed);
  }

  // Input and output are the same string object.
  trimmed = ASCIIToUTF16("  This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);

  // Same again, with a string made of nothing but whitespace.
  trimmed = ASCIIToUTF16("  \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(string16(), trimmed);

  std::string trimmed_ascii;
  for (const trim_case_ascii& scenario : trim_cases_ascii) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespaceASCII(scenario.input, scenario.positions,
                                  &trimmed_ascii));
    EXPECT_EQ(scenario.output, trimmed_ascii);
  }
}
259 
// One CollapseWhitespace() scenario over wide-character text: the input, the
// flag passed as the function's second argument, and the expected result.
// In the rows below, a true flag makes whitespace runs that contain a line
// break disappear entirely instead of becoming a single space.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
    // Flag off.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L"  ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L"    Test     \n  \t String    ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L"   Test String", false, L"Test String"},
    {L"Test String    ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    // Flag on.
    {L"", true, L""},
    {L"\n", true, L""},
    {L"  \r  ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r  Foo  ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L"  \tFoo  bar  \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
285 
// Feeds every row of |collapse_cases| through CollapseWhitespace() after
// converting the wide literals to UTF-16.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& scenario : collapse_cases) {
    const string16 expected = WideToUTF16(scenario.output);
    EXPECT_EQ(expected,
              CollapseWhitespace(WideToUTF16(scenario.input), scenario.trim));
  }
}
293 
// Narrow-character counterpart of the wide collapse table: the input, the
// flag passed as CollapseWhitespaceASCII()'s second argument, and the
// expected result.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
    // Flag off.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {"  ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {"    Test     \n  \t String    ", false, "Test String"},
    {"   Test String", false, "Test String"},
    {"Test String    ", false, "Test String"},
    {"Test String", false, "Test String"},
    // Flag on.
    {"", true, ""},
    {"\n", true, ""},
    {"  \r  ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r  Foo  ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {"  \tFoo  bar  \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
317 
// Feeds every row of |collapse_cases_ascii| through CollapseWhitespaceASCII().
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& scenario : collapse_cases_ascii) {
    EXPECT_EQ(scenario.output,
              CollapseWhitespaceASCII(scenario.input, scenario.trim));
  }
}
324 
TEST(StringUtilTest,IsStringUTF8)325 TEST(StringUtilTest, IsStringUTF8) {
326   EXPECT_TRUE(IsStringUTF8("abc"));
327   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
328   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
329   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
330   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
331   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
332 
333   // surrogate code points
334   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
335   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
336   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
337 
338   // overlong sequences
339   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
340   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
341   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
342   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
343   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
345   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
346   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
347   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
348   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
349   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
350 
351   // Beyond U+10FFFF (the upper limit of Unicode codespace)
352   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
353   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
354   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
355 
356   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
357   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
358   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
359   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
360   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
361 
362   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
363   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
364   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
365   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
366   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
367   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
368   // Strings in legacy encodings. We can certainly make up strings
369   // in a legacy encoding that are valid in UTF-8, but in real data,
370   // most of them are invalid as UTF-8.
371   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
372   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
373   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
374   // "abc" with U+201[CD] in windows-125[0-8]
375   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
376   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
377   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
378   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
379   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
380 
381   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
382   // representation, and the second uses a 2-byte sequence. The second version
383   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
384   // given codepoint must be used.
385   static const char kEmbeddedNull[] = "embedded\0null";
386   EXPECT_TRUE(IsStringUTF8(
387       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
388   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
389 }
390 
TEST(StringUtilTest,IsStringASCII)391 TEST(StringUtilTest, IsStringASCII) {
392   static char char_ascii[] =
393       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
394   static char16 char16_ascii[] = {
395       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
396       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
397       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
398   static std::wstring wchar_ascii(
399       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
400 
401   // Test a variety of the fragment start positions and lengths in order to make
402   // sure that bit masking in IsStringASCII works correctly.
403   // Also, test that a non-ASCII character will be detected regardless of its
404   // position inside the string.
405   {
406     const size_t string_length = arraysize(char_ascii) - 1;
407     for (size_t offset = 0; offset < 8; ++offset) {
408       for (size_t len = 0, max_len = string_length - offset; len < max_len;
409            ++len) {
410         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
411         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
412           char_ascii[char_pos] |= '\x80';
413           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
414           char_ascii[char_pos] &= ~'\x80';
415         }
416       }
417     }
418   }
419 
420   {
421     const size_t string_length = arraysize(char16_ascii) - 1;
422     for (size_t offset = 0; offset < 4; ++offset) {
423       for (size_t len = 0, max_len = string_length - offset; len < max_len;
424            ++len) {
425         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
426         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
427           char16_ascii[char_pos] |= 0x80;
428           EXPECT_FALSE(
429               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
430           char16_ascii[char_pos] &= ~0x80;
431           // Also test when the upper half is non-zero.
432           char16_ascii[char_pos] |= 0x100;
433           EXPECT_FALSE(
434               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
435           char16_ascii[char_pos] &= ~0x100;
436         }
437       }
438     }
439   }
440 
441   {
442     const size_t string_length = wchar_ascii.length();
443     for (size_t len = 0; len < string_length; ++len) {
444       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
445       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
446         wchar_ascii[char_pos] |= 0x80;
447         EXPECT_FALSE(
448             IsStringASCII(wchar_ascii.substr(0, len)));
449         wchar_ascii[char_pos] &= ~0x80;
450         wchar_ascii[char_pos] |= 0x100;
451         EXPECT_FALSE(
452             IsStringASCII(wchar_ascii.substr(0, len)));
453         wchar_ascii[char_pos] &= ~0x100;
454 #if defined(WCHAR_T_IS_UTF32)
455         wchar_ascii[char_pos] |= 0x10000;
456         EXPECT_FALSE(
457             IsStringASCII(wchar_ascii.substr(0, len)));
458         wchar_ascii[char_pos] &= ~0x10000;
459 #endif  // WCHAR_T_IS_UTF32
460       }
461     }
462   }
463 }
464 
// Round-trips ASCII text between narrow strings and UTF-16 via
// ASCIIToUTF16() / UTF16ToASCII(), including empty strings and a string with
// an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // Parallel tables: entry i of each array spells the same text.
  static const char* const kNarrowCases[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };
  static const wchar_t* const kWideCases[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t index = 0; index < arraysize(kNarrowCases); ++index) {
    const char* const narrow = kNarrowCases[index];
    const string16 wide_as_utf16 = WideToUTF16(kWideCases[index]);

    EXPECT_TRUE(IsStringASCII(narrow));
    EXPECT_EQ(wide_as_utf16, ASCIIToUTF16(narrow));
    EXPECT_EQ(narrow, UTF16ToASCII(wide_as_utf16));
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Empty strings convert to empty strings in both directions.
  const string16 empty_utf16;
  const std::string empty_narrow;
  EXPECT_EQ(empty_narrow, UTF16ToASCII(empty_utf16));
  EXPECT_EQ(empty_utf16, ASCIIToUTF16(empty_narrow));

  // An embedded NUL must survive both conversions; the length excludes only
  // the literal's terminating NUL.
  const char kWithNul[] = "test\0string";
  const size_t kLengthWithNul = arraysize(kWithNul) - 1;
  const std::string narrow_with_nul(kWithNul, kLengthWithNul);

  const string16 widened = ASCIIToUTF16(narrow_with_nul);
  EXPECT_EQ(kLengthWithNul, widened.length());

  const std::string narrowed = UTF16ToASCII(widened);
  EXPECT_EQ(kLengthWithNul, narrowed.length());
  EXPECT_EQ(0, narrow_with_nul.compare(narrowed));
}
507 
// Checks ToLowerASCII() on single char values, single char16 values, and on
// whole narrow and UTF-16 strings.
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: upper-case letter, lower-case letter, digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16.
  const char16 kUpperC = static_cast<char16>('C');
  const char16 kLowerC = static_cast<char16>('c');
  const char16 kDigit = static_cast<char16>('2');
  EXPECT_EQ(kLowerC, ToLowerASCII(kUpperC));
  EXPECT_EQ(kLowerC, ToLowerASCII(kLowerC));
  EXPECT_EQ(kDigit, ToLowerASCII(kDigit));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
520 
// Checks ToUpperASCII() on single char values, single char16 values, and on
// whole narrow and UTF-16 strings.
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: upper-case letter, lower-case letter, digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16.
  const char16 kUpperC = static_cast<char16>('C');
  const char16 kLowerC = static_cast<char16>('c');
  const char16 kDigit = static_cast<char16>('2');
  EXPECT_EQ(kUpperC, ToUpperASCII(kUpperC));
  EXPECT_EQ(kUpperC, ToUpperASCII(kLowerC));
  EXPECT_EQ(kDigit, ToUpperASCII(kDigit));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
533 
// Checks LowerCaseEqualsASCII() with both a UTF-16 and a narrow first
// argument against an already lower-case narrow string.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* src_a;  // Text in arbitrary case.
    const char* dst;    // Its lower-case form.
  } kCases[] = {
    { "FoO", "foo" },
    { "foo", "foo" },
    { "FOO", "foo" },
  };

  for (const auto& entry : kCases) {
    EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(entry.src_a), entry.dst));
    EXPECT_TRUE(LowerCaseEqualsASCII(entry.src_a, entry.dst));
  }
}
551 
// Checks the text FormatBytesUnlocalized() produces for a range of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    int64_t bytes;
    const char* expected;
  } kCases[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, "0 B"},
    {512, "512 B"},
    {1024*1024, "1.0 MB"},
    {1024*1024*1024, "1.0 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {99LL*1024*1024*1024, "99.0 GB"},
    {105LL*1024*1024*1024, "105 GB"},
    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    // Largest positive 64-bit value.
    {~(1LL << 63), "8192 PB"},

    {99*1024 + 103, "99.1 kB"},
    {1024*1024 + 103, "1.0 MB"},
    {1024*1024 + 205 * 1024, "1.2 MB"},
    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& entry : kCases) {
    EXPECT_EQ(ASCIIToUTF16(entry.expected),
              FormatBytesUnlocalized(entry.bytes));
  }
}
TEST(StringUtilTest,ReplaceSubstringsAfterOffset)586 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
587   static const struct {
588     StringPiece str;
589     size_t start_offset;
590     StringPiece find_this;
591     StringPiece replace_with;
592     StringPiece expected;
593   } cases[] = {
594       {"aaa", 0, "", "b", "aaa"},
595       {"aaa", 1, "", "b", "aaa"},
596       {"aaa", 0, "a", "b", "bbb"},
597       {"aaa", 0, "aa", "b", "ba"},
598       {"aaa", 0, "aa", "bbb", "bbba"},
599       {"aaaaa", 0, "aa", "b", "bba"},
600       {"ababaaababa", 0, "aba", "", "baaba"},
601       {"ababaaababa", 0, "aba", "_", "_baa_ba"},
602       {"ababaaababa", 0, "aba", "__", "__baa__ba"},
603       {"ababaaababa", 0, "aba", "___", "___baa___ba"},
604       {"ababaaababa", 0, "aba", "____", "____baa____ba"},
605       {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
606       {"abb", 0, "ab", "a", "ab"},
607       {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
608       {"Not found", 0, "x", "0", "Not found"},
609       {"Not found again", 5, "x", "0", "Not found again"},
610       {" Making it much longer ", 0, " ", "Four score and seven years ago",
611        "Four score and seven years agoMakingFour score and seven years agoit"
612        "Four score and seven years agomuchFour score and seven years agolonger"
613        "Four score and seven years ago"},
614       {" Making it much much much much shorter ", 0,
615        "Making it much much much much shorter", "", "  "},
616       {"so much much much much much very much much much shorter", 0, "much ",
617        "", "so very shorter"},
618       {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
619       {"Replace me only me once", 9, "me ", "", "Replace me only once"},
620       {"abababab", 2, "ab", "c", "abccc"},
621       {"abababab", 1, "ab", "c", "abccc"},
622       {"abababab", 1, "aba", "c", "abcbab"},
623   };
624 
625   // base::string16 variant
626   for (const auto& scenario : cases) {
627     string16 str = ASCIIToUTF16(scenario.str);
628     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
629                                  ASCIIToUTF16(scenario.find_this),
630                                  ASCIIToUTF16(scenario.replace_with));
631     EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
632   }
633 
634   // std::string with insufficient capacity: expansion must realloc the buffer.
635   for (const auto& scenario : cases) {
636     std::string str = scenario.str.as_string();
637     str.shrink_to_fit();  // This is nonbinding, but it's the best we've got.
638     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
639                                  scenario.find_this, scenario.replace_with);
640     EXPECT_EQ(scenario.expected, str);
641   }
642 
643   // std::string with ample capacity: should be possible to grow in-place.
644   for (const auto& scenario : cases) {
645     std::string str = scenario.str.as_string();
646     str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
647                 2);
648 
649     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
650                                  scenario.find_this, scenario.replace_with);
651     EXPECT_EQ(scenario.expected, str);
652   }
653 }
654 
// Runs a table of scenarios through ReplaceFirstSubstringAfterOffset() on
// string16 data.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* str;                   // Text to operate on.
    string16::size_type start_offset;  // Passed through as the offset argument.
    const char* find_this;             // Substring to look for.
    const char* replace_with;          // Replacement text.
    const char* expected;              // Text expected afterwards.
  } kCases[] = {
    {"aaa", 0, "a", "b", "baa"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "",
      "Remov some substrings inging"},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMaking it much longer "},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& test_case : kCases) {
    string16 subject = ASCIIToUTF16(test_case.str);
    ReplaceFirstSubstringAfterOffset(&subject, test_case.start_offset,
                                     ASCIIToUTF16(test_case.find_this),
                                     ASCIIToUTF16(test_case.replace_with));
    EXPECT_EQ(ASCIIToUTF16(test_case.expected), subject);
  }
}
684 
// Checks HexDigitToInt() for every hexadecimal digit, in upper and lower
// case.
TEST(StringUtilTest, HexDigitToInt) {
  // '0'..'9' then 'A'..'F': the digit at index v must map to v.
  static const char kUpperDigits[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value)
    EXPECT_EQ(value, HexDigitToInt(kUpperDigits[value]));

  // Verify the lower case as well: 'a'..'f' map to 10..15.
  static const char kLowerLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index)
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerLetters[index]));
}
711 
// Checks JoinString() on a vector of std::string that is built up step by
// step: empty, one empty element, one element, several, and with empty
// elements in the middle and at the end.
TEST(StringUtilTest, JoinString) {
  const std::string comma_space(", ");
  std::vector<std::string> pieces;

  // No elements at all.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // A single empty element.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing empty element still gets its separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
733 
// Checks JoinString() on a vector of string16 that is built up step by step,
// following the same sequence as the std::string variant.
TEST(StringUtilTest, JoinString16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // No elements at all.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // A single empty element.
  pieces.push_back(string16());
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
  pieces.clear();

  pieces.push_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  pieces.push_back(ASCIIToUTF16("b"));
  pieces.push_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing empty element still gets its separator.
  pieces.push_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));
  pieces.push_back(ASCIIToUTF16(" "));
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
755 
// Exercises JoinString() over a std::vector<StringPiece>.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string kCommaSpace(", ");
  std::vector<StringPiece> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // A default-constructed piece as the first (and only) part; regression
  // coverage for https://crbug.com/698073.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty piece is still preceded by a separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  // Same parts plus a single space, joined with a different separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
778 
// Exercises JoinString() over a std::vector<StringPiece16>.
TEST(StringUtilTest, JoinStringPiece16) {
  // The pieces below only point at these strings, so the strings are named
  // locals that live for the whole test.
  const string16 kCommaSpace = ASCIIToUTF16(", ");
  const string16 kLetterA = ASCIIToUTF16("a");
  const string16 kLetterB = ASCIIToUTF16("b");
  const string16 kLetterC = ASCIIToUTF16("c");
  const string16 kSingleSpace = ASCIIToUTF16(" ");

  std::vector<StringPiece16> pieces;

  // Nothing to join.
  EXPECT_EQ(string16(), JoinString(pieces, kCommaSpace));

  // A default-constructed piece as the first (and only) part; regression
  // coverage for https://crbug.com/698073.
  pieces.emplace_back();
  EXPECT_EQ(string16(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.push_back(kLetterA);
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, kCommaSpace));

  pieces.push_back(kLetterB);
  pieces.push_back(kLetterC);
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, kCommaSpace));

  // A trailing empty piece is still preceded by a separator.
  pieces.emplace_back();
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, kCommaSpace));

  // Same parts plus a single space, joined with a different separator.
  pieces.push_back(kSingleSpace);
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
805 
// Exercises JoinString() when the parts are supplied as a braced initializer
// list of const char*, std::string or StringPiece elements.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string kCommaSpace(", ");

  // Empty list.
  EXPECT_EQ(std::string(), JoinString({}, kCommaSpace));

  // A default-constructed piece as the first (and only) part; regression
  // coverage for https://crbug.com/698073.
  EXPECT_EQ(std::string(), JoinString({StringPiece()}, kCommaSpace));

  // Elements given as string literals.
  EXPECT_EQ("a", JoinString({"a"}, kCommaSpace));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, kCommaSpace));
  EXPECT_EQ("a, b, c, ",
            JoinString({"a", "b", "c", StringPiece()}, kCommaSpace));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // Elements given as std::string objects.
  const std::string kFirst = "a";
  const std::string kSecond = "b";
  EXPECT_EQ("a, b", JoinString({kFirst, kSecond}, kCommaSpace));

  // Elements given as StringPiece objects viewing the strings above.
  const StringPiece kFirstPiece = kFirst;
  const StringPiece kSecondPiece = kSecond;
  EXPECT_EQ("a, b", JoinString({kFirstPiece, kSecondPiece}, kCommaSpace));
}
829 
// Exercises JoinString() when the parts are supplied as a braced initializer
// list of string16 or StringPiece16 elements.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const string16 kCommaSpace = ASCIIToUTF16(", ");
  const string16 kLetterA = ASCIIToUTF16("a");
  const string16 kLetterB = ASCIIToUTF16("b");
  const string16 kLetterC = ASCIIToUTF16("c");
  const string16 kSingleSpace = ASCIIToUTF16(" ");

  // Empty list.
  EXPECT_EQ(string16(), JoinString({}, kCommaSpace));

  // A default-constructed piece as the first (and only) part; regression
  // coverage for https://crbug.com/698073.
  EXPECT_EQ(string16(), JoinString({StringPiece16()}, kCommaSpace));

  // Elements given as string16 objects.
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString({kLetterA}, kCommaSpace));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"),
            JoinString({kLetterA, kLetterB, kLetterC}, kCommaSpace));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
            JoinString({kLetterA, kLetterB, kLetterC, StringPiece16()},
                       kCommaSpace));
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "),
            JoinString({kLetterA, kLetterB, kLetterC, StringPiece16(),
                        kSingleSpace},
                       ASCIIToUTF16("|")));

  // Elements given as StringPiece16 objects viewing the strings above.
  const StringPiece16 kPieceA = kLetterA;
  const StringPiece16 kPieceB = kLetterB;
  EXPECT_EQ(ASCIIToUTF16("a, b"), JoinString({kPieceA, kPieceB}, kCommaSpace));
}
857 
// Checks StartsWith() on 8-bit and 16-bit strings, under both case-sensitive
// and ASCII-case-insensitive comparison.
TEST(StringUtilTest, StartsWith) {
  // Each row gives the expected result for both comparison modes.
  static const struct {
    const char* text;
    const char* prefix;
    bool matches_sensitive;
    bool matches_insensitive;
  } kCases[] = {
      {"javascript:url", "javascript", true, true},
      {"JavaScript:url", "javascript", false, true},
      // Text shorter than the prefix.
      {"java", "javascript", false, false},
      // Empty text, non-empty prefix.
      {"", "javascript", false, false},
      // Empty prefix.
      {"java", "", true, true},
  };

  for (const auto& row : kCases) {
    // 8-bit strings.
    EXPECT_EQ(row.matches_sensitive,
              StartsWith(row.text, row.prefix, CompareCase::SENSITIVE));
    EXPECT_EQ(row.matches_insensitive,
              StartsWith(row.text, row.prefix,
                         CompareCase::INSENSITIVE_ASCII));

    // The same inputs widened to 16-bit strings.
    const string16 text16 = ASCIIToUTF16(row.text);
    const string16 prefix16 = ASCIIToUTF16(row.prefix);
    EXPECT_EQ(row.matches_sensitive,
              StartsWith(text16, prefix16, CompareCase::SENSITIVE));
    EXPECT_EQ(row.matches_insensitive,
              StartsWith(text16, prefix16, CompareCase::INSENSITIVE_ASCII));
  }
}
903 
// Checks EndsWith() on 16-bit strings, under both case-sensitive and
// ASCII-case-insensitive comparison.
TEST(StringUtilTest, EndsWith) {
  // Each row gives the expected result for both comparison modes.
  static const struct {
    const char* text;
    const char* suffix;
    bool matches_sensitive;
    bool matches_insensitive;
  } kCases[] = {
      {"Foo.plugin", ".plugin", true, true},
      {"Foo.Plugin", ".plugin", false, true},
      // Text shorter than the suffix.
      {".plug", ".plugin", false, false},
      // Suffix present, but not at the end.
      {"Foo.plugin Bar", ".plugin", false, false},
      // Empty text, non-empty suffix.
      {"", ".plugin", false, false},
      // Empty suffix.
      {"Foo.plugin", "", true, true},
      // Text identical to the suffix.
      {".plugin", ".plugin", true, true},
      // Both empty.
      {"", "", true, true},
  };

  for (const auto& row : kCases) {
    const string16 text16 = ASCIIToUTF16(row.text);
    const string16 suffix16 = ASCIIToUTF16(row.suffix);
    EXPECT_EQ(row.matches_sensitive,
              EndsWith(text16, suffix16, CompareCase::SENSITIVE));
    EXPECT_EQ(row.matches_insensitive,
              EndsWith(text16, suffix16, CompareCase::INSENSITIVE_ASCII));
  }
}
937 
// Verifies the offsets that ReplaceStringPlaceholders() reports through its
// out-parameter, for placeholders appearing in order and in swapped order.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst,
                            &offsets);
  // Fatal check: the element accesses below would read out of bounds if the
  // wrong number of offsets came back.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);
  offsets.clear();

  // With the placeholders swapped in the format string, the expected offsets
  // swap as well.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst,
                            &offsets);
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
}
960 
// Checks ReplaceStringPlaceholders() when the format string references more
// placeholders ($1..$6) than there are substitutions (three).
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c")};
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");

  // $4, $5 and $6 have no substitution; the expected output has nothing in
  // their place.
  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"),
            ReplaceStringPlaceholders(pattern, replacements, nullptr));
}
975 
// Checks ReplaceStringPlaceholders() on string16 input with all nine
// single-digit placeholders, each having a substitution.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c"),
      ASCIIToUTF16("6d"), ASCIIToUTF16("5e"), ASCIIToUTF16("4f"),
      ASCIIToUTF16("3g"), ASCIIToUTF16("2h"), ASCIIToUTF16("1i")};
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"),
            ReplaceStringPlaceholders(pattern, replacements, nullptr));
}
994 
// In this test, some of the substitutions are shorter than the placeholders,
// but overall the string gets longer. Both the resulting string and the
// reported offsets are verified.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("9a____"));
  subst.push_back(ASCIIToUTF16("B"));
  subst.push_back(ASCIIToUTF16("7c___"));
  subst.push_back(ASCIIToUTF16("d"));
  subst.push_back(ASCIIToUTF16("5e____"));
  subst.push_back(ASCIIToUTF16("F"));
  subst.push_back(ASCIIToUTF16("3g___"));
  subst.push_back(ASCIIToUTF16("h"));
  subst.push_back(ASCIIToUTF16("1i_____"));

  string16 original = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  string16 expected =
      ASCIIToUTF16("9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i");

  // Without an offsets vector.
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));

  // With an offsets vector.
  std::vector<size_t> offsets;
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
  const std::vector<size_t> expected_offsets = {0,  8,  11, 18, 21,
                                                29, 32, 39, 42};
  // Fatal checks: the loop below indexes |subst| and |expected_offsets| in
  // lockstep, so all three containers must have the same length.
  ASSERT_EQ(subst.size(), expected_offsets.size());
  ASSERT_EQ(subst.size(), offsets.size());
  EXPECT_EQ(expected_offsets, offsets);

  // Sanity-check the table itself: each expected offset must point at the
  // corresponding substitution inside |expected|.
  for (size_t i = 0; i < subst.size(); ++i) {
    EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
              subst[i]);
  }
}
1025 
// Here one substitution is longer than its placeholder while the rest are
// shorter, so the output ends up shorter than the format string. The
// placeholders are also referenced out of numeric order.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("z"), ASCIIToUTF16("y"), ASCIIToUTF16("XYZW"),
      ASCIIToUTF16("x"), ASCIIToUTF16("w")};
  const string16 pattern = ASCIIToUTF16("$3_$4$2$1$5");

  EXPECT_EQ(ASCIIToUTF16("XYZW_xyzw"),
            ReplaceStringPlaceholders(pattern, replacements, nullptr));
}
1042 
// "$16" is expected to be read as placeholder $1 followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> replacements(1, ASCIIToUTF16("1a"));
  const string16 pattern = ASCIIToUTF16(" $16 ");

  EXPECT_EQ(ASCIIToUTF16(" 1a6 "),
            ReplaceStringPlaceholders(pattern, replacements, nullptr));
}
1050 
// "$-" and "$A" are not valid placeholders; the expected output drops them
// entirely while "$1" is substituted normally.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> replacements(1, ASCIIToUTF16("1a"));
  const string16 pattern = ASCIIToUTF16("+$-+$A+$1+");

  EXPECT_EQ(ASCIIToUTF16("+++1a+"),
            ReplaceStringPlaceholders(pattern, replacements, nullptr));
}
1058 
// Checks ReplaceStringPlaceholders() on 8-bit strings with all nine
// single-digit placeholders, each having a substitution.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> replacements = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"};

  const std::string result = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", replacements, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
1077 
// Checks the result and offsets when one placeholder is referenced twice and
// some substitutions are never referenced.
TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
  const std::vector<std::string> replacements = {
      "4",   // Referenced twice.
      "?",   // Unreferenced.
      "!",   // Unreferenced.
      "16",  // Referenced once.
  };
  const std::string pattern = "$1 * $1 == $4";
  const std::string wanted = "4 * 4 == 16";

  // Without an offsets vector.
  EXPECT_EQ(wanted, ReplaceStringPlaceholders(pattern, replacements, nullptr));

  // With an offsets vector: one entry is expected per placeholder occurrence.
  std::vector<size_t> reported_offsets;
  EXPECT_EQ(wanted,
            ReplaceStringPlaceholders(pattern, replacements,
                                      &reported_offsets));
  const std::vector<size_t> wanted_offsets = {0, 4, 9};
  EXPECT_EQ(wanted_offsets, reported_offsets);
}
1093 
// Checks runs of consecutive '$' characters: per the expected output, each
// run comes out one '$' shorter and the digit that follows stays literal.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  std::vector<std::string> subst;
  subst.push_back("a");
  subst.push_back("b");
  subst.push_back("c");
  // Expected value first, matching the argument order used by the other
  // tests in this file.
  EXPECT_EQ("$1 $$2 $$$3",
            ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr));
}
1102 
// Exercises strlcpy() and wcslcpy() with destination buffers that are
// larger than, exactly as large as, and smaller than the source string. In
// every case the functions are expected to return the source length (7).
TEST(StringUtilTest, LcpyTest) {
  const char kSource[] = "abcdefg";
  const wchar_t kWideSource[] = L"abcdefg";

  // Destination with room to spare: the whole string plus terminator fits.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSource, sizeof(kSource)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSource, sizeof(kWideSource)));
  }

  // dst_size == 0: the destination must be left untouched, and the return
  // value is still the length of the source.
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow, kSource, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // Destination exactly large enough for the string and its terminator.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSource, sizeof(kSource)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSource, sizeof(kWideSource)));
  }

  // Destination one element too small: the last character is dropped so the
  // terminator still fits.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // Destination far too small: only a terminated two-character prefix fits.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
1157 
// Table-driven check of IsWprintfFormatPortable(): each wide format string is
// paired with the verdict the function is expected to return.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct {
    const wchar_t* format;
    bool is_portable;
  } kFormats[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const auto& entry : kFormats)
    EXPECT_EQ(entry.is_portable, IsWprintfFormatPortable(entry.format));
}
1188 
// Exercises RemoveChars() with the same string used as input and output.
TEST(StringUtilTest, RemoveChars) {
  const char kUnwanted[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Some characters match: they are stripped and true is returned.
  const bool removed_some = RemoveChars(text, kUnwanted, &text);
  EXPECT_TRUE(removed_some);
  EXPECT_EQ("Abcd!", text);

  // A second pass finds nothing left to remove: false, string unchanged.
  const bool removed_again = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_again);
  EXPECT_EQ("Abcd!", text);

  // Empty input: false, and the string stays empty.
  text.clear();
  const bool removed_from_empty = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_from_empty);
  EXPECT_EQ(std::string(), text);
}
1204 
TEST(StringUtilTest,ReplaceChars)1205 TEST(StringUtilTest, ReplaceChars) {
1206   struct TestData {
1207     const char* input;
1208     const char* replace_chars;
1209     const char* replace_with;
1210     const char* output;
1211     bool result;
1212   } cases[] = {
1213       {"", "", "", "", false},
1214       {"t", "t", "t", "t", true},
1215       {"a", "b", "c", "a", false},
1216       {"b", "b", "c", "c", true},
1217       {"bob", "b", "p", "pop", true},
1218       {"bob", "o", "i", "bib", true},
1219       {"test", "", "", "test", false},
1220       {"test", "", "!", "test", false},
1221       {"test", "z", "!", "test", false},
1222       {"test", "e", "!", "t!st", true},
1223       {"test", "e", "!?", "t!?st", true},
1224       {"test", "ez", "!", "t!st", true},
1225       {"test", "zed", "!?", "t!?st", true},
1226       {"test", "t", "!?", "!?es!?", true},
1227       {"test", "et", "!>", "!>!>s!>", true},
1228       {"test", "zest", "!", "!!!!", true},
1229       {"test", "szt", "!", "!e!!", true},
1230       {"test", "t", "test", "testestest", true},
1231       {"tetst", "t", "test", "testeteststest", true},
1232       {"ttttttt", "t", "-", "-------", true},
1233       {"aAaAaAAaAAa", "A", "", "aaaaa", true},
1234       {"xxxxxxxxxx", "x", "", "", true},
1235       {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
1236       {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
1237       {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
1238       {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
1239       {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
1240       {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
1241   };
1242 
1243   for (const TestData& scenario : cases) {
1244     // Test with separate output and input vars.
1245     std::string output;
1246     bool result = ReplaceChars(scenario.input, scenario.replace_chars,
1247                                scenario.replace_with, &output);
1248     EXPECT_EQ(scenario.result, result) << scenario.input;
1249     EXPECT_EQ(scenario.output, output);
1250   }
1251 
1252   for (const TestData& scenario : cases) {
1253     // Test with an input/output var of limited capacity.
1254     std::string input_output = scenario.input;
1255     input_output.shrink_to_fit();
1256     bool result = ReplaceChars(input_output, scenario.replace_chars,
1257                                scenario.replace_with, &input_output);
1258     EXPECT_EQ(scenario.result, result) << scenario.input;
1259     EXPECT_EQ(scenario.output, input_output);
1260   }
1261 
1262   for (const TestData& scenario : cases) {
1263     // Test with an input/output var of ample capacity; should
1264     // not realloc.
1265     std::string input_output = scenario.input;
1266     input_output.reserve(strlen(scenario.output) * 2);
1267     const void* original_buffer = input_output.data();
1268     bool result = ReplaceChars(input_output, scenario.replace_chars,
1269                                scenario.replace_with, &input_output);
1270     EXPECT_EQ(scenario.result, result) << scenario.input;
1271     EXPECT_EQ(scenario.output, input_output);
1272     EXPECT_EQ(original_buffer, input_output.data());
1273   }
1274 }
1275 
// Exercises ContainsOnlyChars() with an empty character set, a digit set, and
// the ASCII / UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  // With an empty set of allowed characters, only the empty string passes.
  EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
  EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));

  // A small set of digits; the order of characters in the set is irrelevant.
  const char kDigits[] = "4321";
  EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", kDigits));
  EXPECT_TRUE(ContainsOnlyChars("123", kDigits));
  EXPECT_FALSE(ContainsOnlyChars("123a", kDigits));

  // Whitespace sets: the same samples are checked as 8-bit strings against
  // kWhitespaceASCII and as 16-bit strings against kWhitespaceUTF16.
  static const struct {
    const char* text;
    bool only_whitespace;
  } kWhitespaceSamples[] = {
      {"", true},
      {" ", true},
      {"\t", true},
      {"\t \r \n  ", true},
      {"a", false},
      {"\thello\r \n  ", false},
  };
  for (const auto& sample : kWhitespaceSamples) {
    EXPECT_EQ(sample.only_whitespace,
              ContainsOnlyChars(sample.text, kWhitespaceASCII));
    EXPECT_EQ(sample.only_whitespace,
              ContainsOnlyChars(ASCIIToUTF16(sample.text), kWhitespaceUTF16));
  }
}
1303 
// Checks the three-way result (-1 / 0 / 1) of CompareCaseInsensitiveASCII().
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  const int kLess = -1;
  const int kEqual = 0;
  const int kGreater = 1;

  // Equal when case is ignored.
  EXPECT_EQ(kEqual, CompareCaseInsensitiveASCII("", ""));
  EXPECT_EQ(kEqual, CompareCaseInsensitiveASCII("Asdf", "aSDf"));

  // One string is a proper prefix of the other: the shorter one sorts first.
  EXPECT_EQ(kLess, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
  EXPECT_EQ(kGreater, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));

  // Same length, different final letter.
  EXPECT_EQ(kLess, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
  EXPECT_EQ(kGreater, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
}
1316 
// Checks EqualsCaseInsensitiveASCII() on matching and non-matching pairs.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  // Pairs that are equal once case is ignored.
  EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
  EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));

  // Pairs differing in a letter, or in length.
  EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));
  EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
}
1323 
// Checks IsUnicodeWhitespace() against samples of non-whitespace and
// whitespace wide characters.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // Characters that must NOT be classified as whitespace.
  static const wchar_t kNotWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (wchar_t ch : kNotWhitespace)
    EXPECT_FALSE(IsUnicodeWhitespace(ch));

  // Characters that must be classified as whitespace.
  static const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (wchar_t ch : kWhitespace)
    EXPECT_TRUE(IsUnicodeWhitespace(ch));
}
1343 
1344 class WriteIntoTest : public testing::Test {
1345  protected:
WritesCorrectly(size_t num_chars)1346   static void WritesCorrectly(size_t num_chars) {
1347     std::string buffer;
1348     char kOriginal[] = "supercali";
1349     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1350     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1351     // string at the first \0.
1352     EXPECT_EQ(std::string(kOriginal,
1353                           std::min(num_chars, arraysize(kOriginal) - 1)),
1354               std::string(buffer.c_str()));
1355     EXPECT_EQ(num_chars, buffer.size());
1356   }
1357 };
1358 
// Checks that WriteInto() sizes strings correctly and that writing through
// it does not disturb another string initialized from the same value.
TEST_F(WriteIntoTest, WriteInto) {
  // WriteInto must reserve enough space and leave the string with the right
  // size, for tiny and large requests alike.
  static const size_t kLengths[] = {1, 2, 5000};
  for (size_t length : kLengths)
    WritesCorrectly(length);

  // With a Copy-on-Write string implementation, writing into a copy must not
  // modify the string it was copied from.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string untouched = kLive;
  std::string overwritten = untouched;

  char* const raw = WriteInto(&overwritten, 5);
  strncpy(raw, kDead, 4);

  EXPECT_EQ(kDead, overwritten);
  EXPECT_EQ(4u, overwritten.size());
  EXPECT_EQ(kLive, untouched);
  EXPECT_EQ(4u, untouched.size());
}
1378 
1379 }  // namespace base
1380