1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/escaping.h"
16
17 #include <array>
18 #include <cstdio>
19 #include <cstring>
20 #include <memory>
21 #include <vector>
22
23 #include "gmock/gmock.h"
24 #include "gtest/gtest.h"
25 #include "absl/container/fixed_array.h"
26 #include "absl/strings/str_cat.h"
27
28 #include "absl/strings/internal/escaping_test_common.h"
29
30 namespace {
31
// One table-driven test case: `escaped` is the backslash-escaped spelling of
// a string and `unescaped` holds the raw bytes that spelling stands for. The
// tests in this file convert one member and compare against the other.
struct epair {
  std::string escaped;    // Escaped form of the text.
  std::string unescaped;  // Raw (unescaped) form of the text.
};
36
// Verifies that the output of every escaping flavor round-trips through
// absl::CUnescape(), using the two-argument overload, the three-argument
// (error-reporting) overload, and in-place unescaping.
TEST(CEscape, EscapeAndUnescape) {
  const std::string inputs[] = {
      std::string("foo\nxx\r\b\0023"),
      std::string(""),
      std::string("abc"),
      std::string("\1chad_rules"),
      std::string("\1arnar_drools"),
      std::string("xxxx\r\t'\"\\"),
      std::string("\0xx\0", 4),
      std::string("\x01\x31"),
      std::string("abc\xb\x42\141bc"),
      std::string("123\1\x31\x32\x33"),
      std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
      std::string(
          "\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
  };
  // Run every input through each of the four escaping flavors: octal, hex,
  // UTF-8-safe octal, and UTF-8-safe hex.
  for (int kind = 0; kind < 4; kind++) {
    for (const std::string& original : inputs) {
      std::string escaped;
      switch (kind) {
        case 0:
          escaped = absl::CEscape(original);
          break;
        case 1:
          escaped = absl::CHexEscape(original);
          break;
        case 2:
          escaped = absl::Utf8SafeCEscape(original);
          break;
        case 3:
          escaped = absl::Utf8SafeCHexEscape(original);
          break;
      }
      std::string unescaped_str;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
      EXPECT_EQ(unescaped_str, original);

      // The error-reporting overload must succeed, leave `error` empty, and
      // produce the same decoded bytes as the two-argument overload.
      unescaped_str.erase();
      std::string error;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error));
      EXPECT_EQ(error, "");
      EXPECT_EQ(unescaped_str, original);

      // Check in-place unescaping
      std::string s = escaped;
      EXPECT_TRUE(absl::CUnescape(s, &s));
      ASSERT_EQ(s, original);
    }
  }
  // Check that all possible two character strings can be hex-escaped then
  // unescaped successfully.
  for (int char0 = 0; char0 < 256; char0++) {
    for (int char1 = 0; char1 < 256; char1++) {
      char chars[2];
      // The loop counters are ints so they can reach 256; narrow explicitly.
      chars[0] = static_cast<char>(char0);
      chars[1] = static_cast<char>(char1);
      std::string s(chars, 2);
      std::string escaped = absl::CHexEscape(s);
      std::string unescaped;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
      EXPECT_EQ(s, unescaped);
    }
  }
}
101
// Checks each escaping flavor against a table of known-good outputs. Every
// table is followed directly by the loop that consumes it.
TEST(CEscape, BasicEscaping) {
  // absl::CEscape: non-printable bytes come out as three-digit octal escapes.
  const epair kOctalCases[] = {
      {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
      {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
       "'full of \"sound\" and \"fury\"'"},
      {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
      {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
  };
  for (const epair& tc : kOctalCases) {
    EXPECT_EQ(absl::CEscape(tc.unescaped), tc.escaped);
  }

  // absl::CHexEscape: non-printable bytes come out as \x hex escapes.
  const epair kHexCases[] = {
      {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
      {"I\\\'ve just seen a \\\"face\\\"",
       "I've just seen a \"face\""},
      {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
      {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
  };
  for (const epair& tc : kHexCases) {
    EXPECT_EQ(absl::CHexEscape(tc.unescaped), tc.escaped);
  }

  // absl::Utf8SafeCEscape: octal escapes, with the multi-byte UTF-8
  // sequences in these inputs passed through unchanged.
  const epair kUtf8OctalCases[] = {
      {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
       "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
      {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
       "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
      {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
       "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };
  for (const epair& tc : kUtf8OctalCases) {
    EXPECT_EQ(absl::Utf8SafeCEscape(tc.unescaped), tc.escaped);
  }

  // absl::Utf8SafeCHexEscape: hex escapes, with the multi-byte UTF-8
  // sequences in these inputs passed through unchanged.
  const epair kUtf8HexCases[] = {
      {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
       "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
      {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
       "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
      {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
       "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };
  for (const epair& tc : kUtf8HexCases) {
    EXPECT_EQ(absl::Utf8SafeCHexEscape(tc.unescaped), tc.escaped);
  }
}
155
// Covers \u / \U Unicode escapes that must decode to UTF-8, followed by a
// list of malformed escapes that absl::CUnescape() must reject.
TEST(Unescape, BasicFunction) {
  const epair kValid[] = {
      {"", ""},
      {"\\u0030", "0"},
      {"\\u00A3", "\xC2\xA3"},
      {"\\u22FD", "\xE2\x8B\xBD"},
      {"\\U00010000", "\xF0\x90\x80\x80"},
      {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"},
  };
  for (const epair& tc : kValid) {
    std::string decoded;
    EXPECT_TRUE(absl::CUnescape(tc.escaped, &decoded));
    EXPECT_EQ(decoded, tc.unescaped);
  }

  const std::string kMalformed[] = {
      "\\u1",         // too short
      "\\U1",         // too short
      "\\Uffffff",    // exceeds 0x10ffff (largest Unicode)
      "\\U00110000",  // exceeds 0x10ffff (largest Unicode)
      "\\uD835",      // surrogate character (D800-DFFF)
      "\\U0000DD04",  // surrogate character (D800-DFFF)
      "\\777",        // exceeds 0xff
      "\\xABCD",      // exceeds 0xff
  };
  for (const std::string& input : kMalformed) {
    std::string out;
    std::string error;
    // The error-reporting overload must fail and explain why.
    EXPECT_FALSE(absl::CUnescape(input, &out, &error));
    EXPECT_FALSE(error.empty());

    // The overload without an error string must fail as well.
    out.clear();
    EXPECT_FALSE(absl::CUnescape(input, &out));
  }
}
187
// Fixture for the CUnescape null-character tests. It declares three escaped
// inputs that each contain several null escapes, plus the string that the
// tests pass to absl::CUnescape() as the output destination.
class CUnescapeTest : public testing::Test {
 protected:
  // Escaped input containing several octal null escapes.
  static const char kStringWithMultipleOctalNulls[];
  // Escaped input containing several \x hex null escapes.
  static const char kStringWithMultipleHexNulls[];
  // Escaped input containing \u and \U Unicode null escapes.
  static const char kStringWithMultipleUnicodeNulls[];

  // Receives the unescaped output in each test.
  std::string result_string_;
};
196
// Escaped text exercising the one-, two- and three-digit octal spellings of
// the null character, interleaved with newlines and a literal '0'.
const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
    "\\0\\n"    // null escape \0 plus newline
    "0\\n"      // just a number 0 (not a null escape) plus newline
    "\\00\\12"  // null escape \00 plus octal newline code
    "\\000";    // null escape \000
202
// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
const char CUnescapeTest::kStringWithMultipleHexNulls[] =
    "\\x0\\n"    // null escape \x0 plus newline
    "0\\n"       // just a number 0 (not a null escape) plus newline
    "\\x00\\xa"  // null escape \x00 plus hex newline code
    "\\x000";    // null escape \x000
210
// Escaped text exercising both Unicode spellings of the null character,
// separated by newlines and a literal '0'.
const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
    "\\u0000\\n"    // short-form (4-digit) null escape plus newline
    "0\\n"          // just a number 0 (not a null escape) plus newline
    "\\U00000000";  // long-form (8-digit) null escape
215
// The one-digit octal escape "\0" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
  const std::string escaped = "\\0";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
221
// The two-digit octal escape "\00" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
  const std::string escaped = "\\00";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
227
// The three-digit octal escape "\000" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
  const std::string escaped = "\\000";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
233
// The one-digit hex escape "\x0" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
  const std::string escaped = "\\x0";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
239
// The two-digit hex escape "\x00" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
  const std::string escaped = "\\x00";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
245
// The three-digit hex escape "\x000" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
  const std::string escaped = "\\x000";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
251
// The short-form Unicode escape "\u0000" must decode to exactly one NUL byte.
TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
  const std::string escaped = "\\u0000";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
257
// The long-form Unicode escape "\U00000000" must decode to exactly one NUL
// byte.
TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
  const std::string escaped = "\\U00000000";
  const std::string expected(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
263
// Decodes a string that mixes several octal null escapes with newlines.
TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
  const std::string escaped(kStringWithMultipleOctalNulls);
  // Every escape, newline and null alike, must come out as the character it
  // denotes. The explicit length keeps the embedded NULs in the expectation.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
276
277
// Decodes a string that mixes several \x hex null escapes with newlines.
TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
  const std::string escaped(kStringWithMultipleHexNulls);
  // The explicit length keeps the embedded NULs in the expectation.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
288
// Decodes a string that mixes \u and \U null escapes with newlines.
TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
  const std::string escaped(kStringWithMultipleUnicodeNulls);
  // The explicit length keeps the embedded NULs in the expectation.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0",
      5);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
298
// Table of short Base64 test vectors. Each entry pairs raw input bytes
// (`plaintext`) with the padded standard-alphabet Base64 text they encode to
// (`cyphertext`). Entries whose plaintext contains NUL bytes spell out the
// length explicitly so the bytes are not cut short.
static struct {
  absl::string_view plaintext;   // Raw bytes to encode.
  absl::string_view cyphertext;  // Expected padded Base64 encoding.
} const base64_tests[] = {
    // Empty string.
    {{"", 0}, {"", 0}},
    {{nullptr, 0},
     {"", 0}},  // if length is zero, plaintext ptr must be ignored!

    // Basic bit patterns;
    // values obtained with "echo -n '...' | uuencode -m test"

    {{"\000", 1}, "AA=="},
    {{"\001", 1}, "AQ=="},
    {{"\002", 1}, "Ag=="},
    {{"\004", 1}, "BA=="},
    {{"\010", 1}, "CA=="},
    {{"\020", 1}, "EA=="},
    {{"\040", 1}, "IA=="},
    {{"\100", 1}, "QA=="},
    {{"\200", 1}, "gA=="},

    {{"\377", 1}, "/w=="},
    {{"\376", 1}, "/g=="},
    {{"\375", 1}, "/Q=="},
    {{"\373", 1}, "+w=="},
    {{"\367", 1}, "9w=="},
    {{"\357", 1}, "7w=="},
    {{"\337", 1}, "3w=="},
    {{"\277", 1}, "vw=="},
    {{"\177", 1}, "fw=="},
    {{"\000\000", 2}, "AAA="},
    {{"\000\001", 2}, "AAE="},
    {{"\000\002", 2}, "AAI="},
    {{"\000\004", 2}, "AAQ="},
    {{"\000\010", 2}, "AAg="},
    {{"\000\020", 2}, "ABA="},
    {{"\000\040", 2}, "ACA="},
    {{"\000\100", 2}, "AEA="},
    {{"\000\200", 2}, "AIA="},
    {{"\001\000", 2}, "AQA="},
    {{"\002\000", 2}, "AgA="},
    {{"\004\000", 2}, "BAA="},
    {{"\010\000", 2}, "CAA="},
    {{"\020\000", 2}, "EAA="},
    {{"\040\000", 2}, "IAA="},
    {{"\100\000", 2}, "QAA="},
    {{"\200\000", 2}, "gAA="},

    {{"\377\377", 2}, "//8="},
    {{"\377\376", 2}, "//4="},
    {{"\377\375", 2}, "//0="},
    {{"\377\373", 2}, "//s="},
    {{"\377\367", 2}, "//c="},
    {{"\377\357", 2}, "/+8="},
    {{"\377\337", 2}, "/98="},
    {{"\377\277", 2}, "/78="},
    {{"\377\177", 2}, "/38="},
    {{"\376\377", 2}, "/v8="},
    {{"\375\377", 2}, "/f8="},
    {{"\373\377", 2}, "+/8="},
    {{"\367\377", 2}, "9/8="},
    {{"\357\377", 2}, "7/8="},
    {{"\337\377", 2}, "3/8="},
    {{"\277\377", 2}, "v/8="},
    {{"\177\377", 2}, "f/8="},

    {{"\000\000\000", 3}, "AAAA"},
    {{"\000\000\001", 3}, "AAAB"},
    {{"\000\000\002", 3}, "AAAC"},
    {{"\000\000\004", 3}, "AAAE"},
    {{"\000\000\010", 3}, "AAAI"},
    {{"\000\000\020", 3}, "AAAQ"},
    {{"\000\000\040", 3}, "AAAg"},
    {{"\000\000\100", 3}, "AABA"},
    {{"\000\000\200", 3}, "AACA"},
    {{"\000\001\000", 3}, "AAEA"},
    {{"\000\002\000", 3}, "AAIA"},
    {{"\000\004\000", 3}, "AAQA"},
    {{"\000\010\000", 3}, "AAgA"},
    {{"\000\020\000", 3}, "ABAA"},
    {{"\000\040\000", 3}, "ACAA"},
    {{"\000\100\000", 3}, "AEAA"},
    {{"\000\200\000", 3}, "AIAA"},
    {{"\001\000\000", 3}, "AQAA"},
    {{"\002\000\000", 3}, "AgAA"},
    {{"\004\000\000", 3}, "BAAA"},
    {{"\010\000\000", 3}, "CAAA"},
    {{"\020\000\000", 3}, "EAAA"},
    {{"\040\000\000", 3}, "IAAA"},
    {{"\100\000\000", 3}, "QAAA"},
    {{"\200\000\000", 3}, "gAAA"},

    {{"\377\377\377", 3}, "////"},
    {{"\377\377\376", 3}, "///+"},
    {{"\377\377\375", 3}, "///9"},
    {{"\377\377\373", 3}, "///7"},
    {{"\377\377\367", 3}, "///3"},
    {{"\377\377\357", 3}, "///v"},
    {{"\377\377\337", 3}, "///f"},
    {{"\377\377\277", 3}, "//+/"},
    {{"\377\377\177", 3}, "//9/"},
    {{"\377\376\377", 3}, "//7/"},
    {{"\377\375\377", 3}, "//3/"},
    {{"\377\373\377", 3}, "//v/"},
    {{"\377\367\377", 3}, "//f/"},
    {{"\377\357\377", 3}, "/+//"},
    {{"\377\337\377", 3}, "/9//"},
    {{"\377\277\377", 3}, "/7//"},
    {{"\377\177\377", 3}, "/3//"},
    {{"\376\377\377", 3}, "/v//"},
    {{"\375\377\377", 3}, "/f//"},
    {{"\373\377\377", 3}, "+///"},
    {{"\367\377\377", 3}, "9///"},
    {{"\357\377\377", 3}, "7///"},
    {{"\337\377\377", 3}, "3///"},
    {{"\277\377\377", 3}, "v///"},
    {{"\177\377\377", 3}, "f///"},

    // Random numbers: values obtained with
    //
    //  #! /bin/bash
    //  dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
    //  od -N $1 -t o1 /tmp/bar.random
    //  uuencode -m test < /tmp/bar.random
    //
    // where $1 is the number of bytes (2, 3)

    {{"\243\361", 2}, "o/E="},
    {{"\024\167", 2}, "FHc="},
    {{"\313\252", 2}, "y6o="},
    {{"\046\041", 2}, "JiE="},
    {{"\145\236", 2}, "ZZ4="},
    {{"\254\325", 2}, "rNU="},
    {{"\061\330", 2}, "Mdg="},
    {{"\245\032", 2}, "pRo="},
    {{"\006\000", 2}, "BgA="},
    {{"\375\131", 2}, "/Vk="},
    {{"\303\210", 2}, "w4g="},
    {{"\040\037", 2}, "IB8="},
    {{"\261\372", 2}, "sfo="},
    {{"\335\014", 2}, "3Qw="},
    {{"\233\217", 2}, "m48="},
    {{"\373\056", 2}, "+y4="},
    {{"\247\232", 2}, "p5o="},
    {{"\107\053", 2}, "Rys="},
    {{"\204\077", 2}, "hD8="},
    {{"\276\211", 2}, "vok="},
    {{"\313\110", 2}, "y0g="},
    {{"\363\376", 2}, "8/4="},
    {{"\251\234", 2}, "qZw="},
    {{"\103\262", 2}, "Q7I="},
    {{"\142\312", 2}, "Yso="},
    {{"\067\211", 2}, "N4k="},
    {{"\220\001", 2}, "kAE="},
    {{"\152\240", 2}, "aqA="},
    {{"\367\061", 2}, "9zE="},
    {{"\133\255", 2}, "W60="},
    {{"\176\035", 2}, "fh0="},
    {{"\032\231", 2}, "Gpk="},

    {{"\013\007\144", 3}, "Cwdk"},
    {{"\030\112\106", 3}, "GEpG"},
    {{"\047\325\046", 3}, "J9Um"},
    {{"\310\160\022", 3}, "yHAS"},
    {{"\131\100\237", 3}, "WUCf"},
    {{"\064\342\134", 3}, "NOJc"},
    {{"\010\177\004", 3}, "CH8E"},
    {{"\345\147\205", 3}, "5WeF"},
    {{"\300\343\360", 3}, "wOPw"},
    {{"\061\240\201", 3}, "MaCB"},
    {{"\225\333\044", 3}, "ldsk"},
    {{"\215\137\352", 3}, "jV/q"},
    {{"\371\147\160", 3}, "+Wdw"},
    {{"\030\320\051", 3}, "GNAp"},
    {{"\044\174\241", 3}, "JHyh"},
    {{"\260\127\037", 3}, "sFcf"},
    {{"\111\045\033", 3}, "SSUb"},
    {{"\202\114\107", 3}, "gkxH"},
    {{"\057\371\042", 3}, "L/ki"},
    {{"\223\247\244", 3}, "k6ek"},
    {{"\047\216\144", 3}, "J45k"},
    {{"\203\070\327", 3}, "gzjX"},
    {{"\247\140\072", 3}, "p2A6"},
    {{"\124\115\116", 3}, "VE1O"},
    {{"\157\162\050", 3}, "b3Io"},
    {{"\357\223\004", 3}, "75ME"},
    {{"\052\117\156", 3}, "Kk9u"},
    {{"\347\154\000", 3}, "52wA"},
    {{"\303\012\142", 3}, "wwpi"},
    {{"\060\035\362", 3}, "MB3y"},
    {{"\130\226\361", 3}, "WJbx"},
    {{"\173\013\071", 3}, "ews5"},
    {{"\336\004\027", 3}, "3gQX"},
    {{"\357\366\234", 3}, "7/ac"},
    {{"\353\304\111", 3}, "68RJ"},
    {{"\024\264\131", 3}, "FLRZ"},
    {{"\075\114\251", 3}, "PUyp"},
    {{"\315\031\225", 3}, "zRmV"},
    {{"\154\201\276", 3}, "bIG+"},
    {{"\200\066\072", 3}, "gDY6"},
    {{"\142\350\267", 3}, "Yui3"},
    {{"\033\000\166", 3}, "GwB2"},
    {{"\210\055\077", 3}, "iC0/"},
    {{"\341\037\124", 3}, "4R9U"},
    {{"\161\103\152", 3}, "cUNq"},
    {{"\270\142\131", 3}, "uGJZ"},
    {{"\337\076\074", 3}, "3z48"},
    {{"\375\106\362", 3}, "/Uby"},
    {{"\227\301\127", 3}, "l8FX"},
    {{"\340\002\234", 3}, "4AKc"},
    {{"\121\064\033", 3}, "UTQb"},
    {{"\157\134\143", 3}, "b1xj"},
    {{"\247\055\327", 3}, "py3X"},
    {{"\340\142\005", 3}, "4GIF"},
    {{"\060\260\143", 3}, "MLBj"},
    {{"\075\203\170", 3}, "PYN4"},
    {{"\143\160\016", 3}, "Y3AO"},
    {{"\313\013\063", 3}, "ywsz"},
    {{"\174\236\135", 3}, "fJ5d"},
    {{"\103\047\026", 3}, "QycW"},
    {{"\365\005\343", 3}, "9QXj"},
    {{"\271\160\223", 3}, "uXCT"},
    {{"\362\255\172", 3}, "8q16"},
    {{"\113\012\015", 3}, "SwoN"},

    // various lengths, generated by this python script:
    //
    // from string import lowercase as lc
    // for i in range(27):
    //   print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
    //                                     lc[:i].encode('base64').strip())
    //
    // NOTE: the script prints a leading length column; the entries below do
    // not carry one.

    {{"", 0}, {"", 0}},
    {"a", "YQ=="},
    {"ab", "YWI="},
    {"abc", "YWJj"},
    {"abcd", "YWJjZA=="},
    {"abcde", "YWJjZGU="},
    {"abcdef", "YWJjZGVm"},
    {"abcdefg", "YWJjZGVmZw=="},
    {"abcdefgh", "YWJjZGVmZ2g="},
    {"abcdefghi", "YWJjZGVmZ2hp"},
    {"abcdefghij", "YWJjZGVmZ2hpag=="},
    {"abcdefghijk", "YWJjZGVmZ2hpams="},
    {"abcdefghijkl", "YWJjZGVmZ2hpamts"},
    {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
    {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
    {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
    {"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
    {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
    {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
    {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
    {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
    {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
    {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
    {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
    {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
    {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
    {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
};
560
561 template <typename StringType>
TestEscapeAndUnescape()562 void TestEscapeAndUnescape() {
563 // Check the short strings; this tests the math (and boundaries)
564 for (const auto& tc : base64_tests) {
565 // Test plain base64.
566 StringType encoded("this junk should be ignored");
567 absl::Base64Escape(tc.plaintext, &encoded);
568 EXPECT_EQ(encoded, tc.cyphertext);
569 EXPECT_EQ(absl::Base64Escape(tc.plaintext), tc.cyphertext);
570
571 StringType decoded("this junk should be ignored");
572 EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
573 EXPECT_EQ(decoded, tc.plaintext);
574
575 StringType websafe_with_padding(tc.cyphertext);
576 for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) {
577 if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-';
578 if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_';
579 // Intentionally keeping padding aka '='.
580 }
581
582 // Test plain websafe (aka without padding).
583 StringType websafe(websafe_with_padding);
584 for (unsigned int c = 0; c < websafe.size(); ++c) {
585 if ('=' == websafe[c]) {
586 websafe.resize(c);
587 break;
588 }
589 }
590 encoded = "this junk should be ignored";
591 absl::WebSafeBase64Escape(tc.plaintext, &encoded);
592 EXPECT_EQ(encoded, websafe);
593 EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);
594
595 decoded = "this junk should be ignored";
596 EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
597 EXPECT_EQ(decoded, tc.plaintext);
598 }
599
600 // Now try the long strings, this tests the streaming
601 for (const auto& tc : absl::strings_internal::base64_strings()) {
602 StringType buffer;
603 absl::WebSafeBase64Escape(tc.plaintext, &buffer);
604 EXPECT_EQ(tc.cyphertext, buffer);
605 EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), tc.cyphertext);
606 }
607
608 // Verify the behavior when decoding bad data
609 {
610 absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
611 absl::string_view("abc.\0", 5)};
612 for (absl::string_view bad_data : data_set) {
613 StringType buf;
614 EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
615 EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
616 EXPECT_TRUE(buf.empty());
617 }
618 }
619 }
620
// Runs the shared Base64 escape/unescape checks with std::string as the
// buffer type.
TEST(Base64, EscapeAndUnescape) {
  TestEscapeAndUnescape<std::string>();
}
624
// Checks which padding spellings the Base64 decoders accept for the
// one-byte payload "a" (encoded as "YQ").
TEST(Base64, Padding) {
  // Padding may be omitted entirely, and '.' is accepted as a padding
  // character on equal footing with '='.
  const absl::string_view kAccepted[] = {
      "YQ",
      "YQ==",
      "YQ=.",
      "YQ.=",
      "YQ..",
  };
  for (absl::string_view input : kAccepted) {
    std::string decoded;
    EXPECT_TRUE(absl::Base64Unescape(input, &decoded));
    EXPECT_EQ(decoded, "a");
    std::string websafe_decoded;
    EXPECT_TRUE(absl::WebSafeBase64Unescape(input, &websafe_decoded));
    EXPECT_EQ(websafe_decoded, "a");
  }

  // Any other number of padding characters must be rejected by both
  // decoders.
  const absl::string_view kRejected[] = {
      "YQ=",
      "YQ.",
      "YQ===",
      "YQ==.",
      "YQ=.=",
      "YQ=..",
      "YQ.==",
      "YQ.=.",
      "YQ..=",
      "YQ...",
      "YQ====",
      "YQ....",
      "YQ=====",
      "YQ.....",
  };
  for (absl::string_view input : kRejected) {
    std::string decoded;
    EXPECT_FALSE(absl::Base64Unescape(input, &decoded));
    std::string websafe_decoded;
    EXPECT_FALSE(absl::WebSafeBase64Unescape(input, &websafe_decoded));
  }
}
666
// Round-trips a 3,000,000,000-byte string through Base64. The DISABLED_
// prefix keeps GoogleTest from running it by default.
TEST(Base64, DISABLED_HugeData) {
  constexpr size_t kSize = size_t(3) * 1000 * 1000 * 1000;
  static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  // Each 3-byte input group becomes one 4-character output group.
  constexpr size_t kGroups = kSize / 3;

  const std::string huge(kSize, 'x');
  std::string escaped;
  absl::Base64Escape(huge, &escaped);

  // Build the expected text: "xxx" encodes to "eHh4", so the full encoding
  // is that group repeated once per input group.
  std::string expected_encoding;
  expected_encoding.reserve(kGroups * 4);
  for (size_t remaining = kGroups; remaining > 0; --remaining) {
    expected_encoding.append("eHh4");
  }
  EXPECT_EQ(expected_encoding, escaped);

  std::string unescaped;
  EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
  EXPECT_EQ(huge, unescaped);
}
688
// Converts hex text to bytes and back, including inputs supplied as views
// into larger buffers that hold non-hex characters outside the view.
TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  const std::string mixed_case_hex = "0123456789abcdefABCDEF";
  const std::string lower_case_hex = "0123456789abcdefabcdef";
  const std::string expected_bytes =
      "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";

  // Upper- and lower-case digits decode alike.
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(mixed_case_hex));

  // View covering everything except a trailing '?'.
  const std::string with_suffix = mixed_case_hex + "?";
  const absl::string_view head(with_suffix.data(), with_suffix.size() - 1);
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(head));

  // View covering the hex digits in the middle of surrounding '?' characters.
  const std::string surrounded = "?" + mixed_case_hex + "???";
  const absl::string_view middle(surrounded.data() + 1,
                                 mixed_case_hex.size());
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(middle));

  // Encoding emits lower-case digits only.
  EXPECT_EQ(lower_case_hex, absl::BytesToHexString(expected_bytes));
}
710
711 } // namespace
712