1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32
33 #include <google/protobuf/stubs/strutil.h>
34
35 #include <locale.h>
36
37 #include <google/protobuf/stubs/stl_util.h>
38 #include <google/protobuf/testing/googletest.h>
39 #include <gtest/gtest.h>
40
// Visual C++ releases before Visual Studio 2015 (_MSC_VER < 1900) only
// provide _snprintf.  Newer MSVC versions and other Windows toolchains ship a
// standard snprintf, and redefining it as a macro there is unnecessary, so the
// shim is limited to the old compilers that actually need it.
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
44
45 namespace google {
46 namespace protobuf {
47 namespace {
48
49 // TODO(kenton): Copy strutil tests from google3?
50
// Checks that SimpleDtoa() and SimpleFtoa() render 1.5 as "1.5" under the "C"
// numeric locale and, when the system provides it, under the es_ES locale.
TEST(StringUtilityTest, ImmuneToLocales) {
  // Snapshot the active numeric locale.  The name is copied into a string so
  // that it stays valid across the setlocale() calls made below.
  char* const previous = setlocale(LC_NUMERIC, NULL);
  ASSERT_TRUE(previous != NULL);
  const string saved_locale(previous);

  // Baseline: the "C" locale.
  ASSERT_TRUE(setlocale(LC_NUMERIC, "C") != NULL);

  EXPECT_EQ("1.5", SimpleDtoa(1.5));
  EXPECT_EQ("1.5", SimpleFtoa(1.5));

  // Try the plain locale name first and fall back to the ".utf8" variant only
  // if the first attempt failed.
  const bool spanish_available =
      setlocale(LC_NUMERIC, "es_ES") != NULL ||
      setlocale(LC_NUMERIC, "es_ES.utf8") != NULL;
  if (spanish_available) {
    EXPECT_EQ("1.5", SimpleDtoa(1.5));
    EXPECT_EQ("1.5", SimpleFtoa(1.5));
  } else {
    // Some systems may not have the desired locale available.
    GOOGLE_LOG(WARNING)
        << "Couldn't set locale to es_ES. Skipping this test.";
  }

  // Put the numeric locale back the way we found it.
  setlocale(LC_NUMERIC, saved_locale.c_str());
}
76
// Compares the first `len` elements of the indexable objects `x` and `y` with
// EXPECT_EQ, one element at a time.  Each failure message names both operands
// (as written at the call site), the index of the mismatching element, and the
// caller-supplied `msg`.
//
// All arguments are parenthesized so that expressions such as `p + 1` or
// `cond ? 2 : 4` expand as intended, `len` is evaluated exactly once, and the
// body is wrapped in do { } while (0) so that the macro behaves like a single
// statement (callers terminate it with a semicolon).
#define EXPECT_EQ_ARRAY(len, x, y, msg)                                   \
  do {                                                                    \
    const int expect_eq_array_len = static_cast<int>(len);                \
    for (int j = 0; j < expect_eq_array_len; ++j) {                       \
      EXPECT_EQ((x)[j], (y)[j]) << "" # x << " != " # y                   \
                                << " byte " << j << ": " << (msg);        \
    }                                                                     \
  } while (0)
82
// Short Base64 vectors.  Each entry pairs a raw byte sequence (which may
// contain NUL bytes, hence the explicit length) with its padded encoding in
// the standard '+' '/' alphabet.
static struct {
  int plain_length;        // Number of meaningful bytes in |plaintext|.
  const char* plaintext;   // Raw input bytes.
  const char* cyphertext;  // Expected padded Base64 text.
} base64_tests[] = {
  // The empty input encodes to the empty output.
  {0, "", ""},

  // Elementary bit patterns.  Expected values were produced with
  // "echo -n '...' | uuencode -m test".

  // One byte, exactly one bit set (or none).
  {1, "\000", "AA=="},
  {1, "\001", "AQ=="},
  {1, "\002", "Ag=="},
  {1, "\004", "BA=="},
  {1, "\010", "CA=="},
  {1, "\020", "EA=="},
  {1, "\040", "IA=="},
  {1, "\100", "QA=="},
  {1, "\200", "gA=="},

  // One byte, exactly one bit cleared (or none).
  {1, "\377", "/w=="},
  {1, "\376", "/g=="},
  {1, "\375", "/Q=="},
  {1, "\373", "+w=="},
  {1, "\367", "9w=="},
  {1, "\357", "7w=="},
  {1, "\337", "3w=="},
  {1, "\277", "vw=="},
  {1, "\177", "fw=="},

  // Two bytes, at most one bit set.
  {2, "\000\000", "AAA="},
  {2, "\000\001", "AAE="},
  {2, "\000\002", "AAI="},
  {2, "\000\004", "AAQ="},
  {2, "\000\010", "AAg="},
  {2, "\000\020", "ABA="},
  {2, "\000\040", "ACA="},
  {2, "\000\100", "AEA="},
  {2, "\000\200", "AIA="},
  {2, "\001\000", "AQA="},
  {2, "\002\000", "AgA="},
  {2, "\004\000", "BAA="},
  {2, "\010\000", "CAA="},
  {2, "\020\000", "EAA="},
  {2, "\040\000", "IAA="},
  {2, "\100\000", "QAA="},
  {2, "\200\000", "gAA="},

  // Two bytes, at most one bit cleared.
  {2, "\377\377", "//8="},
  {2, "\377\376", "//4="},
  {2, "\377\375", "//0="},
  {2, "\377\373", "//s="},
  {2, "\377\367", "//c="},
  {2, "\377\357", "/+8="},
  {2, "\377\337", "/98="},
  {2, "\377\277", "/78="},
  {2, "\377\177", "/38="},
  {2, "\376\377", "/v8="},
  {2, "\375\377", "/f8="},
  {2, "\373\377", "+/8="},
  {2, "\367\377", "9/8="},
  {2, "\357\377", "7/8="},
  {2, "\337\377", "3/8="},
  {2, "\277\377", "v/8="},
  {2, "\177\377", "f/8="},

  // Three bytes, at most one bit set.
  {3, "\000\000\000", "AAAA"},
  {3, "\000\000\001", "AAAB"},
  {3, "\000\000\002", "AAAC"},
  {3, "\000\000\004", "AAAE"},
  {3, "\000\000\010", "AAAI"},
  {3, "\000\000\020", "AAAQ"},
  {3, "\000\000\040", "AAAg"},
  {3, "\000\000\100", "AABA"},
  {3, "\000\000\200", "AACA"},
  {3, "\000\001\000", "AAEA"},
  {3, "\000\002\000", "AAIA"},
  {3, "\000\004\000", "AAQA"},
  {3, "\000\010\000", "AAgA"},
  {3, "\000\020\000", "ABAA"},
  {3, "\000\040\000", "ACAA"},
  {3, "\000\100\000", "AEAA"},
  {3, "\000\200\000", "AIAA"},
  {3, "\001\000\000", "AQAA"},
  {3, "\002\000\000", "AgAA"},
  {3, "\004\000\000", "BAAA"},
  {3, "\010\000\000", "CAAA"},
  {3, "\020\000\000", "EAAA"},
  {3, "\040\000\000", "IAAA"},
  {3, "\100\000\000", "QAAA"},
  {3, "\200\000\000", "gAAA"},

  // Three bytes, at most one bit cleared.
  {3, "\377\377\377", "////"},
  {3, "\377\377\376", "///+"},
  {3, "\377\377\375", "///9"},
  {3, "\377\377\373", "///7"},
  {3, "\377\377\367", "///3"},
  {3, "\377\377\357", "///v"},
  {3, "\377\377\337", "///f"},
  {3, "\377\377\277", "//+/"},
  {3, "\377\377\177", "//9/"},
  {3, "\377\376\377", "//7/"},
  {3, "\377\375\377", "//3/"},
  {3, "\377\373\377", "//v/"},
  {3, "\377\367\377", "//f/"},
  {3, "\377\357\377", "/+//"},
  {3, "\377\337\377", "/9//"},
  {3, "\377\277\377", "/7//"},
  {3, "\377\177\377", "/3//"},
  {3, "\376\377\377", "/v//"},
  {3, "\375\377\377", "/f//"},
  {3, "\373\377\377", "+///"},
  {3, "\367\377\377", "9///"},
  {3, "\357\377\377", "7///"},
  {3, "\337\377\377", "3///"},
  {3, "\277\377\377", "v///"},
  {3, "\177\377\377", "f///"},

  // Random byte sequences.  Values were obtained with
  //
  //    #! /bin/bash
  //    dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
  //    od -N $1 -t o1 /tmp/bar.random
  //    uuencode -m test < /tmp/bar.random
  //
  // where $1 is the number of bytes (2, 3).

  {2, "\243\361", "o/E="},
  {2, "\024\167", "FHc="},
  {2, "\313\252", "y6o="},
  {2, "\046\041", "JiE="},
  {2, "\145\236", "ZZ4="},
  {2, "\254\325", "rNU="},
  {2, "\061\330", "Mdg="},
  {2, "\245\032", "pRo="},
  {2, "\006\000", "BgA="},
  {2, "\375\131", "/Vk="},
  {2, "\303\210", "w4g="},
  {2, "\040\037", "IB8="},
  {2, "\261\372", "sfo="},
  {2, "\335\014", "3Qw="},
  {2, "\233\217", "m48="},
  {2, "\373\056", "+y4="},
  {2, "\247\232", "p5o="},
  {2, "\107\053", "Rys="},
  {2, "\204\077", "hD8="},
  {2, "\276\211", "vok="},
  {2, "\313\110", "y0g="},
  {2, "\363\376", "8/4="},
  {2, "\251\234", "qZw="},
  {2, "\103\262", "Q7I="},
  {2, "\142\312", "Yso="},
  {2, "\067\211", "N4k="},
  {2, "\220\001", "kAE="},
  {2, "\152\240", "aqA="},
  {2, "\367\061", "9zE="},
  {2, "\133\255", "W60="},
  {2, "\176\035", "fh0="},
  {2, "\032\231", "Gpk="},

  {3, "\013\007\144", "Cwdk"},
  {3, "\030\112\106", "GEpG"},
  {3, "\047\325\046", "J9Um"},
  {3, "\310\160\022", "yHAS"},
  {3, "\131\100\237", "WUCf"},
  {3, "\064\342\134", "NOJc"},
  {3, "\010\177\004", "CH8E"},
  {3, "\345\147\205", "5WeF"},
  {3, "\300\343\360", "wOPw"},
  {3, "\061\240\201", "MaCB"},
  {3, "\225\333\044", "ldsk"},
  {3, "\215\137\352", "jV/q"},
  {3, "\371\147\160", "+Wdw"},
  {3, "\030\320\051", "GNAp"},
  {3, "\044\174\241", "JHyh"},
  {3, "\260\127\037", "sFcf"},
  {3, "\111\045\033", "SSUb"},
  {3, "\202\114\107", "gkxH"},
  {3, "\057\371\042", "L/ki"},
  {3, "\223\247\244", "k6ek"},
  {3, "\047\216\144", "J45k"},
  {3, "\203\070\327", "gzjX"},
  {3, "\247\140\072", "p2A6"},
  {3, "\124\115\116", "VE1O"},
  {3, "\157\162\050", "b3Io"},
  {3, "\357\223\004", "75ME"},
  {3, "\052\117\156", "Kk9u"},
  {3, "\347\154\000", "52wA"},
  {3, "\303\012\142", "wwpi"},
  {3, "\060\035\362", "MB3y"},
  {3, "\130\226\361", "WJbx"},
  {3, "\173\013\071", "ews5"},
  {3, "\336\004\027", "3gQX"},
  {3, "\357\366\234", "7/ac"},
  {3, "\353\304\111", "68RJ"},
  {3, "\024\264\131", "FLRZ"},
  {3, "\075\114\251", "PUyp"},
  {3, "\315\031\225", "zRmV"},
  {3, "\154\201\276", "bIG+"},
  {3, "\200\066\072", "gDY6"},
  {3, "\142\350\267", "Yui3"},
  {3, "\033\000\166", "GwB2"},
  {3, "\210\055\077", "iC0/"},
  {3, "\341\037\124", "4R9U"},
  {3, "\161\103\152", "cUNq"},
  {3, "\270\142\131", "uGJZ"},
  {3, "\337\076\074", "3z48"},
  {3, "\375\106\362", "/Uby"},
  {3, "\227\301\127", "l8FX"},
  {3, "\340\002\234", "4AKc"},
  {3, "\121\064\033", "UTQb"},
  {3, "\157\134\143", "b1xj"},
  {3, "\247\055\327", "py3X"},
  {3, "\340\142\005", "4GIF"},
  {3, "\060\260\143", "MLBj"},
  {3, "\075\203\170", "PYN4"},
  {3, "\143\160\016", "Y3AO"},
  {3, "\313\013\063", "ywsz"},
  {3, "\174\236\135", "fJ5d"},
  {3, "\103\047\026", "QycW"},
  {3, "\365\005\343", "9QXj"},
  {3, "\271\160\223", "uXCT"},
  {3, "\362\255\172", "8q16"},
  {3, "\113\012\015", "SwoN"},

  // Prefixes of the lowercase alphabet, one per length from 0 through 26.
  // Generated by this python script:
  //
  //    from string import lowercase as lc
  //    for i in range(27):
  //      print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
  //                                        lc[:i].encode('base64').strip())

  {0, "", ""},
  {1, "a", "YQ=="},
  {2, "ab", "YWI="},
  {3, "abc", "YWJj"},
  {4, "abcd", "YWJjZA=="},
  {5, "abcde", "YWJjZGU="},
  {6, "abcdef", "YWJjZGVm"},
  {7, "abcdefg", "YWJjZGVmZw=="},
  {8, "abcdefgh", "YWJjZGVmZ2g="},
  {9, "abcdefghi", "YWJjZGVmZ2hp"},
  {10, "abcdefghij", "YWJjZGVmZ2hpag=="},
  {11, "abcdefghijk", "YWJjZGVmZ2hpams="},
  {12, "abcdefghijkl", "YWJjZGVmZ2hpamts"},
  {13, "abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
  {14, "abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
  {15, "abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
  {16, "abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
  {17, "abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
  {18, "abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
  {19, "abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
  {20, "abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
  {21, "abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
  {22, "abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
  {23, "abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
  {24, "abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
  {25, "abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
  {26, "abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
};
343
// Long Base64 vectors.  Each entry pairs a NUL-terminated plaintext with its
// unpadded encoding in the web-safe ('-' '_') alphabet.
static struct {
  const char* plaintext;   // NUL-terminated input text.
  const char* cyphertext;  // Expected unpadded web-safe Base64 text.
} base64_strings[] = {
  // Some google quotes
  // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
  // (Note that we're testing the websafe encoding, though, so if
  // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
  //
  // The plaintext must match the cyphertext byte for byte, so whitespace
  // inside the literals (including any doubled spaces) is significant.
  { "I was always good at math and science, and I never realized "
    "that was unusual or somehow undesirable. So one of the things "
    "I care a lot about is helping to remove that stigma, "
    "to show girls that you can be feminine, you can like the things "
    "that girls like, but you can also be really good at technology. "
    "You can be really good at building things."
    " - Marissa Meyer, Newsweek, 2010-12-22" "\n",

    "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
    "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
    "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
    "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
    "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
    "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
    "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
    "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
    "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },

  { "Typical first year for a new cluster: "
    "~0.5 overheating "
    "~1 PDU failure "
    "~1 rack-move "
    "~1 network rewiring "
    "~20 rack failures "
    "~5 racks go wonky "
    "~8 network maintenances "
    "~12 router reloads "
    "~3 router failures "
    "~dozens of minor 30-second blips for dns "
    "~1000 individual machine failures "
    "~thousands of hard drive failures "
    "slow disks, bad memory, misconfigured machines, flaky machines, etc."
    " - Jeff Dean, The Joys of Real Hardware" "\n",

    "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
    "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
    "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
    "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
    "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
    "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
    "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
    "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
    "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
    "ZWFsIEhhcmR3YXJlCg" },

  // The cyphertext below encodes TWO spaces after "Google." (its first line
  // ends in "LiAg"), so the plaintext has to carry both of them.
  { "I'm the head of the webspam team at Google.  "
    "That means that if you type your name into Google and get porn back, "
    "it's my fault. Unless you're a porn star, in which case porn is a "
    "completely reasonable response."
    " - Matt Cutts, Google Plus" "\n",

    "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
    "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
    "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
    "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
    "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
    "IEdvb2dsZSBQbHVzCg" },

  { "It will still be a long time before machines approach human intelligence. "
    "But luckily, machines don't actually have to be intelligent; "
    "they just have to fake it. Access to a wealth of information, "
    "combined with a rudimentary decision-making capacity, "
    "can often be almost as useful. Of course, the results are better yet "
    "when coupled with intelligence. A reference librarian with access to "
    "a good search engine is a formidable tool."
    " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",

    "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
    "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
    "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
    "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
    "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
    "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
    "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
    "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
    "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
    "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
    "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
    "NAo" },

  // Degenerate edge case
  { "",
    "" },
};
436
// Round-trips every entry of base64_tests through the standard, web-safe
// padded and web-safe unpadded Base64 encoders and decoders (both the
// raw-buffer and the string flavours), pokes at the decoder's handling of
// padding and whitespace, encodes the long base64_strings entries, and
// finally checks that malformed input is rejected.
TEST(Base64, EscapeAndUnescape) {
  // Check the short strings; this tests the math (and boundaries)
  for (size_t i = 0; i < sizeof(base64_tests) / sizeof(base64_tests[0]); ++i) {
    char encode_buffer[100];
    int encode_length;
    char decode_buffer[100];
    int decode_length;
    int cypher_length;
    string decode_str;

    const unsigned char* unsigned_plaintext =
        reinterpret_cast<const unsigned char*>(base64_tests[i].plaintext);

    StringPiece plaintext(base64_tests[i].plaintext,
                          base64_tests[i].plain_length);

    cypher_length = strlen(base64_tests[i].cyphertext);

    // The basic escape function:
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = Base64Escape(unsigned_plaintext,
                                 base64_tests[i].plain_length,
                                 encode_buffer,
                                 sizeof(encode_buffer));
    // Is it of the expected length?
    EXPECT_EQ(encode_length, cypher_length);
    // Would it have been okay to allocate only CalculateBase64EscapeLen()?
    EXPECT_EQ(CalculateBase64EscapedLen(base64_tests[i].plain_length),
              encode_length);

    // Is it the expected encoded value?
    ASSERT_STREQ(encode_buffer, base64_tests[i].cyphertext);

    // If we encode it into a buffer of exactly the right length...
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = Base64Escape(unsigned_plaintext,
                                 base64_tests[i].plain_length,
                                 encode_buffer,
                                 cypher_length);
    // Is it still of the expected length?
    EXPECT_EQ(encode_length, cypher_length);

    // And is the value still correct?  (i.e., not losing the last byte)
    EXPECT_STREQ(encode_buffer, base64_tests[i].cyphertext);

    // If we decode it back:
    decode_str.clear();
    EXPECT_TRUE(Base64Unescape(
        StringPiece(encode_buffer, cypher_length), &decode_str));

    // Is it of the expected length?
    EXPECT_EQ(base64_tests[i].plain_length, decode_str.length());

    // Is it the expected decoded value?
    EXPECT_EQ(plaintext, decode_str);

    // Let's try with a pre-populated string.
    string encoded("this junk should be ignored");
    Base64Escape(string(base64_tests[i].plaintext,
                        base64_tests[i].plain_length),
                 &encoded);
    EXPECT_EQ(encoded, string(encode_buffer, cypher_length));

    string decoded("this junk should be ignored");
    EXPECT_TRUE(Base64Unescape(
        StringPiece(encode_buffer, cypher_length), &decoded));
    EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
    EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);

    // Our decoder treats the padding '=' characters at the end as
    // optional (but if there are any, there must be the correct
    // number of them.)  If encode_buffer has any, run some additional
    // tests that fiddle with them.
    char* first_equals = strchr(encode_buffer, '=');
    if (first_equals) {
      // How many equals signs does the string end with?
      int equals = (*(first_equals+1) == '=') ? 2 : 1;

      // Try chopping off the equals sign(s) entirely.  The decoder
      // should still be okay with this.  Note that every check in this
      // section inspects decoded2, the string that was just written to.
      string decoded2("this junk should also be ignored");
      *first_equals = '\0';
      EXPECT_TRUE(Base64Unescape(
          StringPiece(encode_buffer, first_equals - encode_buffer), &decoded2));
      EXPECT_EQ(decoded2.size(), base64_tests[i].plain_length);
      EXPECT_EQ_ARRAY(decoded2.size(), decoded2, base64_tests[i].plaintext, i);

      // Now test chopping off the equals sign(s) and adding
      // whitespace.  Our decoder should still accept this.
      decoded2.assign("this junk should be ignored");
      *first_equals = ' ';
      *(first_equals+1) = '\0';
      EXPECT_TRUE(Base64Unescape(
          StringPiece(encode_buffer, first_equals - encode_buffer + 1),
          &decoded2));
      EXPECT_EQ(decoded2.size(), base64_tests[i].plain_length);
      EXPECT_EQ_ARRAY(decoded2.size(), decoded2, base64_tests[i].plaintext, i);

      // Now stick a bad character at the end of the string.  The decoder
      // should refuse this string.
      decoded2.assign("this junk should be ignored");
      *first_equals = '?';
      *(first_equals+1) = '\0';
      EXPECT_TRUE(
          !Base64Unescape(
              StringPiece(encode_buffer, first_equals - encode_buffer + 1),
              &decoded2));

      int len;

      // Test whitespace mixed with the padding.  (eg "AA = = ")  The
      // decoder should accept this.
      if (equals == 2) {
        snprintf(first_equals, 6, " = = ");
        len = first_equals - encode_buffer + 5;
      } else {
        snprintf(first_equals, 6, " = ");
        len = first_equals - encode_buffer + 3;
      }
      decoded2.assign("this junk should be ignored");
      EXPECT_TRUE(
          Base64Unescape(StringPiece(encode_buffer, len), &decoded2));
      EXPECT_EQ(decoded2.size(), base64_tests[i].plain_length);
      EXPECT_EQ_ARRAY(decoded2.size(), decoded2, base64_tests[i].plaintext, i);

      // Test whitespace mixed with the padding, but with the wrong
      // number of equals signs (eg "AA = ").  The decoder should
      // refuse these strings.
      if (equals == 1) {
        snprintf(first_equals, 6, " = = ");
        len = first_equals - encode_buffer + 5;
      } else {
        snprintf(first_equals, 6, " = ");
        len = first_equals - encode_buffer + 3;
      }
      EXPECT_TRUE(
          !Base64Unescape(StringPiece(encode_buffer, len), &decoded2));
    }

    // Cool! the basic Base64 encoder/decoder works.
    // Let's try the alternate alphabet: tr -- '+/' '-_'

    char websafe[100];
    memset(websafe, 0, sizeof(websafe));
    strncpy(websafe, base64_tests[i].cyphertext, cypher_length);
    for (size_t c = 0; c < sizeof(websafe); ++c) {
      if ('+' == websafe[c]) { websafe[c] = '-'; }
      if ('/' == websafe[c]) { websafe[c] = '_'; }
    }

    // The websafe escape function:
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = WebSafeBase64Escape(unsigned_plaintext,
                                        base64_tests[i].plain_length,
                                        encode_buffer,
                                        sizeof(encode_buffer),
                                        true);
    // Is it of the expected length?
    EXPECT_EQ(encode_length, cypher_length);
    EXPECT_EQ(
        CalculateBase64EscapedLen(base64_tests[i].plain_length, true),
        encode_length);

    // Is it the expected encoded value?
    EXPECT_STREQ(encode_buffer, websafe);

    // If we encode it into a buffer of exactly the right length...
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = WebSafeBase64Escape(unsigned_plaintext,
                                        base64_tests[i].plain_length,
                                        encode_buffer,
                                        cypher_length,
                                        true);
    // Is it still of the expected length?
    EXPECT_EQ(encode_length, cypher_length);

    // And is the value still correct?  (i.e., not losing the last byte)
    EXPECT_STREQ(encode_buffer, websafe);

    // Let's try the string version of the encoder
    encoded = "this junk should be ignored";
    WebSafeBase64Escape(
        unsigned_plaintext, base64_tests[i].plain_length,
        &encoded, true);
    EXPECT_EQ(encoded.size(), cypher_length);
    EXPECT_STREQ(encoded.c_str(), websafe);

    // If we decode it back:
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          sizeof(decode_buffer));

    // Is it of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // Is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));

    // If we decode it into a buffer of exactly the right length...
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          decode_length);

    // Is it still of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // And is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));

    // Try using '.' for the pad character.
    for (int c = cypher_length - 1; c >= 0 && '=' == encode_buffer[c]; --c) {
      encode_buffer[c] = '.';
    }

    // If we decode it back:
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          sizeof(decode_buffer));

    // Is it of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // Is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));

    // If we decode it into a buffer of exactly the right length...
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          decode_length);

    // Is it still of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // And is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));

    // Let's try the string version of the decoder
    decoded = "this junk should be ignored";
    EXPECT_TRUE(WebSafeBase64Unescape(
        StringPiece(encode_buffer, cypher_length), &decoded));
    EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
    EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);

    // Okay! the websafe Base64 encoder/decoder works.
    // Let's try the unpadded version

    // Truncate the expected text at its first '=' (if any) and shrink
    // cypher_length to match.
    for (size_t c = 0; c < sizeof(websafe); ++c) {
      if ('=' == websafe[c]) {
        websafe[c] = '\0';
        cypher_length = static_cast<int>(c);
        break;
      }
    }

    // The websafe escape function:
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = WebSafeBase64Escape(unsigned_plaintext,
                                        base64_tests[i].plain_length,
                                        encode_buffer,
                                        sizeof(encode_buffer),
                                        false);
    // Is it of the expected length?
    EXPECT_EQ(encode_length, cypher_length);
    EXPECT_EQ(
        CalculateBase64EscapedLen(base64_tests[i].plain_length, false),
        encode_length);

    // Is it the expected encoded value?
    EXPECT_STREQ(encode_buffer, websafe);

    // If we encode it into a buffer of exactly the right length...
    memset(encode_buffer, 0, sizeof(encode_buffer));
    encode_length = WebSafeBase64Escape(unsigned_plaintext,
                                        base64_tests[i].plain_length,
                                        encode_buffer,
                                        cypher_length,
                                        false);
    // Is it still of the expected length?
    EXPECT_EQ(encode_length, cypher_length);

    // And is the value still correct?  (i.e., not losing the last byte)
    EXPECT_STREQ(encode_buffer, websafe);

    // Let's try the (other) string version of the encoder
    string plain(base64_tests[i].plaintext, base64_tests[i].plain_length);
    encoded = "this junk should be ignored";
    WebSafeBase64Escape(plain, &encoded);
    EXPECT_EQ(encoded.size(), cypher_length);
    EXPECT_STREQ(encoded.c_str(), websafe);

    // If we decode it back:
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          sizeof(decode_buffer));

    // Is it of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // Is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));

    // If we decode it into a buffer of exactly the right length...
    memset(decode_buffer, 0, sizeof(decode_buffer));
    decode_length = WebSafeBase64Unescape(encode_buffer,
                                          cypher_length,
                                          decode_buffer,
                                          decode_length);

    // Is it still of the expected length?
    EXPECT_EQ(decode_length, base64_tests[i].plain_length);

    // And is it the expected decoded value?
    EXPECT_EQ(0,
              memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));


    // Let's try the string version of the decoder
    decoded = "this junk should be ignored";
    EXPECT_TRUE(WebSafeBase64Unescape(
        StringPiece(encode_buffer, cypher_length), &decoded));
    EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
    EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);

    // This value works.  Try the next.
  }

  // Now try the long strings, this tests the streaming
  for (size_t i = 0; i < sizeof(base64_strings) / sizeof(base64_strings[0]);
       ++i) {
    const unsigned char* unsigned_plaintext =
        reinterpret_cast<const unsigned char*>(base64_strings[i].plaintext);
    int plain_length = strlen(base64_strings[i].plaintext);
    int cypher_length = strlen(base64_strings[i].cyphertext);
    // One spare byte so the result can be NUL-terminated for EXPECT_STREQ.
    vector<char> buffer(cypher_length+1);
    int encode_length = WebSafeBase64Escape(unsigned_plaintext,
                                            plain_length,
                                            &buffer[0],
                                            buffer.size(),
                                            false);
    EXPECT_EQ(cypher_length, encode_length);
    EXPECT_EQ(
        CalculateBase64EscapedLen(plain_length, false), encode_length);
    buffer[ encode_length ] = '\0';
    EXPECT_STREQ(base64_strings[i].cyphertext, &buffer[0]);
  }

  // Verify the behavior when decoding bad data
  {
    // "ab-/" mixes a web-safe-only character ('-') with a standard-only
    // character ('/'), so both decoders are expected to reject it.
    const char* bad_data = "ab-/";
    string buf;
    EXPECT_FALSE(Base64Unescape(StringPiece(bad_data), &buf));
    EXPECT_TRUE(!WebSafeBase64Unescape(bad_data, &buf));
    EXPECT_TRUE(buf.empty());
  }
}
807
808 } // anonymous namespace
809 } // namespace protobuf
810 } // namespace google
811