1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/tools/dump_cache/url_to_filename_encoder.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/string_piece.h"
11 #include "base/string_util.h"
12 #include "base/stringprintf.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14
15 using base::StringPiece;
16 using std::string;
17
18 namespace net {
19
// kDirSeparator is the separator the tests expect between path components of
// an encoded filename on this platform; kOtherDirSeparator is the opposite
// slash, which the tests pass to CheckValidChars() as a character that must
// not show up in the encoded output.
//
// Both are const: the tests only read them, and a const namespace-scope
// variable has internal linkage, so these names cannot clash with (or be
// modified through) definitions in other translation units.
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif
27
28 class UrlToFilenameEncoderTest : public ::testing::Test {
29 protected:
UrlToFilenameEncoderTest()30 UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar),
31 dir_sep_(1, kDirSeparator) {
32 }
33
CheckSegmentLength(const StringPiece & escaped_word)34 void CheckSegmentLength(const StringPiece& escaped_word) {
35 std::vector<StringPiece> components;
36 Tokenize(escaped_word, StringPiece("/"), &components);
37 for (size_t i = 0; i < components.size(); ++i) {
38 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
39 components[i].size());
40 }
41 }
42
CheckValidChars(const StringPiece & escaped_word,char invalid_slash)43 void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) {
44 // These characters are invalid in Windows. We add in ', as that's pretty
45 // inconvenient in a Unix filename.
46 //
47 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
48 const string kInvalidChars = "<>:\"|?*'";
49 for (size_t i = 0; i < escaped_word.size(); ++i) {
50 char c = escaped_word[i];
51 EXPECT_EQ(string::npos, kInvalidChars.find(c));
52 EXPECT_NE(invalid_slash, c);
53 EXPECT_NE('\0', c); // only invalid character in Posix
54 EXPECT_GT(0x7E, c); // only English printable characters
55 }
56 }
57
Validate(const string & in_word,const string & gold_word)58 void Validate(const string& in_word, const string& gold_word) {
59 string escaped_word, url;
60 UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word);
61 EXPECT_EQ(gold_word, escaped_word);
62 CheckSegmentLength(escaped_word);
63 CheckValidChars(escaped_word, '\\');
64 UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
65 EXPECT_EQ(in_word, url);
66 }
67
ValidateAllSegmentsSmall(const string & in_word)68 void ValidateAllSegmentsSmall(const string& in_word) {
69 string escaped_word, url;
70 UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word);
71 CheckSegmentLength(escaped_word);
72 CheckValidChars(escaped_word, '\\');
73 UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
74 EXPECT_EQ(in_word, url);
75 }
76
ValidateNoChange(const string & word)77 void ValidateNoChange(const string& word) {
78 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
79 Validate(word, word + escape_);
80 }
81
ValidateEscaped(unsigned char ch)82 void ValidateEscaped(unsigned char ch) {
83 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
84 char escaped[100];
85 const char escape = UrlToFilenameEncoder::kEscapeChar;
86 base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape);
87 Validate(string(1, ch), escaped);
88 }
89
ValidateUrl(const string & url,const string & base_path,bool legacy_escape,const string & gold_filename)90 void ValidateUrl(const string& url, const string& base_path,
91 bool legacy_escape, const string& gold_filename) {
92 string encoded_filename = UrlToFilenameEncoder::Encode(
93 url, base_path, legacy_escape);
94 EXPECT_EQ(gold_filename, encoded_filename);
95 if (!legacy_escape) {
96 CheckSegmentLength(encoded_filename);
97 CheckValidChars(encoded_filename, kOtherDirSeparator);
98 string decoded_url;
99 UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator,
100 &decoded_url);
101 if (url != decoded_url) {
102 EXPECT_EQ(url, "http://" + decoded_url);
103 }
104 }
105 }
106
ValidateUrlOldNew(const string & url,const string & gold_old_filename,const string & gold_new_filename)107 void ValidateUrlOldNew(const string& url, const string& gold_old_filename,
108 const string& gold_new_filename) {
109 ValidateUrl(url, "", true, gold_old_filename);
110 ValidateUrl(url, "", false, gold_new_filename);
111 }
112
ValidateEncodeSame(const string & url1,const string & url2)113 void ValidateEncodeSame(const string& url1, const string& url2) {
114 string filename1 = UrlToFilenameEncoder::Encode(url1, "", false);
115 string filename2 = UrlToFilenameEncoder::Encode(url2, "", false);
116 EXPECT_EQ(filename1, filename2);
117 }
118
119 string escape_;
120 string dir_sep_;
121 };
122
// Inputs that are expected to come out of the encoder unchanged, apart from
// the escape character that ValidateNoChange() appends to the expected value.
TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) {
  static const char* const kUnchangedWords[] = {
    "",
    "abcdefg",
    "abcdefghijklmnopqrstuvwxyz",
    "ZYXWVUT",
    "ZYXWVUTSRQPONMLKJIHGFEDCBA",
    "01234567689",
    "_.=+-",
    "abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA"
    "01234567689_.=+-",
    "index.html",
    "/",
    "/.",
    ".",
    "..",
  };
  const size_t kNumWords = sizeof(kUnchangedWords) / sizeof(kUnchangedWords[0]);

  for (size_t n = 0; n < kNumWords; ++n) {
    ValidateNoChange(kUnchangedWords[n]);
  }
}
139
// Every character listed here, the NUL character, and every byte value from
// 127 through 255 is expected to be written as an escape sequence.
TEST_F(UrlToFilenameEncoderTest, Escapes) {
  const string bad_chars =
      "<>:\"\\|?*"        // Illegal on Windows
      "~`!$^&(){}[]';"   // Bad for Unix shells
      "^@"               // Build tool doesn't like
      "#%"               // Tool doesn't like
      ",";               // The escape char has to be escaped

  for (string::const_iterator it = bad_chars.begin();
       it != bad_chars.end(); ++it) {
    ValidateEscaped(*it);
  }

  // Check non-printable characters.
  ValidateEscaped('\0');
  for (int code = 127; code <= 255; ++code) {
    ValidateEscaped(static_cast<char>(code));
  }
}
158
// Checks the exact encoding of inputs that mix pass-through characters with
// characters that must be escaped, including "." and ".." path components
// and doubled slashes.
TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) {
  Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_);
  Validate("/./", "/" + escape_ + "./" + escape_);
  Validate("/../", "/" + escape_ + "../" + escape_);
  Validate("//", "/" + escape_ + "2F" + escape_);
  Validate("/./leaf", "/" + escape_ + "./leaf" + escape_);
  Validate("/../leaf", "/" + escape_ + "../leaf" + escape_);
  Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_);
  // The second parameter is introduced by a literal '&': the expected value
  // contains escape_ + "26" (0x26 is '&') followed by "param2=y".
  Validate("mysite/u?param1=x&param2=y",
           "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" +
           escape_);
  Validate("search?q=dogs&go=&form=QBLH&qs=n",  // from Latency Labs bing test.
           "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ +
           "26form=QBLH" + escape_ + "26qs=n" + escape_);
  Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true",
           "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ +
           "3Fid=138" + escape_ + "26content=true" + escape_);
}
177
// Checks whole-URL encoding.  Most URLs are checked against both the legacy
// and the current gold filename; path components are joined with dir_sep_.
TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) {
  // Host / directory prefixes shared by the gold values below, each ending
  // in the platform separator.
  const string google_dir = "www.google.com" + dir_sep_;
  const string foo_dir = "www.foo.com" + dir_sep_;
  const string olct_dir = "blogutils.net" + dir_sep_ + "olct" + dir_sep_;
  // How the current encoding is expected to write an escaped '/'.
  const string escaped_slash = escape_ + "2F";

  ValidateUrlOldNew(
      "http://www.google.com/index.html",
      google_dir + "indexx2Ehtml",
      google_dir + "index.html" + escape_);

  ValidateUrlOldNew(
      "http://www.google.com/x/search?hl=en&q=dogs&oq=",
      google_dir + "x" + dir_sep_ + "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D",
      google_dir + "x" + dir_sep_ + "search" + escape_ + "3Fhl=en" + escape_ +
          "26q=dogs" + escape_ + "26oq=" + escape_);

  ValidateUrlOldNew(
      "http://www.foo.com/a//",
      foo_dir + "ax255Cx255Cindexx2Ehtml",
      foo_dir + "a" + dir_sep_ + escaped_slash + escape_);

  // From bug: Double slash preserved.
  ValidateUrl(
      "http://www.foo.com/u?site=http://www.google.com/index.html",
      "", false,
      foo_dir + "u" + escape_ + "3Fsite=http" + escape_ + "3A" + dir_sep_ +
          escaped_slash + "www.google.com" + dir_sep_ + "index.html" +
          escape_);

  ValidateUrlOldNew(
      "http://blogutils.net/olct/online.php?"
      "site=http://thelwordfanfics.blogspot.&interval=600",

      olct_dir + "onlinex2Ephpx3F"
      "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600",

      olct_dir + "online.php" + escape_ + "3Fsite=http" + escape_ + "3A" +
          dir_sep_ + escaped_slash + "thelwordfanfics.blogspot." + escape_ +
          "26interval=600" + escape_);
}
211
212 // From bug: Escapes treated the same as normal char.
TEST_F(UrlToFilenameEncoderTest,UnescapeUrlsBeforeEncode)213 TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) {
214 for (int i = 0; i < 128; ++i) {
215 string unescaped(1, static_cast<char>(i));
216 string escaped = base::StringPrintf("%%%02X", i);
217 ValidateEncodeSame(unescaped, escaped);
218 }
219
220 ValidateEncodeSame(
221 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot"
222 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch",
223
224 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot"
225 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch");
226 }
227
228 // From bug: Filename encoding is not prefix-free.
TEST_F(UrlToFilenameEncoderTest,EscapeSecondSlash)229 TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) {
230 Validate("/", "/" + escape_);
231 Validate("//", "/" + escape_ + "2F" + escape_);
232 Validate("///", "/" + escape_ + "2F" + "/" + escape_);
233 }
234
// A last path component longer than kMaximumSubdirectoryLength is expected
// to be chopped into several components, each chopped piece ending in
// escape_ + "-" before the separator.
TEST_F(UrlToFilenameEncoderTest, LongTail) {
  static const char kLongWord[] =
      "~joebob/briggs/12345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890";
  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
            sizeof(kLongWord));

  // Marker that closes a chopped component, followed by the separator.
  const string chunk_end = escape_ + "-/";

  // Each chopped component below is written as a 64-character and a
  // 62-character literal, which makes the chunk boundaries easy to see.
  string gold_long_word = escape_ + "7Ejoebob/briggs/";
  gold_long_word +=
      "1234567890123456789012345678901234567890123456789012345678901234"
      "56789012345678901234567890123456789012345678901234567890123456";
  gold_long_word += chunk_end;
  gold_long_word +=
      "7890123456789012345678901234567890123456789012345678901234567890"
      "12345678901234567890123456789012345678901234567890123456789012";
  gold_long_word += chunk_end;
  gold_long_word +=
      "3456789012345678901234567890123456789012345678901234567890123456"
      "78901234567890123456789012345678901234567890123456789012345678";
  gold_long_word += chunk_end;
  gold_long_word += "9012345678901234567890";
  gold_long_word += escape_;

  Validate(kLongWord, gold_long_word);
}
262
// Like LongTail, but the last path component is full of '?' characters.
// Each '?' expands to escape_ + "3F", so the encoded component reaches the
// length limit before the input component gets that big.
TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) {
  // Appends |count| copies of |piece| to |*out|.
  struct Repeat {
    static void Append(const string& piece, int count, string* out) {
      for (int n = 0; n < count; ++n) {
        out->append(piece);
      }
    }
  };

  static const char kLongWord[] =
      "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?";
  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
            sizeof(kLongWord));

  const string question = escape_ + "3F";      // an encoded '?'
  const string pattern = "1234567" + question;  // 10 characters
  const string chunk_end = escape_ + "-/";      // closes a chopped component

  string gold_long_word = escape_ + "7Ejoebob/briggs/";

  // First chopped component: twelve whole patterns, then the start of the
  // next one.
  Repeat::Append(pattern, 12, &gold_long_word);
  gold_long_word += "123456" + chunk_end;

  // Second chopped component: the rest of that pattern, twelve whole
  // patterns, then the start of the next one.
  gold_long_word += "7" + question;
  Repeat::Append(pattern, 12, &gold_long_word);
  gold_long_word += "12" + chunk_end;

  // Third chopped component.  Notice that at its end we avoid splitting the
  // (escape_ + "3F") that was generated from the "?", so this component is
  // only 127 characters.
  gold_long_word += "34567" + question;
  Repeat::Append(pattern, 11, &gold_long_word);
  gold_long_word += "1234567" + chunk_end;

  // Final component: the '?' that was not split off, two more patterns, and
  // the leaf suffix.
  gold_long_word += question;
  Repeat::Append(pattern, 2, &gold_long_word);
  gold_long_word += escape_;

  Validate(kLongWord, gold_long_word);
}
296
// Runs of 'x' whose length is within 4 characters either side of
// kMaximumSubdirectoryLength must still encode into components that respect
// the limit and decode back to the input.
TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) {
  for (int delta = -4; delta <= 4; ++delta) {
    const string input(
        delta + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x');
    ValidateAllSegmentsSmall(input);
  }
}
305
// Hit corner cases, +/- 4 characters from kMaximumSubdirectoryLength, with
// the last 'x' of the input replaced by another character.
//
// Two tail characters are tried:
//   '.'  the character this test has always used.  The DoesNotEscape test
//        expects '.' to be left as is, so on its own it does not make the
//        encoded text any longer than the input.
//   '?'  a character that is escaped (DoesEscapeCorrectly expects it to turn
//        into escape_ + "3F"), so the encoded text grows by two characters
//        right at the boundary.  This is the case that ensures we are
//        truncating with '/' *after* the expansion.
TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) {
  static const char kTailChars[] = { '.', '?' };

  for (size_t t = 0; t < sizeof(kTailChars); ++t) {
    for (int i = -4; i <= 4; ++i) {
      string input;
      input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1,
                   'x');
      input.append(1, kTailChars[t]);
      ValidateAllSegmentsSmall(input);
    }
  }
}
317
// The same name can be a leaf in one path and a directory in another: the
// leaf is written with the trailing escape character, the directory is not.
TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) {
  static const char* const kPaths[] = {
    "/a/b/c",     // c is leaf file "c,"
    "/a/b/c/d",   // c is directory "c"
    "/a/b/c/d/",
  };

  for (size_t n = 0; n < sizeof(kPaths) / sizeof(kPaths[0]); ++n) {
    const string path(kPaths[n]);
    Validate(path, path + escape_);
  }
}
323
324
// Encodes a word one character longer than kMaximumSubdirectoryLength with
// '\\' as the separator and checks where the separator was inserted.
TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) {
  const string long_word(
      UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x');

  string escaped_word;
  UrlToFilenameEncoder::EncodeSegment("", long_word, '\\', &escaped_word);

  // Four characters are expected on top of the input: the two-character
  // ",-" escape, one backslash, and the ending ",".
  EXPECT_EQ(long_word.size() + 4, escaped_word.size());

  // Stop here if the output is too short to index at the expected position.
  ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
            escaped_word.size());

  // Check that the backslash got inserted at the correct spot.
  EXPECT_EQ('\\',
            escaped_word[UrlToFilenameEncoder::kMaximumSubdirectoryLength]);
}
339
340 } // namespace net
341
342