• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #include <string>
5 
6 #include "base/file_util.h"
7 #include "base/memory/scoped_temp_dir.h"
8 #include "chrome/browser/net/url_request_mock_http_job.h"
9 #include "chrome/browser/download/save_package.h"
10 #include "chrome/common/pref_names.h"
11 #include "chrome/test/automation/browser_proxy.h"
12 #include "chrome/test/automation/tab_proxy.h"
13 #include "chrome/test/ui/ui_test.h"
14 #include "chrome/test/ui_test_utils.h"
15 
16 static const FilePath::CharType* kTestDir = FILE_PATH_LITERAL("encoding_tests");
17 
18 class BrowserEncodingTest : public UITest {
19  protected:
BrowserEncodingTest()20   BrowserEncodingTest() : UITest() {}
21 
22   // Make sure the content of the page are as expected
23   // after override or auto-detect
CheckFile(const FilePath & generated_file,const FilePath & expected_result_file,bool check_equal)24   void CheckFile(const FilePath& generated_file,
25                  const FilePath& expected_result_file,
26                  bool check_equal) {
27     FilePath expected_result_filepath = ui_test_utils::GetTestFilePath(
28         FilePath(kTestDir), expected_result_file);
29 
30     ASSERT_TRUE(file_util::PathExists(expected_result_filepath));
31     WaitForGeneratedFileAndCheck(generated_file,
32                                  expected_result_filepath,
33                                  true,  // We do care whether they are equal.
34                                  check_equal,
35                                  true);  // Delete the generated file when done.
36   }
37 
SetUp()38   virtual void SetUp() {
39     UITest::SetUp();
40     ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
41     save_dir_ = temp_dir_.path();
42     temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
43   }
44 
45   ScopedTempDir temp_dir_;
46   FilePath save_dir_;
47   FilePath temp_sub_resource_dir_;
48 };
49 
// TODO(jnd): 1. Some encodings are missing here. They'll be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. WebKit layout tests cover some, but testing in the UI test
// is also necessary.
// Navigates to each page listed in the table below and checks that the
// encoding the tab reports for it matches the encoding name paired with that
// page.
TEST_F(BrowserEncodingTest, TestEncodingAliasMapping) {
  struct EncodingTestData {
    const char* file_name;      // Page inside the alias mapping directory.
    const char* encoding_name;  // Encoding the tab is expected to report.
  };

  const EncodingTestData kEncodingTestDatas[] = {
    { "Big5.html", "Big5" },
    { "EUC-JP.html", "EUC-JP" },
    { "gb18030.html", "gb18030" },
    { "iso-8859-1.html", "ISO-8859-1" },
    { "ISO-8859-2.html", "ISO-8859-2" },
    { "ISO-8859-4.html", "ISO-8859-4" },
    { "ISO-8859-5.html", "ISO-8859-5" },
    { "ISO-8859-6.html", "ISO-8859-6" },
    { "ISO-8859-7.html", "ISO-8859-7" },
    { "ISO-8859-8.html", "ISO-8859-8" },
    { "ISO-8859-13.html", "ISO-8859-13" },
    { "ISO-8859-15.html", "ISO-8859-15" },
    { "KOI8-R.html", "KOI8-R" },
    { "KOI8-U.html", "KOI8-U" },
    { "macintosh.html", "macintosh" },
    { "Shift-JIS.html", "Shift_JIS" },
    { "US-ASCII.html", "ISO-8859-1" },  // http://crbug.com/15801
    { "UTF-8.html", "UTF-8" },
    { "UTF-16LE.html", "UTF-16LE" },
    { "windows-874.html", "windows-874" },
    { "windows-949.html", "windows-949" },
    { "windows-1250.html", "windows-1250" },
    { "windows-1251.html", "windows-1251" },
    { "windows-1252.html", "windows-1252" },
    { "windows-1253.html", "windows-1253" },
    { "windows-1254.html", "windows-1254" },
    { "windows-1255.html", "windows-1255" },
    { "windows-1256.html", "windows-1256" },
    { "windows-1257.html", "windows-1257" },
    { "windows-1258.html", "windows-1258" }
  };
  const char* const kAliasTestDir = "alias_mapping";

  scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
  ASSERT_TRUE(tab_proxy.get());

  FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAliasTestDir);
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEncodingTestDatas); ++i) {
    // Attach the page name to any failure raised in this iteration; without
    // it a mismatch cannot be traced back to its table entry.
    SCOPED_TRACE(kEncodingTestDatas[i].file_name);

    FilePath test_file_path =
        test_dir_path.AppendASCII(kEncodingTestDatas[i].file_name);
    NavigateToURL(URLRequestMockHTTPJob::GetMockUrl(test_file_path));

    std::string encoding;
    EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
    // Expected value goes first so the failure message labels the two values
    // correctly.
    EXPECT_EQ(kEncodingTestDatas[i].encoding_name, encoding);
  }
}
111 
112 // Marked as flaky: see  http://crbug.com/44668
// Loads a page, checks the encoding the tab initially reports for it,
// overrides the encoding to gb18030, and then saves the page and compares the
// saved copy against the checked-in expected result.
TEST_F(BrowserEncodingTest, FLAKY_TestOverrideEncoding) {
  const char* const kTestFileName = "gb18030_with_iso88591_meta.html";
  const char* const kExpectedFileName =
      "expected_gb18030_saved_from_iso88591_meta.html";
  const char* const kOverrideTestDir = "user_override";

  // Path of the test page, relative to the encoding test directory.
  FilePath test_file_path =
      FilePath(kTestDir).AppendASCII(kOverrideTestDir).AppendASCII(
          kTestFileName);
  GURL url = URLRequestMockHTTPJob::GetMockUrl(test_file_path);
  scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
  ASSERT_TRUE(tab_proxy.get());
  ASSERT_TRUE(tab_proxy->NavigateToURL(url));
  WaitUntilTabCount(1);

  // Get the encoding declared in the page. The expected value goes first so
  // gtest labels the two values correctly on failure.
  std::string encoding;
  EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
  EXPECT_EQ("ISO-8859-1", encoding);

  // Override the encoding to "gb18030" and wait for the navigation that
  // follows the override.
  int64 last_nav_time = 0;
  EXPECT_TRUE(tab_proxy->GetLastNavigationTime(&last_nav_time));
  EXPECT_TRUE(tab_proxy->OverrideEncoding("gb18030"));
  EXPECT_TRUE(tab_proxy->WaitForNavigation(last_nav_time));

  // Re-get the encoding of the page. It should be gb18030 now.
  EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
  EXPECT_EQ("gb18030", encoding);

  // Dump the page; the content of the dumped page should be identical to the
  // expected result file.
  FilePath full_file_name = save_dir_.AppendASCII(kTestFileName);
  // The page is saved as a complete HTML file, which requires a directory
  // name for sub resources. This test file has no sub resources, but the
  // directory name is still required.
  EXPECT_TRUE(tab_proxy->SavePage(full_file_name, temp_sub_resource_dir_,
                                  SavePackage::SAVE_AS_COMPLETE_HTML));
  scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0));
  ASSERT_TRUE(browser.get());
  EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get()));

  FilePath expected_file_name =
      FilePath().AppendASCII(kOverrideTestDir).AppendASCII(kExpectedFileName);
  CheckFile(full_file_name, expected_file_name, true);
}
157 
158 // The following encodings are excluded from the auto-detection test because
159 // it's a known issue that the current encoding detector does not detect them:
160 // ISO-8859-4
161 // ISO-8859-13
162 // KOI8-U
163 // macintosh
164 // windows-874
165 // windows-1252
166 // windows-1253
167 // windows-1257
168 // windows-1258
169 
170 // For Hebrew, the expected encoding value is ISO-8859-8-I. See
171 // http://crbug.com/2927 for more details.
172 // FLAKY / Disabled on CrOS: see http://crbug.com/44666
173 #if defined(OS_CHROMEOS)
174 #define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
175 #else
176 #define MAYBE_TestEncodingAutoDetect FLAKY_TestEncodingAutoDetect
177 #endif
178 
TEST_F(BrowserEncodingTest,MAYBE_TestEncodingAutoDetect)179 TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) {
180   struct EncodingAutoDetectTestData {
181     const char* test_file_name;   // File name of test data.
182     const char* expected_result;  // File name of expected results.
183     const char* expected_encoding;   // expected encoding.
184   };
185   const EncodingAutoDetectTestData kTestDatas[] = {
186       { "Big5_with_no_encoding_specified.html",
187         "expected_Big5_saved_from_no_encoding_specified.html",
188         "Big5" },
189       { "gb18030_with_no_encoding_specified.html",
190         "expected_gb18030_saved_from_no_encoding_specified.html",
191         "gb18030" },
192       { "iso-8859-1_with_no_encoding_specified.html",
193         "expected_iso-8859-1_saved_from_no_encoding_specified.html",
194         "ISO-8859-1" },
195       { "ISO-8859-5_with_no_encoding_specified.html",
196         "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
197         "ISO-8859-5" },
198       { "ISO-8859-6_with_no_encoding_specified.html",
199         "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
200         "ISO-8859-6" },
201       { "ISO-8859-7_with_no_encoding_specified.html",
202         "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
203         "ISO-8859-7" },
204       { "ISO-8859-8_with_no_encoding_specified.html",
205         "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
206         "ISO-8859-8-I" },
207       { "KOI8-R_with_no_encoding_specified.html",
208         "expected_KOI8-R_saved_from_no_encoding_specified.html",
209         "KOI8-R" },
210       { "Shift-JIS_with_no_encoding_specified.html",
211         "expected_Shift-JIS_saved_from_no_encoding_specified.html",
212         "Shift_JIS" },
213       { "UTF-8_with_no_encoding_specified.html",
214         "expected_UTF-8_saved_from_no_encoding_specified.html",
215         "UTF-8" },
216       { "windows-949_with_no_encoding_specified.html",
217         "expected_windows-949_saved_from_no_encoding_specified.html",
218         "windows-949" },
219       { "windows-1251_with_no_encoding_specified.html",
220         "expected_windows-1251_saved_from_no_encoding_specified.html",
221         "windows-1251" },
222       { "windows-1254_with_no_encoding_specified.html",
223         "expected_windows-1254_saved_from_no_encoding_specified.html",
224         "windows-1254" },
225       { "windows-1255_with_no_encoding_specified.html",
226         "expected_windows-1255_saved_from_no_encoding_specified.html",
227         "windows-1255" },
228       { "windows-1256_with_no_encoding_specified.html",
229         "expected_windows-1256_saved_from_no_encoding_specified.html",
230         "windows-1256" }
231     };
232   const char* const kAutoDetectDir = "auto_detect";
233   // Directory of the files of expected results.
234   const char* const kExpectedResultDir = "expected_results";
235 
236   // Full path of saved file. full_file_name = save_dir_ + file_name[i];
237   FilePath full_saved_file_name;
238 
239   FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAutoDetectDir);
240 
241   scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0));
242   ASSERT_TRUE(browser.get());
243   // Set the default charset to one of encodings not supported by the current
244   // auto-detector (Please refer to the above comments) to make sure we
245   // incorrectly decode the page. Now we use ISO-8859-4.
246   ASSERT_TRUE(browser->SetStringPreference(prefs::kDefaultCharset,
247                                            "ISO-8859-4"));
248   scoped_refptr<TabProxy> tab(GetActiveTab());
249   ASSERT_TRUE(tab.get());
250 
251   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas);i++) {
252     FilePath test_file_path(test_dir_path);
253     test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name);
254     GURL url =
255         URLRequestMockHTTPJob::GetMockUrl(test_file_path);
256     ASSERT_TRUE(tab->NavigateToURL(url));
257 
258     // Disable auto detect if it is on.
259     EXPECT_TRUE(
260         browser->SetBooleanPreference(prefs::kWebKitUsesUniversalDetector,
261                                       false));
262     EXPECT_TRUE(tab->Reload());
263 
264     // Get the encoding used for the page, it must be the default charset we
265     // just set.
266     std::string encoding;
267     EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding));
268     EXPECT_EQ(encoding, "ISO-8859-4");
269 
270     // Enable the encoding auto detection.
271     EXPECT_TRUE(browser->SetBooleanPreference(
272         prefs::kWebKitUsesUniversalDetector, true));
273     EXPECT_TRUE(tab->Reload());
274 
275     // Re-get the encoding of page. It should return the real encoding now.
276     bool encoding_auto_detect = false;
277     EXPECT_TRUE(
278         browser->GetBooleanPreference(prefs::kWebKitUsesUniversalDetector,
279                                       &encoding_auto_detect));
280     EXPECT_TRUE(encoding_auto_detect);
281     EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding));
282     EXPECT_EQ(encoding, kTestDatas[i].expected_encoding);
283 
284     // Dump the page, the content of dump page should be equal with our expect
285     // result file.
286     full_saved_file_name = save_dir_.AppendASCII(kTestDatas[i].test_file_name);
287     // Full path of expect result file.
288     FilePath expected_result_file_name = FilePath().AppendASCII(kAutoDetectDir);
289     expected_result_file_name = expected_result_file_name.AppendASCII(
290         kExpectedResultDir);
291     expected_result_file_name = expected_result_file_name.AppendASCII(
292         kTestDatas[i].expected_result);
293     EXPECT_TRUE(tab->SavePage(full_saved_file_name, temp_sub_resource_dir_,
294                               SavePackage::SAVE_AS_COMPLETE_HTML));
295     EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get()));
296     CheckFile(full_saved_file_name, expected_result_file_name, true);
297   }
298 }
299