1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "base/i18n/file_util_icu.h"
11
12 #include <stddef.h>
13
14 #include "base/files/file_util.h"
15 #include "base/logging.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "build/build_config.h"
18 #include "build/chromeos_buildflags.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20 #include "testing/platform_test.h"
21
22 namespace base {
23 namespace i18n {
24
25 // file_util winds up using autoreleased objects on the Mac, so this needs
26 // to be a PlatformTest
27 class FileUtilICUTest : public PlatformTest {
28 };
29
30 #if BUILDFLAG(IS_POSIX) && !BUILDFLAG(IS_APPLE)
31
// On linux, file path is parsed and filtered as UTF-8.
struct GoodBadPairLinux {
  // Input handed to ReplaceIllegalCharactersInPath().
  const char* bad_name;
  // Expected result when '-' is used as the replacement character.
  const char* good_name;
};

constexpr GoodBadPairLinux kLinuxIllegalCharacterCases[] = {
    {"bad*\\/file:name?.jpg", "bad---file-name-.jpg"},
    {"**********::::.txt", "--------------.txt"},
    // These bytes are not valid UTF-8; the expectation is that they come back
    // unchanged.
    {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
    {" _ ", "-_-"},
    {".", "-"},
    {" .( ). ", "-.( ).-"},
    // NOTE(review): a one-character input paired with a three-character
    // expectation looks inconsistent with the one-for-one replacements in the
    // rows above. Runs of spaces may have been collapsed in this copy of the
    // file; confirm both literals against the canonical source.
    {" ", "- -"},
};
45
// Runs every entry of kLinuxIllegalCharacterCases through
// ReplaceIllegalCharactersInPath() with '-' as the replacement character and
// checks the in-place result against the expected name.
TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathLinuxTest) {
  for (const auto& test_case : kLinuxIllegalCharacterCases) {
    // The function rewrites its argument in place, so work on a copy.
    std::string path_name = test_case.bad_name;
    ReplaceIllegalCharactersInPath(&path_name, '-');
    EXPECT_EQ(test_case.good_name, path_name);
  }
}
53
54 #endif
55
56 // For Mac & Windows, which both do Unicode validation on filenames. These
57 // characters are given as UTF-16 strings since its more convenient to specify
58 // unicode characters. For Mac they should be converted to UTF-8, for Windows to
59 // wide.
60 static const struct FileUtilICUTestCases {
61 const char16_t* bad_name;
62 const char16_t* good_name_with_dash;
63 const char16_t* good_name_with_space;
64 } kIllegalCharacterCases[] = {
65 {u"bad*file:name?.jpg", u"bad-file-name-.jpg", u"bad file name .jpg"},
66 {u"**********::::.txt", u"--------------.txt", u"_.txt"},
67 // We can't use UCNs (universal character names) for C0/C1 characters and
68 // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
69 {u"bad\x0003\x0091 file\u200E\u200Fname.png", u"bad-- file--name.png",
70 u"bad file name.png"},
71 {u"bad*file\\?name.jpg", u"bad-file--name.jpg", u"bad file name.jpg"},
72 {u"\t bad*file\\name/.jpg", u"- bad-file-name-.jpg",
73 u"bad file name .jpg"},
74 {u"this_file_name is okay!.mp3", u"this_file_name is okay!.mp3",
75 u"this_file_name is okay!.mp3"},
76 {u"\u4E00\uAC00.mp3", u"\u4E00\uAC00.mp3", u"\u4E00\uAC00.mp3"},
77 {u"\u0635\u200C\u0644.mp3", u"\u0635-\u0644.mp3", u"\u0635 \u0644.mp3"},
78 {u"\U00010330\U00010331.mp3", u"\U00010330\U00010331.mp3",
79 u"\U00010330\U00010331.mp3"},
80 // Unassigned codepoints are ok.
81 {u"\u0378\U00040001.mp3", u"\u0378\U00040001.mp3", u"\u0378\U00040001.mp3"},
82 // Non-characters are not allowed.
83 {u"bad\uFFFFfile\U0010FFFEname.jpg", u"bad-file-name.jpg",
84 u"bad file name.jpg"},
85 {u"bad\uFDD0file\uFDEFname.jpg", u"bad-file-name.jpg",
86 u"bad file name.jpg"},
87 // CVE-2014-9390
88 {u"(\u200C.\u200D.\u200E.\u200F.\u202A.\u202B.\u202C.\u202D.\u202E.\u206A."
89 u"\u206B.\u206C.\u206D.\u206F.\uFEFF)",
90 u"(-.-.-.-.-.-.-.-.-.-.-.-.-.-.-)", u"( . . . . . . . . . . . . . . )"},
91 {u" _ ", u"-_-", u"_"},
92 {u" ", u"-", u"_ _"},
93 {u"\u2008.(\u2007).\u3000", u"-.(\u2007).-", u"(\u2007)"},
94 {u" ", u"- -", u"_ _"},
95 {u". ", u"- -", u"_. _"},
96 #if BUILDFLAG(IS_WIN)
97 // '~' is only invalid on Windows, and only if the file name could possibly
98 // be an 8.3 short name.
99 {u"config~1", u"config-1", u"config 1"},
100 {u"config~1.txt", u"config-1.txt", u"config 1.txt"},
101 #else
102 {u"config~1", u"config~1", u"config~1"},
103 {u"config~1.txt", u"config~1.txt", u"config~1.txt"},
104 #endif
105 // Tildes are always illegal at ends.
106 {u"~config1.txt", u"-config1.txt", u"config1.txt"},
107 {u"config1.txt~", u"config1.txt-", u"config1.txt"},
108 // Some characters, such as spaces, are not allowed in 8.3 short names.
109 // Don't replace the '~' if these characters are present.
110 {u"conf g~1", u"conf g~1", u"conf g~1"},
111 {u"conf,g~1.txt", u"conf,g~1.txt", u"conf,g~1.txt"},
112 // File names with periods in invalid positions are not legal 8.3 names.
113 {u"conf~1.jpeg", u"conf~1.jpeg", u"conf~1.jpeg"},
114 {u"config~12.md", u"config~12.md", u"config~12.md"},
115 // Short names without a '~' character are allowed.
116 {u"config.txt", u"config.txt", u"config.txt"},
117 // Names long enough to not be short names are allowed.
118 {u"config~12.txt", u"config~12.txt", u"config~12.txt"},
119 {u"config~1VeryLongCannotBeShortNameOK.txt",
120 u"config~1VeryLongCannotBeShortNameOK.txt",
121 u"config~1VeryLongCannotBeShortNameOK.txt"},
122 // Base name is longer than 8 characters, without a dot.
123 {u"config~1txt", u"config~1txt", u"config~1txt"},
124 };
125 #if BUILDFLAG(IS_WIN) || BUILDFLAG(IS_APPLE) || BUILDFLAG(IS_POSIX)
126
TEST_F(FileUtilICUTest,ReplaceIllegalCharactersInPathTest)127 TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
128 for (auto i : kIllegalCharacterCases) {
129 #if BUILDFLAG(IS_WIN)
130 std::wstring bad_name = UTF16ToWide(i.bad_name);
131 ReplaceIllegalCharactersInPath(&bad_name, '-');
132 EXPECT_EQ(UTF16ToWide(i.good_name_with_dash), bad_name);
133 #else
134 std::string bad_name = UTF16ToUTF8(i.bad_name);
135 ReplaceIllegalCharactersInPath(&bad_name, '-');
136 EXPECT_EQ(UTF16ToUTF8(i.good_name_with_dash), bad_name);
137 #endif
138 }
139 }
140
TEST_F(FileUtilICUTest,ReplaceIllegalCharactersInPathWithIllegalEndCharTest)141 TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathWithIllegalEndCharTest) {
142 for (auto i : kIllegalCharacterCases) {
143 #if BUILDFLAG(IS_WIN)
144 std::wstring bad_name = UTF16ToWide(i.bad_name);
145 ReplaceIllegalCharactersInPath(&bad_name, ' ');
146 EXPECT_EQ(UTF16ToWide(i.good_name_with_space), bad_name);
147 #else
148 std::string bad_name(UTF16ToUTF8(i.bad_name));
149 ReplaceIllegalCharactersInPath(&bad_name, ' ');
150 EXPECT_EQ(UTF16ToUTF8(i.good_name_with_space), bad_name);
151 #endif
152 }
153 }
154
155 #endif
156
// Checks IsFilenameLegal() against kIllegalCharacterCases: the empty name and
// every dash-sanitized name must be reported legal, and each input that differs
// from its sanitized form must be reported illegal.
TEST_F(FileUtilICUTest, IsFilenameLegalTest) {
  EXPECT_TRUE(IsFilenameLegal(std::u16string()));

  for (const auto& entry : kIllegalCharacterCases) {
    const std::u16string original(entry.bad_name);
    const std::u16string sanitized(entry.good_name_with_dash);

    EXPECT_TRUE(IsFilenameLegal(sanitized)) << sanitized;

    // Inputs that needed no replacement are already covered by the check above.
    if (sanitized == original) {
      continue;
    }
    EXPECT_FALSE(IsFilenameLegal(original)) << original;
  }
}
169
170 #if BUILDFLAG(IS_CHROMEOS_ASH)
// Input/expected pairs for NormalizeFileNameEncoding(). The inputs spell
// "a" + U+0308 (bytes CC 88) and the expectations use the single code point
// U+00E4 (bytes C3 A4).
struct normalize_name_encoding_test_cases {
  // Path handed to NormalizeFileNameEncoding().
  const char* original_path;
  // Path expected afterwards.
  const char* normalized_path;
};

constexpr normalize_name_encoding_test_cases
    kNormalizeFileNameEncodingTestCases[] = {
        {"foo_na\xcc\x88me.foo", "foo_n\xc3\xa4me.foo"},
        // In this expectation only the last path component changes.
        {"foo_dir_na\xcc\x88me/foo_na\xcc\x88me.foo",
         "foo_dir_na\xcc\x88me/foo_n\xc3\xa4me.foo"},
        {"", ""},
        {"foo_dir_na\xcc\x88me/", "foo_dir_n\xc3\xa4me"},
};
181
// Runs NormalizeFileNameEncoding() on every original_path in
// kNormalizeFileNameEncodingTestCases and checks that the path, which is
// modified in place, equals the matching normalized_path.
TEST_F(FileUtilICUTest, NormalizeFileNameEncoding) {
  // Range-for rather than an index loop: no raw array indexing, no reliance on
  // std::size(), and the same shape as the other table-driven tests here.
  for (const auto& test_case : kNormalizeFileNameEncodingTestCases) {
    FilePath path(test_case.original_path);
    NormalizeFileNameEncoding(&path);
    EXPECT_EQ(FilePath(test_case.normalized_path), path);
  }
}
190
191 #endif
192
193 } // namespace i18n
194 } // namespace base
195