1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/rtl.h"
6
7 #include <algorithm>
8
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
16
17 namespace base {
18 namespace i18n {
19
20 namespace {
21
22 // A test utility function to set the application default text direction.
SetRTL(bool rtl)23 void SetRTL(bool rtl) {
24 // Override the current locale/direction.
25 SetICUDefaultLocale(rtl ? "he" : "en");
26 EXPECT_EQ(rtl, IsRTL());
27 }
28
29 } // namespace
30
31 class RTLTest : public PlatformTest {
32 };
33
TEST_F(RTLTest,GetFirstStrongCharacterDirection)34 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
35 struct {
36 const wchar_t* text;
37 TextDirection direction;
38 } cases[] = {
39 // Test pure LTR string.
40 { L"foo bar", LEFT_TO_RIGHT },
41 // Test pure RTL string.
42 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
43 // Test bidi string in which the first character with strong directionality
44 // is a character with type L.
45 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
46 // Test bidi string in which the first character with strong directionality
47 // is a character with type R.
48 { L"\x05d0 foo bar", RIGHT_TO_LEFT },
49 // Test bidi string which starts with a character with weak directionality
50 // and in which the first character with strong directionality is a
51 // character with type L.
52 { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
53 // Test bidi string which starts with a character with weak directionality
54 // and in which the first character with strong directionality is a
55 // character with type R.
56 { L",\x05d0 foo bar", RIGHT_TO_LEFT },
57 // Test bidi string in which the first character with strong directionality
58 // is a character with type LRE.
59 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT },
60 // Test bidi string in which the first character with strong directionality
61 // is a character with type LRO.
62 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT },
63 // Test bidi string in which the first character with strong directionality
64 // is a character with type RLE.
65 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
66 // Test bidi string in which the first character with strong directionality
67 // is a character with type RLO.
68 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
69 // Test bidi string in which the first character with strong directionality
70 // is a character with type AL.
71 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
72 // Test a string without strong directionality characters.
73 { L",!.{}", LEFT_TO_RIGHT },
74 // Test empty string.
75 { L"", LEFT_TO_RIGHT },
76 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
77 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
78 // information).
79 {
80 #if defined(WCHAR_T_IS_UTF32)
81 L" ! \x10910" L"abc 123",
82 #elif defined(WCHAR_T_IS_UTF16)
83 L" ! \xd802\xdd10" L"abc 123",
84 #else
85 #error wchar_t should be either UTF-16 or UTF-32
86 #endif
87 RIGHT_TO_LEFT },
88 {
89 #if defined(WCHAR_T_IS_UTF32)
90 L" ! \x10401" L"abc 123",
91 #elif defined(WCHAR_T_IS_UTF16)
92 L" ! \xd801\xdc01" L"abc 123",
93 #else
94 #error wchar_t should be either UTF-16 or UTF-32
95 #endif
96 LEFT_TO_RIGHT },
97 };
98
99 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
100 EXPECT_EQ(cases[i].direction,
101 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
102 }
103
104
105 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
106 // GetLastStrongCharacterDirection because they should be followed by PDF
107 // character.
TEST_F(RTLTest,GetLastStrongCharacterDirection)108 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
109 struct {
110 const wchar_t* text;
111 TextDirection direction;
112 } cases[] = {
113 // Test pure LTR string.
114 { L"foo bar", LEFT_TO_RIGHT },
115 // Test pure RTL string.
116 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
117 // Test bidi string in which the last character with strong directionality
118 // is a character with type L.
119 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
120 // Test bidi string in which the last character with strong directionality
121 // is a character with type R.
122 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
123 // Test bidi string which ends with a character with weak directionality
124 // and in which the last character with strong directionality is a
125 // character with type L.
126 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
127 // Test bidi string which ends with a character with weak directionality
128 // and in which the last character with strong directionality is a
129 // character with type R.
130 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
131 // Test bidi string in which the last character with strong directionality
132 // is a character with type AL.
133 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
134 // Test a string without strong directionality characters.
135 { L",!.{}", LEFT_TO_RIGHT },
136 // Test empty string.
137 { L"", LEFT_TO_RIGHT },
138 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
139 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
140 // information).
141 {
142 #if defined(WCHAR_T_IS_UTF32)
143 L"abc 123" L" ! \x10910 !",
144 #elif defined(WCHAR_T_IS_UTF16)
145 L"abc 123" L" ! \xd802\xdd10 !",
146 #else
147 #error wchar_t should be either UTF-16 or UTF-32
148 #endif
149 RIGHT_TO_LEFT },
150 {
151 #if defined(WCHAR_T_IS_UTF32)
152 L"abc 123" L" ! \x10401 !",
153 #elif defined(WCHAR_T_IS_UTF16)
154 L"abc 123" L" ! \xd801\xdc01 !",
155 #else
156 #error wchar_t should be either UTF-16 or UTF-32
157 #endif
158 LEFT_TO_RIGHT },
159 };
160
161 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
162 EXPECT_EQ(cases[i].direction,
163 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
164 }
165
TEST_F(RTLTest,GetStringDirection)166 TEST_F(RTLTest, GetStringDirection) {
167 struct {
168 const wchar_t* text;
169 TextDirection direction;
170 } cases[] = {
171 // Test pure LTR string.
172 { L"foobar", LEFT_TO_RIGHT },
173 { L".foobar", LEFT_TO_RIGHT },
174 { L"foo, bar", LEFT_TO_RIGHT },
175 // Test pure LTR with strong directionality characters of type LRE.
176 { L"\x202a\x202a", LEFT_TO_RIGHT },
177 { L".\x202a\x202a", LEFT_TO_RIGHT },
178 { L"\x202a, \x202a", LEFT_TO_RIGHT },
179 // Test pure LTR with strong directionality characters of type LRO.
180 { L"\x202d\x202d", LEFT_TO_RIGHT },
181 { L".\x202d\x202d", LEFT_TO_RIGHT },
182 { L"\x202d, \x202d", LEFT_TO_RIGHT },
183 // Test pure LTR with various types of strong directionality characters.
184 { L"foo \x202a\x202d", LEFT_TO_RIGHT },
185 { L".\x202d foo \x202a", LEFT_TO_RIGHT },
186 { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
187 // Test pure RTL with strong directionality characters of type R.
188 { L"\x05d0\x05d0", RIGHT_TO_LEFT },
189 { L".\x05d0\x05d0", RIGHT_TO_LEFT },
190 { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
191 // Test pure RTL with strong directionality characters of type RLE.
192 { L"\x202b\x202b", RIGHT_TO_LEFT },
193 { L".\x202b\x202b", RIGHT_TO_LEFT },
194 { L"\x202b, \x202b", RIGHT_TO_LEFT },
195 // Test pure RTL with strong directionality characters of type RLO.
196 { L"\x202e\x202e", RIGHT_TO_LEFT },
197 { L".\x202e\x202e", RIGHT_TO_LEFT },
198 { L"\x202e, \x202e", RIGHT_TO_LEFT },
199 // Test pure RTL with strong directionality characters of type AL.
200 { L"\x0622\x0622", RIGHT_TO_LEFT },
201 { L".\x0622\x0622", RIGHT_TO_LEFT },
202 { L"\x0622, \x0622", RIGHT_TO_LEFT },
203 // Test pure RTL with various types of strong directionality characters.
204 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
205 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
206 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
207 // Test bidi strings.
208 { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
209 { L"\x202b foo bar", UNKNOWN_DIRECTION },
210 { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
211 { L"\x202a\x202b", UNKNOWN_DIRECTION },
212 { L"\x202e\x202d", UNKNOWN_DIRECTION },
213 { L"\x0622\x202a", UNKNOWN_DIRECTION },
214 { L"\x202d\x05d0", UNKNOWN_DIRECTION },
215 // Test a string without strong directionality characters.
216 { L",!.{}", LEFT_TO_RIGHT },
217 // Test empty string.
218 { L"", LEFT_TO_RIGHT },
219 {
220 #if defined(WCHAR_T_IS_UTF32)
221 L" ! \x10910" L"abc 123",
222 #elif defined(WCHAR_T_IS_UTF16)
223 L" ! \xd802\xdd10" L"abc 123",
224 #else
225 #error wchar_t should be either UTF-16 or UTF-32
226 #endif
227 UNKNOWN_DIRECTION },
228 {
229 #if defined(WCHAR_T_IS_UTF32)
230 L" ! \x10401" L"abc 123",
231 #elif defined(WCHAR_T_IS_UTF16)
232 L" ! \xd801\xdc01" L"abc 123",
233 #else
234 #error wchar_t should be either UTF-16 or UTF-32
235 #endif
236 LEFT_TO_RIGHT },
237 };
238
239 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
240 EXPECT_EQ(cases[i].direction,
241 GetStringDirection(WideToUTF16(cases[i].text)));
242 }
243
// Verifies that WrapPathWithLTRFormatting() produces the path text enclosed
// between U+202A and U+202C. Sample paths are written with '/' separators and
// converted to '\\' on Windows before being wrapped.
TEST_F(RTLTest, WrapPathWithLTRFormatting) {
  const wchar_t* kPaths[] = {
    // Common path, such as "c:\foo\bar".
    L"c:/foo/bar",
    // Path with file name, such as "c:\foo\bar\test.jpg".
    L"c:/foo/bar/test.jpg",
    // Path ending with punctuation, such as "c:\(foo)\bar.".
    L"c:/(foo)/bar.",
    // Path ending with separator, such as "c:\foo\bar\".
    L"c:/foo/bar/",
    // Path with RTL character.
    L"c:/\x05d0",
    // Path with 2 level RTL directory names.
    L"c:/\x05d0/\x0622",
    // Path with mixed RTL/LTR directory names and ending with punctuation.
    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
    // Path without drive name, such as "/foo/bar/test.jpg".
    L"/foo/bar/test.jpg",
    // Path starting with current directory, such as "./foo".
    L"./foo",
    // Path starting with parent directory, such as "../foo/bar.jpg".
    L"../foo/bar.jpg",
    // Absolute path, such as "//foo/bar.jpg".
    L"//foo/bar.jpg",
    // Path with mixed RTL/LTR directory names.
    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
    // Empty path.
    L""
  };

  for (size_t index = 0; index < arraysize(kPaths); ++index) {
    // |display_path| is the text expected between the two formatting marks.
    std::wstring display_path(kPaths[index]);
#if defined(OS_WIN)
    std::replace(display_path.begin(), display_path.end(), '/', '\\');
    const FilePath path(display_path);
#else
    const FilePath path(base::SysWideToNativeMB(display_path));
#endif
    const std::wstring wrapped_expected =
        std::wstring(L"\x202a") + display_path + L"\x202c";

    string16 localized_file_path_string;
    WrapPathWithLTRFormatting(path, &localized_file_path_string);

    const std::wstring wrapped_actual =
        UTF16ToWide(localized_file_path_string);
    EXPECT_EQ(wrapped_expected, wrapped_actual);
  }
}
294
// Verifies WrapStringWithLTRFormatting() and WrapStringWithRTLFormatting()
// under both application text directions:
//  - an empty string is left empty;
//  - a non-empty string gains the matching embedding mark in front and
//    kPopDirectionalFormatting at the end, with the original text unchanged
//    in between.
// The direction is toggled twice, so it ends where it started.
TEST_F(RTLTest, WrapString) {
  const wchar_t* cases[] = {
    L" . ",
    L"abc",
    L"a" L"\x5d0\x5d1",
    L"a" L"\x5d1" L"b",
    L"\x5d0\x5d1\x5d2",
    L"\x5d0\x5d1" L"a",
    L"\x5d0" L"a" L"\x5d1",
  };

  const bool was_rtl = IsRTL();

  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTL(!IsRTL());

    string16 empty;
    WrapStringWithLTRFormatting(&empty);
    EXPECT_TRUE(empty.empty());
    WrapStringWithRTLFormatting(&empty);
    EXPECT_TRUE(empty.empty());

    // The inner index has its own name so it does not shadow |pass|.
    for (size_t i = 0; i < arraysize(cases); ++i) {
      const string16 input = WideToUTF16(cases[i]);

      string16 ltr_wrap = input;
      WrapStringWithLTRFormatting(&ltr_wrap);
      EXPECT_EQ(kLeftToRightEmbeddingMark, ltr_wrap[0]);
      EXPECT_EQ(input, ltr_wrap.substr(1, ltr_wrap.length() - 2));
      EXPECT_EQ(kPopDirectionalFormatting, ltr_wrap[ltr_wrap.length() - 1]);

      string16 rtl_wrap = input;
      WrapStringWithRTLFormatting(&rtl_wrap);
      EXPECT_EQ(kRightToLeftEmbeddingMark, rtl_wrap[0]);
      EXPECT_EQ(input, rtl_wrap.substr(1, rtl_wrap.length() - 2));
      EXPECT_EQ(kPopDirectionalFormatting, rtl_wrap[rtl_wrap.length() - 1]);
    }
  }

  EXPECT_EQ(was_rtl, IsRTL());
}
336
// Verifies, for both application text directions, whether
// GetDisplayStringInLTRDirectionality() returns its input unchanged or a
// modified string. |wrap_ltr| / |wrap_rtl| say whether a change is expected
// when the UI is LTR / RTL respectively.
TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
  struct {
    const wchar_t* path;
    bool wrap_ltr;
    bool wrap_rtl;
  } kTests[] = {
    { L"test",                   false, true },
    { L"test.html",              false, true },
    { L"\x05d0\x05d1\x05d2",     true,  true },
    { L"\x05d0\x05d1\x05d2.txt", true,  true },
    { L"\x05d0" L"abc",          true,  true },
    { L"\x05d0" L"abc.txt",      true,  true },
    { L"abc\x05d0\x05d1",        false, true },
    { L"abc\x05d0\x05d1.jpg",    false, true },
  };

  const bool initially_rtl = IsRTL();

  // Two passes, flipping the default direction each time, cover both
  // directions and leave the direction as it was found.
  for (size_t pass = 0; pass < 2; ++pass) {
    SetRTL(!IsRTL());

    for (size_t n = 0; n < ARRAYSIZE_UNSAFE(kTests); ++n) {
      const string16 input = WideToUTF16(kTests[n].path);
      const string16 output = GetDisplayStringInLTRDirectionality(input);

      // Pick the expectation that matches the current UI directionality.
      const bool expect_wrapped =
          IsRTL() ? kTests[n].wrap_rtl : kTests[n].wrap_ltr;
      if (expect_wrapped) {
        EXPECT_NE(output, input);
      } else {
        EXPECT_EQ(output, input);
      }
    }
  }

  EXPECT_EQ(initially_rtl, IsRTL());
}
371
// Verifies the direction GetTextDirectionForLocale() reports for a set of
// locale identifiers: first the ones expected to be RIGHT_TO_LEFT, then the
// ones expected to be LEFT_TO_RIGHT.
TEST_F(RTLTest, GetTextDirection) {
  const char* const kRtlLocales[] = {
    "ar",
    "ar_EG",
    "he",
    "he_IL",
    // iw is an obsolete code for Hebrew.
    "iw",
    // Although we're not yet localized to Farsi and Urdu, we
    // do have the text layout direction information for them.
    "fa",
    "ur",
#if 0
    // Enable these when we include the minimal locale data for Azerbaijani
    // written in Arabic and Dhivehi. At the moment, our copy of
    // ICU data does not have entries for them.
    "az_Arab",
    // Dhivehi that uses Thaana script.
    "dv",
#endif
  };

  const char* const kLtrLocales[] = {
    "en",
    // Chinese in China with '-'.
    "zh-CN",
    // Filipino : 3-letter code
    "fil",
    // Russian
    "ru",
    // Japanese that uses multiple scripts
    "ja",
  };

  for (size_t n = 0; n < arraysize(kRtlLocales); ++n) {
    EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale(kRtlLocales[n]))
        << kRtlLocales[n];
  }

  for (size_t n = 0; n < arraysize(kLtrLocales); ++n) {
    EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale(kLtrLocales[n]))
        << kLtrLocales[n];
  }
}
401
// Verifies, for both application text directions, that whenever
// AdjustStringForLocaleDirection() reports it changed a string,
// UnadjustStringForLocaleDirection() succeeds and restores the original text.
// Strings that AdjustStringForLocaleDirection() leaves alone are skipped.
TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
  // These sample strings are the same ones used by the
  // GetFirstStrongCharacterDirection test.
  const wchar_t* kSamples[] = {
    L"foo bar",
    L"foo \x05d0 bar",
    L"\x05d0 foo bar",
    L"!foo \x05d0 bar",
    L",\x05d0 foo bar",
    L"\x202a \x05d0 foo bar",
    L"\x202d \x05d0 foo bar",
    L"\x202b foo \x05d0 bar",
    L"\x202e foo \x05d0 bar",
    L"\x0622 foo \x05d0 bar",
  };

  const bool initially_rtl = IsRTL();

  // Two passes, flipping the default direction each time, cover both
  // directions and leave the direction as it was found.
  for (size_t pass = 0; pass < 2; ++pass) {
    SetRTL(!IsRTL());

    for (size_t n = 0; n < arraysize(kSamples); ++n) {
      const string16 test_case = WideToUTF16(kSamples[n]);
      string16 adjusted_string = test_case;

      // Only strings that were actually adjusted can be round-tripped.
      if (AdjustStringForLocaleDirection(&adjusted_string)) {
        EXPECT_NE(test_case, adjusted_string);
        EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
        EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
                                              << "] with IsRTL() == " << IsRTL();
      }
    }
  }

  EXPECT_EQ(initially_rtl, IsRTL());
}
439
440 } // namespace i18n
441 } // namespace base
442