• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include <algorithm>
8 
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
16 
17 #if defined(TOOLKIT_GTK)
18 #include <gtk/gtk.h>
19 #endif
20 
21 namespace base {
22 namespace i18n {
23 
24 namespace {
25 
26 // A test utility function to set the application default text direction.
SetRTL(bool rtl)27 void SetRTL(bool rtl) {
28   // Override the current locale/direction.
29   SetICUDefaultLocale(rtl ? "he" : "en");
30 #if defined(TOOLKIT_GTK)
31   // Do the same for GTK, which does not rely on the ICU default locale.
32   gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
33 #endif
34   EXPECT_EQ(rtl, IsRTL());
35 }
36 
37 }  // namespace
38 
39 class RTLTest : public PlatformTest {
40 };
41 
TEST_F(RTLTest,GetFirstStrongCharacterDirection)42 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
43   struct {
44     const wchar_t* text;
45     TextDirection direction;
46   } cases[] = {
47     // Test pure LTR string.
48     { L"foo bar", LEFT_TO_RIGHT },
49     // Test bidi string in which the first character with strong directionality
50     // is a character with type L.
51     { L"foo \x05d0 bar", LEFT_TO_RIGHT },
52     // Test bidi string in which the first character with strong directionality
53     // is a character with type R.
54     { L"\x05d0 foo bar", RIGHT_TO_LEFT },
55     // Test bidi string which starts with a character with weak directionality
56     // and in which the first character with strong directionality is a
57     // character with type L.
58     { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
59     // Test bidi string which starts with a character with weak directionality
60     // and in which the first character with strong directionality is a
61     // character with type R.
62     { L",\x05d0 foo bar", RIGHT_TO_LEFT },
63     // Test bidi string in which the first character with strong directionality
64     // is a character with type LRE.
65     { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
66     // Test bidi string in which the first character with strong directionality
67     // is a character with type LRO.
68     { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
69     // Test bidi string in which the first character with strong directionality
70     // is a character with type RLE.
71     { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
72     // Test bidi string in which the first character with strong directionality
73     // is a character with type RLO.
74     { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
75     // Test bidi string in which the first character with strong directionality
76     // is a character with type AL.
77     { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
78     // Test a string without strong directionality characters.
79     { L",!.{}", LEFT_TO_RIGHT },
80     // Test empty string.
81     { L"", LEFT_TO_RIGHT },
82     // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
83     // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
84     // information).
85     {
86 #if defined(WCHAR_T_IS_UTF32)
87       L" ! \x10910" L"abc 123",
88 #elif defined(WCHAR_T_IS_UTF16)
89       L" ! \xd802\xdd10" L"abc 123",
90 #else
91 #error wchar_t should be either UTF-16 or UTF-32
92 #endif
93       RIGHT_TO_LEFT },
94     {
95 #if defined(WCHAR_T_IS_UTF32)
96       L" ! \x10401" L"abc 123",
97 #elif defined(WCHAR_T_IS_UTF16)
98       L" ! \xd801\xdc01" L"abc 123",
99 #else
100 #error wchar_t should be either UTF-16 or UTF-32
101 #endif
102       LEFT_TO_RIGHT },
103    };
104 
105   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
106     EXPECT_EQ(cases[i].direction,
107               GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
108 }
109 
TEST_F(RTLTest,GetStringDirection)110 TEST_F(RTLTest, GetStringDirection) {
111   struct {
112     const wchar_t* text;
113     TextDirection direction;
114   } cases[] = {
115     // Test pure LTR string.
116     { L"foobar", LEFT_TO_RIGHT },
117     { L".foobar", LEFT_TO_RIGHT },
118     { L"foo, bar", LEFT_TO_RIGHT },
119     // Test pure LTR with strong directionality characters of type LRE.
120     { L"\x202a\x202a", LEFT_TO_RIGHT },
121     { L".\x202a\x202a", LEFT_TO_RIGHT },
122     { L"\x202a, \x202a", LEFT_TO_RIGHT },
123     // Test pure LTR with strong directionality characters of type LRO.
124     { L"\x202d\x202d", LEFT_TO_RIGHT },
125     { L".\x202d\x202d", LEFT_TO_RIGHT },
126     { L"\x202d, \x202d", LEFT_TO_RIGHT },
127     // Test pure LTR with various types of strong directionality characters.
128     { L"foo \x202a\x202d", LEFT_TO_RIGHT },
129     { L".\x202d foo \x202a", LEFT_TO_RIGHT },
130     { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
131     // Test pure RTL with strong directionality characters of type R.
132     { L"\x05d0\x05d0", RIGHT_TO_LEFT },
133     { L".\x05d0\x05d0", RIGHT_TO_LEFT },
134     { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
135     // Test pure RTL with strong directionality characters of type RLE.
136     { L"\x202b\x202b", RIGHT_TO_LEFT },
137     { L".\x202b\x202b", RIGHT_TO_LEFT },
138     { L"\x202b, \x202b", RIGHT_TO_LEFT },
139     // Test pure RTL with strong directionality characters of type RLO.
140     { L"\x202e\x202e", RIGHT_TO_LEFT },
141     { L".\x202e\x202e", RIGHT_TO_LEFT },
142     { L"\x202e, \x202e", RIGHT_TO_LEFT },
143     // Test pure RTL with strong directionality characters of type AL.
144     { L"\x0622\x0622", RIGHT_TO_LEFT },
145     { L".\x0622\x0622", RIGHT_TO_LEFT },
146     { L"\x0622, \x0622", RIGHT_TO_LEFT },
147     // Test pure RTL with various types of strong directionality characters.
148     { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
149     { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
150     { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
151     // Test bidi strings.
152     { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
153     { L"\x202b foo bar", UNKNOWN_DIRECTION },
154     { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
155     { L"\x202a\x202b", UNKNOWN_DIRECTION },
156     { L"\x202e\x202d", UNKNOWN_DIRECTION },
157     { L"\x0622\x202a", UNKNOWN_DIRECTION },
158     { L"\x202d\x05d0", UNKNOWN_DIRECTION },
159     // Test a string without strong directionality characters.
160     { L",!.{}", LEFT_TO_RIGHT },
161     // Test empty string.
162     { L"", LEFT_TO_RIGHT },
163     {
164 #if defined(WCHAR_T_IS_UTF32)
165       L" ! \x10910" L"abc 123",
166 #elif defined(WCHAR_T_IS_UTF16)
167       L" ! \xd802\xdd10" L"abc 123",
168 #else
169 #error wchar_t should be either UTF-16 or UTF-32
170 #endif
171       UNKNOWN_DIRECTION },
172     {
173 #if defined(WCHAR_T_IS_UTF32)
174       L" ! \x10401" L"abc 123",
175 #elif defined(WCHAR_T_IS_UTF16)
176       L" ! \xd801\xdc01" L"abc 123",
177 #else
178 #error wchar_t should be either UTF-16 or UTF-32
179 #endif
180       LEFT_TO_RIGHT },
181    };
182 
183   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
184     EXPECT_EQ(cases[i].direction,
185               GetStringDirection(WideToUTF16(cases[i].text)));
186 }
187 
// Checks that WrapPathWithLTRFormatting() produces the path text surrounded
// by U+202A (left-to-right embedding) and U+202C (pop directional
// formatting). On Windows the '/' separators of each case are first replaced
// with '\\'.
TEST_F(RTLTest, WrapPathWithLTRFormatting) {
  const wchar_t* cases[] = {
    // Common path, such as "c:\foo\bar".
    L"c:/foo/bar",
    // Path with a file name, such as "c:\foo\bar\test.jpg".
    L"c:/foo/bar/test.jpg",
    // Path ending with punctuation, such as "c:\(foo)\bar.".
    L"c:/(foo)/bar.",
    // Path ending with a separator, such as "c:\foo\bar\".
    L"c:/foo/bar/",
    // Path with an RTL character.
    L"c:/\x05d0",
    // Path with two levels of RTL directory names.
    L"c:/\x05d0/\x0622",
    // Path with mixed RTL/LTR directory names, ending with punctuation.
    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
    // Path without a drive name, such as "/foo/bar/test.jpg".
    L"/foo/bar/test.jpg",
    // Path starting with the current directory, such as "./foo".
    L"./foo",
    // Path starting with the parent directory, such as "../foo/bar.jpg".
    L"../foo/bar.jpg",
    // Absolute path, such as "//foo/bar.jpg".
    L"//foo/bar.jpg",
    // Path with mixed RTL/LTR directory names.
    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
    // Empty path.
    L""
  };

  for (size_t i = 0; i < arraysize(cases); ++i) {
    // Text expected between the two formatting marks.
    std::wstring display_path(cases[i]);
#if defined(OS_WIN)
    std::replace(display_path.begin(), display_path.end(), '/', '\\');
    const FilePath path(display_path);
#else
    const FilePath path(base::SysWideToNativeMB(display_path));
#endif
    std::wstring wrapped_expected(L"\x202a");
    wrapped_expected += display_path;
    wrapped_expected += L"\x202c";

    string16 localized_file_path_string;
    WrapPathWithLTRFormatting(path, &localized_file_path_string);

    const std::wstring wrapped_actual =
        UTF16ToWide(localized_file_path_string);
    EXPECT_EQ(wrapped_expected, wrapped_actual);
  }
}
238 
// Checks WrapStringWithLTRFormatting() and WrapStringWithRTLFormatting()
// under both application default directions: an empty string must stay empty,
// and any other input must come back as
//   <embedding mark> + input + <pop directional formatting>.
TEST_F(RTLTest, WrapString) {
  const wchar_t* cases[] = {
    L" . ",
    L"abc",
    L"a" L"\x5d0\x5d1",
    L"a" L"\x5d1" L"b",
    L"\x5d0\x5d1\x5d2",
    L"\x5d0\x5d1" L"a",
    L"\x5d0" L"a" L"\x5d1",
  };

  const bool was_rtl = IsRTL();

  // Two passes: each one flips the default direction, so both directions are
  // exercised and the original one is back in place afterwards.
  for (size_t pass = 0; pass < 2; ++pass) {
    SetRTL(!IsRTL());

    string16 empty;
    WrapStringWithLTRFormatting(&empty);
    EXPECT_TRUE(empty.empty());
    WrapStringWithRTLFormatting(&empty);
    EXPECT_TRUE(empty.empty());

    for (size_t i = 0; i < arraysize(cases); ++i) {
      const string16 input = WideToUTF16(cases[i]);

      // Whole-string comparisons are used instead of indexing the first and
      // last characters, so a too-short result fails the expectation rather
      // than reading out of range.
      string16 expected_ltr(1, kLeftToRightEmbeddingMark);
      expected_ltr += input;
      expected_ltr.push_back(kPopDirectionalFormatting);
      string16 ltr_wrap = input;
      WrapStringWithLTRFormatting(&ltr_wrap);
      EXPECT_EQ(expected_ltr, ltr_wrap);

      string16 expected_rtl(1, kRightToLeftEmbeddingMark);
      expected_rtl += input;
      expected_rtl.push_back(kPopDirectionalFormatting);
      string16 rtl_wrap = input;
      WrapStringWithRTLFormatting(&rtl_wrap);
      EXPECT_EQ(expected_rtl, rtl_wrap);
    }
  }

  EXPECT_EQ(was_rtl, IsRTL());
}
280 
// Checks whether GetDisplayStringInLTRDirectionality() changes its input,
// under both application default directions. Each table entry records
// whether a change is expected when the UI is LTR (wrap_ltr) and when it is
// RTL (wrap_rtl).
TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
  struct {
    const wchar_t* path;
    bool wrap_ltr;
    bool wrap_rtl;
  } cases[] = {
    { L"test",                   false, true },
    { L"test.html",              false, true },
    { L"\x05d0\x05d1\x05d2",     true,  true },
    { L"\x05d0\x05d1\x05d2.txt", true,  true },
    { L"\x05d0" L"abc",          true,  true },
    { L"\x05d0" L"abc.txt",      true,  true },
    { L"abc\x05d0\x05d1",        false, true },
    { L"abc\x05d0\x05d1.jpg",    false, true },
  };

  const bool was_rtl = IsRTL();

  // Two passes: each one flips the default direction, so both directions are
  // exercised and the original one is back in place afterwards.
  for (size_t pass = 0; pass < 2; ++pass) {
    SetRTL(!IsRTL());
    const bool ui_is_rtl = IsRTL();

    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
      const string16 input = WideToUTF16(cases[i].path);
      const string16 output = GetDisplayStringInLTRDirectionality(input);
      // Pick the expectation that matches the current UI directionality.
      const bool expect_wrapped =
          ui_is_rtl ? cases[i].wrap_rtl : cases[i].wrap_ltr;
      if (expect_wrapped) {
        EXPECT_NE(output, input)
            << "case #" << i << " with IsRTL() == " << ui_is_rtl;
      } else {
        EXPECT_EQ(output, input)
            << "case #" << i << " with IsRTL() == " << ui_is_rtl;
      }
    }
  }

  EXPECT_EQ(was_rtl, IsRTL());
}
315 
// Checks GetTextDirectionForLocale() against a table of locale names and the
// direction expected for each of them.
TEST_F(RTLTest, GetTextDirection) {
  struct {
    const char* locale;
    TextDirection expected;
  } locale_cases[] = {
    { "ar", RIGHT_TO_LEFT },
    { "ar_EG", RIGHT_TO_LEFT },
    { "he", RIGHT_TO_LEFT },
    { "he_IL", RIGHT_TO_LEFT },
    // "iw" is an obsolete code for Hebrew.
    { "iw", RIGHT_TO_LEFT },
    // Although we're not yet localized to Farsi and Urdu, we do have the text
    // layout direction information for them.
    { "fa", RIGHT_TO_LEFT },
    { "ur", RIGHT_TO_LEFT },
#if 0
    // Enable these when we include the minimal locale data for Azerbaijani
    // written in Arabic and Dhivehi. At the moment, our copy of ICU data does
    // not have entries for them.
    { "az_Arab", RIGHT_TO_LEFT },
    // Dhivehi, which uses the Thaana script.
    { "dv", RIGHT_TO_LEFT },
#endif
    { "en", LEFT_TO_RIGHT },
    // Chinese in China, written with '-'.
    { "zh-CN", LEFT_TO_RIGHT },
    // Filipino: 3-letter code.
    { "fil", LEFT_TO_RIGHT },
    // Russian.
    { "ru", LEFT_TO_RIGHT },
    // Japanese, which uses multiple scripts.
    { "ja", LEFT_TO_RIGHT },
  };

  const size_t locale_count = ARRAYSIZE_UNSAFE(locale_cases);
  for (size_t index = 0; index < locale_count; ++index) {
    const char* const locale = locale_cases[index].locale;
    EXPECT_EQ(locale_cases[index].expected, GetTextDirectionForLocale(locale))
        << " for locale " << locale;
  }
}
345 
// Checks, under both application default directions, that whenever
// AdjustStringForLocaleDirection() reports it changed a string,
// UnadjustStringForLocaleDirection() succeeds and restores the original text.
TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
  // These test strings are borrowed from the GetFirstStrongCharacterDirection
  // test.
  const wchar_t* cases[] = {
    L"foo bar",
    L"foo \x05d0 bar",
    L"\x05d0 foo bar",
    L"!foo \x05d0 bar",
    L",\x05d0 foo bar",
    L"\x202a \x05d0 foo  bar",
    L"\x202d \x05d0 foo  bar",
    L"\x202b foo \x05d0 bar",
    L"\x202e foo \x05d0 bar",
    L"\x0622 foo \x05d0 bar",
  };

  const bool was_rtl = IsRTL();

  // Two passes: each one flips the default direction, so both directions are
  // exercised and the original one is back in place afterwards.
  for (size_t pass = 0; pass < 2; ++pass) {
    SetRTL(!IsRTL());

    for (size_t i = 0; i < arraysize(cases); ++i) {
      const string16 test_case = WideToUTF16(cases[i]);
      string16 adjusted_string = test_case;

      // Nothing to undo when the adjustment reports no change.
      if (!AdjustStringForLocaleDirection(&adjusted_string))
        continue;

      EXPECT_NE(test_case, adjusted_string);
      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
                                            << "] with IsRTL() == " << IsRTL();
    }
  }

  EXPECT_EQ(was_rtl, IsRTL());
}
383 
384 }  // namespace i18n
385 }  // namespace base
386