• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include <stddef.h>
8 
9 #include <algorithm>
10 
11 #include "base/files/file_path.h"
12 #include "base/macros.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/sys_string_conversions.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/test/icu_test_util.h"
17 #include "build/build_config.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 #include "testing/platform_test.h"
20 #include "third_party/icu/source/common/unicode/locid.h"
21 #include "third_party/icu/source/i18n/unicode/usearch.h"
22 
23 namespace base {
24 namespace i18n {
25 
26 class RTLTest : public PlatformTest {
27 };
28 
TEST_F(RTLTest,GetFirstStrongCharacterDirection)29 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
30   struct {
31     const wchar_t* text;
32     TextDirection direction;
33   } cases[] = {
34     // Test pure LTR string.
35     { L"foo bar", LEFT_TO_RIGHT },
36     // Test pure RTL string.
37     { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
38     // Test bidi string in which the first character with strong directionality
39     // is a character with type L.
40     { L"foo \x05d0 bar", LEFT_TO_RIGHT },
41     // Test bidi string in which the first character with strong directionality
42     // is a character with type R.
43     { L"\x05d0 foo bar", RIGHT_TO_LEFT },
44     // Test bidi string which starts with a character with weak directionality
45     // and in which the first character with strong directionality is a
46     // character with type L.
47     { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
48     // Test bidi string which starts with a character with weak directionality
49     // and in which the first character with strong directionality is a
50     // character with type R.
51     { L",\x05d0 foo bar", RIGHT_TO_LEFT },
52     // Test bidi string in which the first character with strong directionality
53     // is a character with type LRE.
54     { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
55     // Test bidi string in which the first character with strong directionality
56     // is a character with type LRO.
57     { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
58     // Test bidi string in which the first character with strong directionality
59     // is a character with type RLE.
60     { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
61     // Test bidi string in which the first character with strong directionality
62     // is a character with type RLO.
63     { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
64     // Test bidi string in which the first character with strong directionality
65     // is a character with type AL.
66     { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
67     // Test a string without strong directionality characters.
68     { L",!.{}", LEFT_TO_RIGHT },
69     // Test empty string.
70     { L"", LEFT_TO_RIGHT },
71     // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
72     // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
73     // information).
74     {
75 #if defined(WCHAR_T_IS_UTF32)
76       L" ! \x10910" L"abc 123",
77 #elif defined(WCHAR_T_IS_UTF16)
78       L" ! \xd802\xdd10" L"abc 123",
79 #else
80 #error wchar_t should be either UTF-16 or UTF-32
81 #endif
82       RIGHT_TO_LEFT },
83     {
84 #if defined(WCHAR_T_IS_UTF32)
85       L" ! \x10401" L"abc 123",
86 #elif defined(WCHAR_T_IS_UTF16)
87       L" ! \xd801\xdc01" L"abc 123",
88 #else
89 #error wchar_t should be either UTF-16 or UTF-32
90 #endif
91       LEFT_TO_RIGHT },
92    };
93 
94   for (size_t i = 0; i < arraysize(cases); ++i)
95     EXPECT_EQ(cases[i].direction,
96               GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
97 }
98 
99 
100 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
101 // GetLastStrongCharacterDirection because they should be followed by PDF
102 // character.
TEST_F(RTLTest,GetLastStrongCharacterDirection)103 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
104   struct {
105     const wchar_t* text;
106     TextDirection direction;
107   } cases[] = {
108     // Test pure LTR string.
109     { L"foo bar", LEFT_TO_RIGHT },
110     // Test pure RTL string.
111     { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
112     // Test bidi string in which the last character with strong directionality
113     // is a character with type L.
114     { L"foo \x05d0 bar", LEFT_TO_RIGHT },
115     // Test bidi string in which the last character with strong directionality
116     // is a character with type R.
117     { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
118     // Test bidi string which ends with a character with weak directionality
119     // and in which the last character with strong directionality is a
120     // character with type L.
121     { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
122     // Test bidi string which ends with a character with weak directionality
123     // and in which the last character with strong directionality is a
124     // character with type R.
125     { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
126     // Test bidi string in which the last character with strong directionality
127     // is a character with type AL.
128     { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
129     // Test a string without strong directionality characters.
130     { L",!.{}", LEFT_TO_RIGHT },
131     // Test empty string.
132     { L"", LEFT_TO_RIGHT },
133     // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
134     // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
135     // information).
136     {
137 #if defined(WCHAR_T_IS_UTF32)
138        L"abc 123" L" ! \x10910 !",
139 #elif defined(WCHAR_T_IS_UTF16)
140        L"abc 123" L" ! \xd802\xdd10 !",
141 #else
142 #error wchar_t should be either UTF-16 or UTF-32
143 #endif
144       RIGHT_TO_LEFT },
145     {
146 #if defined(WCHAR_T_IS_UTF32)
147        L"abc 123" L" ! \x10401 !",
148 #elif defined(WCHAR_T_IS_UTF16)
149        L"abc 123" L" ! \xd801\xdc01 !",
150 #else
151 #error wchar_t should be either UTF-16 or UTF-32
152 #endif
153       LEFT_TO_RIGHT },
154    };
155 
156   for (size_t i = 0; i < arraysize(cases); ++i)
157     EXPECT_EQ(cases[i].direction,
158               GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
159 }
160 
TEST_F(RTLTest,GetStringDirection)161 TEST_F(RTLTest, GetStringDirection) {
162   struct {
163     const wchar_t* text;
164     TextDirection direction;
165   } cases[] = {
166     // Test pure LTR string.
167     { L"foobar", LEFT_TO_RIGHT },
168     { L".foobar", LEFT_TO_RIGHT },
169     { L"foo, bar", LEFT_TO_RIGHT },
170     // Test pure LTR with strong directionality characters of type LRE.
171     { L"\x202a\x202a", LEFT_TO_RIGHT },
172     { L".\x202a\x202a", LEFT_TO_RIGHT },
173     { L"\x202a, \x202a", LEFT_TO_RIGHT },
174     // Test pure LTR with strong directionality characters of type LRO.
175     { L"\x202d\x202d", LEFT_TO_RIGHT },
176     { L".\x202d\x202d", LEFT_TO_RIGHT },
177     { L"\x202d, \x202d", LEFT_TO_RIGHT },
178     // Test pure LTR with various types of strong directionality characters.
179     { L"foo \x202a\x202d", LEFT_TO_RIGHT },
180     { L".\x202d foo \x202a", LEFT_TO_RIGHT },
181     { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
182     // Test pure RTL with strong directionality characters of type R.
183     { L"\x05d0\x05d0", RIGHT_TO_LEFT },
184     { L".\x05d0\x05d0", RIGHT_TO_LEFT },
185     { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
186     // Test pure RTL with strong directionality characters of type RLE.
187     { L"\x202b\x202b", RIGHT_TO_LEFT },
188     { L".\x202b\x202b", RIGHT_TO_LEFT },
189     { L"\x202b, \x202b", RIGHT_TO_LEFT },
190     // Test pure RTL with strong directionality characters of type RLO.
191     { L"\x202e\x202e", RIGHT_TO_LEFT },
192     { L".\x202e\x202e", RIGHT_TO_LEFT },
193     { L"\x202e, \x202e", RIGHT_TO_LEFT },
194     // Test pure RTL with strong directionality characters of type AL.
195     { L"\x0622\x0622", RIGHT_TO_LEFT },
196     { L".\x0622\x0622", RIGHT_TO_LEFT },
197     { L"\x0622, \x0622", RIGHT_TO_LEFT },
198     // Test pure RTL with various types of strong directionality characters.
199     { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
200     { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
201     { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
202     // Test bidi strings.
203     { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
204     { L"\x202b foo bar", UNKNOWN_DIRECTION },
205     { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
206     { L"\x202a\x202b", UNKNOWN_DIRECTION },
207     { L"\x202e\x202d", UNKNOWN_DIRECTION },
208     { L"\x0622\x202a", UNKNOWN_DIRECTION },
209     { L"\x202d\x05d0", UNKNOWN_DIRECTION },
210     // Test a string without strong directionality characters.
211     { L",!.{}", LEFT_TO_RIGHT },
212     // Test empty string.
213     { L"", LEFT_TO_RIGHT },
214     {
215 #if defined(WCHAR_T_IS_UTF32)
216       L" ! \x10910" L"abc 123",
217 #elif defined(WCHAR_T_IS_UTF16)
218       L" ! \xd802\xdd10" L"abc 123",
219 #else
220 #error wchar_t should be either UTF-16 or UTF-32
221 #endif
222       UNKNOWN_DIRECTION },
223     {
224 #if defined(WCHAR_T_IS_UTF32)
225       L" ! \x10401" L"abc 123",
226 #elif defined(WCHAR_T_IS_UTF16)
227       L" ! \xd801\xdc01" L"abc 123",
228 #else
229 #error wchar_t should be either UTF-16 or UTF-32
230 #endif
231       LEFT_TO_RIGHT },
232    };
233 
234   for (size_t i = 0; i < arraysize(cases); ++i)
235     EXPECT_EQ(cases[i].direction,
236               GetStringDirection(WideToUTF16(cases[i].text)));
237 }
238 
// Verifies that WrapPathWithLTRFormatting() surrounds a path with U+202A
// (LRE) and U+202C (PDF) and leaves the path text itself untouched.
TEST_F(RTLTest, WrapPathWithLTRFormatting) {
  // Paths are written with '/' separators; on Windows they are converted to
  // '\\' before use.
  const wchar_t* kPaths[] = {
    // Ordinary directory path, e.g. "c:\foo\bar".
    L"c:/foo/bar",
    // Path that includes a file name, e.g. "c:\foo\bar\test.jpg".
    L"c:/foo/bar/test.jpg",
    // Path whose last character is punctuation, e.g. "c:\(foo)\bar.".
    L"c:/(foo)/bar.",
    // Path whose last character is a separator, e.g. "c:\foo\bar\".
    L"c:/foo/bar/",
    // Path containing a single RTL character.
    L"c:/\x05d0",
    // Path with two levels of RTL directory names.
    L"c:/\x05d0/\x0622",
    // Mixed RTL/LTR directory names, ending with punctuation.
    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
    // Path without a drive name, e.g. "/foo/bar/test.jpg".
    L"/foo/bar/test.jpg",
    // Path starting with the current directory, e.g. "./foo".
    L"./foo",
    // Path starting with the parent directory, e.g. "../foo/bar.jpg".
    L"../foo/bar.jpg",
    // Path starting with a double separator, e.g. "//foo/bar.jpg".
    L"//foo/bar.jpg",
    // Mixed RTL/LTR directory names.
    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
    // Empty path.
    L""
  };

  for (size_t index = 0; index < arraysize(kPaths); ++index) {
#if defined(OS_WIN)
    std::wstring native_text(kPaths[index]);
    std::replace(native_text.begin(), native_text.end(), L'/', L'\\');
    const FilePath path(native_text);
#else
    const std::wstring native_text(kPaths[index]);
    const FilePath path(base::SysWideToNativeMB(native_text));
#endif
    // Expected result: LRE + path + PDF.
    const std::wstring wrapped_expected =
        std::wstring(L"\x202a") + native_text + L"\x202c";

    string16 wrapped_utf16;
    WrapPathWithLTRFormatting(path, &wrapped_utf16);

    const std::wstring wrapped_actual = UTF16ToWide(wrapped_utf16);
    EXPECT_EQ(wrapped_expected, wrapped_actual);
  }
}
289 
// Verifies WrapStringWithLTRFormatting() / WrapStringWithRTLFormatting() in
// both UI directions: empty strings stay empty, and non-empty strings gain a
// leading embedding mark and a trailing pop-directional-formatting mark
// around the unmodified input.
TEST_F(RTLTest, WrapString) {
  const wchar_t* cases[] = {
    L" . ",
    L"abc",
    L"a" L"\x5d0\x5d1",
    L"a" L"\x5d1" L"b",
    L"\x5d0\x5d1\x5d2",
    L"\x5d0\x5d1" L"a",
    L"\x5d0" L"a" L"\x5d1",
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes: each pass flips the direction, so both directions are tried
  // and the original one is restored afterwards. The pass counter has its own
  // name so the inner loop index does not shadow it.
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    string16 empty;
    WrapStringWithLTRFormatting(&empty);
    EXPECT_TRUE(empty.empty());
    WrapStringWithRTLFormatting(&empty);
    EXPECT_TRUE(empty.empty());

    for (size_t i = 0; i < arraysize(cases); ++i) {
      string16 input = WideToUTF16(cases[i]);
      string16 ltr_wrap = input;
      WrapStringWithLTRFormatting(&ltr_wrap);
      EXPECT_EQ(kLeftToRightEmbeddingMark, ltr_wrap[0]);
      EXPECT_EQ(input, ltr_wrap.substr(1, ltr_wrap.length() - 2));
      EXPECT_EQ(kPopDirectionalFormatting, ltr_wrap[ltr_wrap.length() - 1]);

      string16 rtl_wrap = input;
      WrapStringWithRTLFormatting(&rtl_wrap);
      EXPECT_EQ(kRightToLeftEmbeddingMark, rtl_wrap[0]);
      EXPECT_EQ(input, rtl_wrap.substr(1, rtl_wrap.length() - 2));
      EXPECT_EQ(kPopDirectionalFormatting, rtl_wrap[rtl_wrap.length() - 1]);
    }
  }

  // After two toggles the direction must be back where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
332 
// Verifies when GetDisplayStringInLTRDirectionality() modifies its input.
// Each row states whether a change is expected in an LTR UI (|wrap_ltr|) and
// in an RTL UI (|wrap_rtl|).
TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
  struct {
    const wchar_t* path;
    bool wrap_ltr;
    bool wrap_rtl;
  } cases[] = {
    { L"test",                   false, true },
    { L"test.html",              false, true },
    { L"\x05d0\x05d1\x05d2",     true,  true },
    { L"\x05d0\x05d1\x05d2.txt", true,  true },
    { L"\x05d0" L"abc",          true,  true },
    { L"\x05d0" L"abc.txt",      true,  true },
    { L"abc\x05d0\x05d1",        false, true },
    { L"abc\x05d0\x05d1.jpg",    false, true },
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes, one per UI direction. The pass counter has its own name so
  // the inner loop index does not shadow it.
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());
    for (size_t i = 0; i < arraysize(cases); ++i) {
      string16 input = WideToUTF16(cases[i].path);
      string16 output = GetDisplayStringInLTRDirectionality(input);
      // Test the expected wrapping behavior for the current UI directionality.
      if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
        EXPECT_NE(output, input);
      else
        EXPECT_EQ(output, input);
    }
  }

  // After two toggles the direction must be back where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
368 
// Checks the layout direction GetTextDirectionForLocale() reports for a set
// of RTL and LTR locale identifiers.
TEST_F(RTLTest, GetTextDirection) {
  struct LocaleCase {
    const char* locale;
    TextDirection expected;
  };
  const LocaleCase kLocales[] = {
    { "ar", RIGHT_TO_LEFT },
    { "ar_EG", RIGHT_TO_LEFT },
    { "he", RIGHT_TO_LEFT },
    { "he_IL", RIGHT_TO_LEFT },
    // "iw" is an obsolete code for Hebrew.
    { "iw", RIGHT_TO_LEFT },
    // There is no localization for Farsi and Urdu yet, but the text layout
    // direction information for them is available.
    { "fa", RIGHT_TO_LEFT },
    { "ur", RIGHT_TO_LEFT },
#if 0
    // Enable these once the minimal locale data for Azerbaijani written in
    // Arabic and for Dhivehi is included. At the moment, our copy of ICU data
    // does not have entries for them.
    { "az_Arab", RIGHT_TO_LEFT },
    // Dhivehi, which uses the Thaana script.
    { "dv", RIGHT_TO_LEFT },
#endif
    { "en", LEFT_TO_RIGHT },
    // Chinese in China, written with '-'.
    { "zh-CN", LEFT_TO_RIGHT },
    // Filipino: a 3-letter code.
    { "fil", LEFT_TO_RIGHT },
    // Russian.
    { "ru", LEFT_TO_RIGHT },
    // Japanese, which uses multiple scripts.
    { "ja", LEFT_TO_RIGHT },
  };

  for (size_t index = 0; index < arraysize(kLocales); ++index) {
    EXPECT_EQ(kLocales[index].expected,
              GetTextDirectionForLocale(kLocales[index].locale))
        << "locale: " << kLocales[index].locale;
  }
}
398 
// Checks the layout direction GetTextDirectionForLocaleInStartUp() reports
// for a set of RTL and LTR locale identifiers.
TEST_F(RTLTest, GetTextDirectionForLocaleInStartUp) {
  struct LocaleCase {
    const char* locale;
    TextDirection expected;
  };
  const LocaleCase kLocales[] = {
    { "ar", RIGHT_TO_LEFT },
    { "ar_EG", RIGHT_TO_LEFT },
    { "he", RIGHT_TO_LEFT },
    { "he_IL", RIGHT_TO_LEFT },
    // "iw" is an obsolete code for Hebrew.
    { "iw", RIGHT_TO_LEFT },
    // There is no localization for Farsi and Urdu yet, but the text layout
    // direction information for them is available.
    { "fa", RIGHT_TO_LEFT },
    { "ur", RIGHT_TO_LEFT },
    { "en", LEFT_TO_RIGHT },
    // Chinese in China, written with '-'.
    { "zh-CN", LEFT_TO_RIGHT },
    // Filipino: a 3-letter code.
    { "fil", LEFT_TO_RIGHT },
    // Russian.
    { "ru", LEFT_TO_RIGHT },
    // Japanese, which uses multiple scripts.
    { "ja", LEFT_TO_RIGHT },
  };

  for (size_t index = 0; index < arraysize(kLocales); ++index) {
    EXPECT_EQ(kLocales[index].expected,
              GetTextDirectionForLocaleInStartUp(kLocales[index].locale))
        << "locale: " << kLocales[index].locale;
  }
}
420 
// Verifies that UnadjustStringForLocaleDirection() exactly reverses whatever
// AdjustStringForLocaleDirection() did, in both UI directions.
TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
  // These test strings are borrowed from the GetFirstStrongCharacterDirection
  // test.
  const wchar_t* cases[] = {
    L"foo bar",
    L"foo \x05d0 bar",
    L"\x05d0 foo bar",
    L"!foo \x05d0 bar",
    L",\x05d0 foo bar",
    L"\x202a \x05d0 foo  bar",
    L"\x202d \x05d0 foo  bar",
    L"\x202b foo \x05d0 bar",
    L"\x202e foo \x05d0 bar",
    L"\x0622 foo \x05d0 bar",
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes, one per UI direction. The pass counter has its own name so
  // the inner loop index does not shadow it.
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    for (size_t i = 0; i < arraysize(cases); ++i) {
      string16 test_case = WideToUTF16(cases[i]);
      string16 adjusted_string = test_case;

      // Nothing to undo when the adjust step reports that it made no change.
      if (!AdjustStringForLocaleDirection(&adjusted_string))
        continue;

      EXPECT_NE(test_case, adjusted_string);
      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
                                            << "] with IsRTL() == " << IsRTL();
    }
  }

  // After two toggles the direction must be back where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
459 
// Verifies that EnsureTerminatedDirectionalFormatting() appends one U+202C
// (PDF) for every directional formatting character left unterminated, and
// leaves already-terminated strings unchanged, in both UI directions.
TEST_F(RTLTest, EnsureTerminatedDirectionalFormatting) {
  struct {
    const wchar_t* unformatted_text;
    const wchar_t* formatted_text;
  } cases[] = {
      // Tests string without any dir-formatting characters.
      {L"google.com", L"google.com"},
      // Tests string with properly terminated dir-formatting character.
      {L"\x202egoogle.com\x202c", L"\x202egoogle.com\x202c"},
      // Tests string with over-terminated dir-formatting characters.
      {L"\x202egoogle\x202c.com\x202c", L"\x202egoogle\x202c.com\x202c"},
      // Tests string beginning with a dir-formatting character.
      {L"\x202emoc.elgoog", L"\x202emoc.elgoog\x202c"},
      // Tests string that over-terminates then re-opens.
      {L"\x202egoogle\x202c\x202c.\x202eom",
       L"\x202egoogle\x202c\x202c.\x202eom\x202c"},
      // Tests string containing a dir-formatting character in the middle.
      {L"google\x202e.com", L"google\x202e.com\x202c"},
      // Tests string with multiple dir-formatting characters.
      {L"\x202egoogle\x202e.com/\x202eguest",
       L"\x202egoogle\x202e.com/\x202eguest\x202c\x202c\x202c"},
      // Test the other dir-formatting characters (U+202A, U+202B, and U+202D).
      {L"\x202agoogle.com", L"\x202agoogle.com\x202c"},
      {L"\x202bgoogle.com", L"\x202bgoogle.com\x202c"},
      {L"\x202dgoogle.com", L"\x202dgoogle.com\x202c"},
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes, one per UI direction. The pass counter has its own name so
  // the inner loop index does not shadow it.
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());
    for (size_t i = 0; i < arraysize(cases); ++i) {
      string16 unsanitized_text = WideToUTF16(cases[i].unformatted_text);
      string16 sanitized_text = WideToUTF16(cases[i].formatted_text);
      EnsureTerminatedDirectionalFormatting(&unsanitized_text);
      EXPECT_EQ(sanitized_text, unsanitized_text);
    }
  }
  // After two toggles the direction must be back where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
502 
// Verifies SanitizeUserSuppliedString() on RTL text: unterminated directional
// formatting characters get a closing U+202C, and on non-Windows platforms
// the result is additionally wrapped in a fixed prefix and suffix.
TEST_F(RTLTest, SanitizeUserSuppliedString) {
  struct SanitizeCase {
    const wchar_t* raw;
    const wchar_t* terminated;
  };
  const SanitizeCase kCases[] = {
      // RTL string whose dir-formatting character is properly terminated.
      {L"\x202eكبير Google التطبيق\x202c", L"\x202eكبير Google التطبيق\x202c"},
      // RTL string with over-terminated dir-formatting characters.
      {L"\x202eكبير Google\x202cالتطبيق\x202c",
       L"\x202eكبير Google\x202cالتطبيق\x202c"},
      // RTL string that over-terminates and then re-opens.
      {L"\x202eكبير Google\x202c\x202cالتطبيق\x202e",
       L"\x202eكبير Google\x202c\x202cالتطبيق\x202e\x202c"},
      // RTL string with multiple dir-formatting characters.
      {L"\x202eك\x202eبير Google الت\x202eطبيق",
       L"\x202eك\x202eبير Google الت\x202eطبيق\x202c\x202c\x202c"},
      // The remaining dir-formatting characters (U+202A, U+202B, and U+202D).
      {L"\x202aكبير Google التطبيق", L"\x202aكبير Google التطبيق\x202c"},
      {L"\x202bكبير Google التطبيق", L"\x202bكبير Google التطبيق\x202c"},
      {L"\x202dكبير Google التطبيق", L"\x202dكبير Google التطبيق\x202c"},
  };

  // On Windows for an LTR locale, no changes to the string are made, so the
  // expected wrapper is empty there. It is the same for every case, so it is
  // built once.
  string16 prefix;
  string16 suffix;
#if !defined(OS_WIN)
  prefix = WideToUTF16(L"\x200e\x202b");
  suffix = WideToUTF16(L"\x202c\x200e");
#endif  // !OS_WIN

  for (size_t index = 0; index < arraysize(kCases); ++index) {
    const string16 expected =
        prefix + WideToUTF16(kCases[index].terminated) + suffix;
    string16 actual = WideToUTF16(kCases[index].raw);
    SanitizeUserSuppliedString(&actual);
    EXPECT_EQ(expected, actual);
  }
}
540 
541 class SetICULocaleTest : public PlatformTest {};
542 
// Verifies how SetICUDefaultLocale() treats very long locale identifiers: a
// 155-character id must not collapse to "en_US", while a 158-character id is
// expected to end up as "en_US".
// NOTE(review): the cut-off presumably tracks ICU's maximum locale id length;
// confirm against the ICU headers.
TEST_F(SetICULocaleTest, OverlongLocaleId) {
  test::ScopedRestoreICUDefaultLocale restore_locale;

  // Grow the id in "-x-foo" steps until it is at least 152 characters long
  // (11 + 24 * 6 = 155).
  std::string locale_id = "fr-ca-x-foo";
  while (locale_id.size() < 152) {
    locale_id += "-x-foo";
  }
  SetICUDefaultLocale(locale_id);
  EXPECT_STRNE("en_US", icu::Locale::getDefault().getName());

  // Three more characters are enough to make the id overlong.
  locale_id += "zzz";
  SetICUDefaultLocale(locale_id);
  EXPECT_STREQ("en_US", icu::Locale::getDefault().getName());
}
554 
555 }  // namespace i18n
556 }  // namespace base
557