• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/case_conversion.h"
6 #include "base/i18n/rtl.h"
7 #include "base/strings/utf_string_conversions.h"
8 #include "base/test/icu_test_util.h"
9 #include "testing/gtest/include/gtest/gtest.h"
10 #include "third_party/icu/source/i18n/unicode/usearch.h"
11 
12 namespace base {
13 namespace i18n {
14 
15 namespace {
16 
// Sample text mixing upper- and lower-case non-ASCII letters with caseless
// symbols. Each literal below is one space-delimited group: Latin-1 letters
// with diaeresis (A/O, then I), symbols that the lower- and upper-case
// variants leave untouched, a Greek Extended pair (U+1F07/U+1F0F), and a
// Latin Extended Additional pair (U+1E00/U+1E01).
const wchar_t kNonASCIIMixed[] =
    L"\xC4\xD6\xE4\xF6"
    L"\x20\xCF\xEF"
    L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E"
    L"\x20\x1F07\x1F0F"
    L"\x20\x1E00\x1E01";

// Expected result of lower-casing kNonASCIIMixed: every cased letter is
// replaced by its lower-case code point, symbols and spaces are unchanged.
const wchar_t kNonASCIILower[] =
    L"\xE4\xF6\xE4\xF6"
    L"\x20\xEF\xEF"
    L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E"
    L"\x20\x1F07\x1F07"
    L"\x20\x1E01\x1E01";

// Expected result of upper-casing kNonASCIIMixed: every cased letter is
// replaced by its upper-case code point, symbols and spaces are unchanged.
const wchar_t kNonASCIIUpper[] =
    L"\xC4\xD6\xC4\xD6"
    L"\x20\xCF\xCF"
    L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E"
    L"\x20\x1F0F\x1F0F"
    L"\x20\x1E00\x1E00";
29 
30 }  // namespace
31 
32 // Test upper and lower case string conversion.
TEST(CaseConversionTest,UpperLower)33 TEST(CaseConversionTest, UpperLower) {
34   const string16 mixed(ASCIIToUTF16("Text with UPPer & lowER casE."));
35   const string16 expected_lower(ASCIIToUTF16("text with upper & lower case."));
36   const string16 expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE."));
37 
38   string16 result = ToLower(mixed);
39   EXPECT_EQ(expected_lower, result);
40 
41   result = ToUpper(mixed);
42   EXPECT_EQ(expected_upper, result);
43 }
44 
// Checks ToLower() and ToUpper() on the non-ASCII sample kNonASCIIMixed,
// comparing against the pre-computed kNonASCIILower / kNonASCIIUpper strings.
TEST(CaseConversionTest, NonASCII) {
  const string16 input = WideToUTF16(kNonASCIIMixed);

  EXPECT_EQ(WideToUTF16(kNonASCIILower), ToLower(input));
  EXPECT_EQ(WideToUTF16(kNonASCIIUpper), ToUpper(input));
}
56 
// Checks that ToLower()/ToUpper() depend on the ICU default locale. The input
// is U+0049 ('I') followed by U+0131 (dotless i). Under "en_US" the 'I' is
// expected to lower-case to U+0069, under "tr" to U+0131; the expected
// upper-case form is U+0049 U+0049 in both locales.
TEST(CaseConversionTest, TurkishLocaleConversion) {
  const string16 input = WideToUTF16(L"\x49\x131");

  // NOTE(review): presumably puts the previous ICU default locale back when
  // this object goes out of scope (inferred from the type name) - confirm.
  test::ScopedRestoreICUDefaultLocale locale_restorer;

  // Default (non-Turkish) mappings.
  i18n::SetICUDefaultLocale("en_US");
  EXPECT_EQ(WideToUTF16(L"\x69\x131"), ToLower(input));
  EXPECT_EQ(WideToUTF16(L"\x49\x49"), ToUpper(input));

  // Turkish mappings: only the lower-case expectation changes.
  i18n::SetICUDefaultLocale("tr");
  EXPECT_EQ(WideToUTF16(L"\x131\x131"), ToLower(input));
  EXPECT_EQ(WideToUTF16(L"\x49\x49"), ToUpper(input));
}
82 
// Checks FoldCase() on ASCII text, on the non-ASCII samples, under two ICU
// default locales, and on inputs whose folded form is longer than the input.
TEST(CaseConversionTest, FoldCase) {
  // Plain ASCII is expected to fold to its lower-case form.
  EXPECT_EQ(ASCIIToUTF16("hello, world"),
            FoldCase(ASCIIToUTF16("Hello, World")));

  // The mixed, lower-case and upper-case non-ASCII samples are expected to
  // share a single folded form.
  const string16 folded_mixed = FoldCase(WideToUTF16(kNonASCIIMixed));
  EXPECT_EQ(folded_mixed, FoldCase(WideToUTF16(kNonASCIILower)));
  EXPECT_EQ(folded_mixed, FoldCase(WideToUTF16(kNonASCIIUpper)));

  // U+0049 followed by U+0131 (dotless i). The expected folded form is the
  // "en_US" lower-case result, and it must not change when the default locale
  // is switched to Turkish.
  const string16 dotless_input = WideToUTF16(L"\x49\x131");
  const string16 dotless_folded = WideToUTF16(L"\x69\x131");

  // NOTE(review): presumably puts the previous ICU default locale back when
  // this object goes out of scope (inferred from the type name) - confirm.
  test::ScopedRestoreICUDefaultLocale locale_restorer;

  i18n::SetICUDefaultLocale("en_US");
  EXPECT_EQ(dotless_folded, FoldCase(dotless_input));

  i18n::SetICUDefaultLocale("tr");
  EXPECT_EQ(dotless_folded, FoldCase(dotless_input));

  // Folding can lengthen the string: U+0130 (LATIN CAPITAL LETTER I WITH DOT
  // ABOVE) is expected to become a lower-case "i" followed by U+0307
  // (COMBINING DOT ABOVE).
  EXPECT_EQ(WideToUTF16(L"i\u0307j"), FoldCase(WideToUTF16(L"\u0130j")));

  // U+00DF (SHARP S) and U+1E9E (CAPITAL SHARP S) are each expected to fold
  // to "ss".
  EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(WideToUTF16(L"\u00DF\u1E9E")));
}
114 
115 }  // namespace i18n
116 }  // namespace base
117 
118 
119 
120