• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <string>
6 
7 #include "base/basictypes.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/sys_string_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/test/scoped_locale.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 
// U+10300 (Old Italic Letter A) spelled as a wide string. The code point lies
// above U+FFFF, so its spelling depends on the width of wchar_t: one 32-bit
// unit when WCHAR_T_IS_UTF32 is defined, otherwise the UTF-16 surrogate pair
// D800 DF00. Used by the ">16 bits" checks in the tests below.
#if defined(WCHAR_T_IS_UTF32)
static const std::wstring kSysWideOldItalicLetterA = L"\x10300";
#else
static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00";
#endif
19 
20 namespace base {
21 
// Exercises SysWideToUTF8() with ASCII text, BMP (CJK) text, a code point
// above U+FFFF, and a string that contains an embedded NUL.
TEST(SysStrings, SysWideToUTF8) {
  EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));
  EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));

  // >16 bits
  EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));

  // Error case. When Windows finds a UTF-16 character going off the end of
  // a string, it just converts that literal value to UTF-8, even though this
  // is invalid.
  //
  // This is what XP does, but Vista has different behavior, so we don't bother
  // verifying it:
  // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
  //           SysWideToUTF8(L"\x4f60\xd800zyxw"));

  // Test embedded NULLs. The strings are built with push_back() because a
  // C-string constructor would stop at the first NUL.
  std::wstring wide_null(L"a");
  wide_null.push_back(L'\0');
  wide_null.push_back(L'b');

  std::string expected_null("a");
  expected_null.push_back('\0');
  expected_null.push_back('b');

  EXPECT_EQ(expected_null, SysWideToUTF8(wide_null));
}
49 
// Exercises SysUTF8ToWide() with ASCII text, BMP (CJK) text, a code point
// above U+FFFF, and a string that contains an embedded NUL.
TEST(SysStrings, SysUTF8ToWide) {
  EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));
  EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
  // >16 bits
  EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));

  // Error case. When Windows finds an invalid UTF-8 character, it just skips
  // it. This seems weird because it's inconsistent with the reverse conversion.
  //
  // This is what XP does, but Vista has different behavior, so we don't bother
  // verifying it:
  // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

  // Test embedded NULLs. The strings are built with push_back() because a
  // C-string constructor would stop at the first NUL.
  std::string utf8_null("a");
  utf8_null.push_back('\0');
  utf8_null.push_back('b');

  std::wstring expected_null(L"a");
  expected_null.push_back(L'\0');
  expected_null.push_back(L'b');

  EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null));
}
74 
75 #if defined(OS_LINUX)  // Tests depend on setting a specific Linux locale.
76 
// Exercises SysWideToNativeMB() under an explicitly selected en_US.utf-8
// locale: ASCII text, BMP (CJK) text, a code point above U+FFFF, and a string
// that contains an embedded NUL. The expected bytes are UTF-8 because that is
// the locale's multibyte encoding.
TEST(SysStrings, SysWideToNativeMB) {
  ScopedLocale locale("en_US.utf-8");
  EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world"));
  EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d"));

  // >16 bits
  EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA));

  // NOTE(review): the disabled check below mirrors the one in the
  // SysWideToUTF8 test, whose rationale describes how Windows (XP vs. Vista)
  // converts an unpaired UTF-16 surrogate. This test is only built under
  // OS_LINUX, so that rationale does not obviously apply here; the check is
  // kept, still disabled, for reference:
  // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
  //           SysWideToNativeMB(L"\x4f60\xd800zyxw"));

  // Test embedded NULLs. The strings are built with push_back() because a
  // C-string constructor would stop at the first NUL.
  std::wstring wide_null(L"a");
  wide_null.push_back(L'\0');
  wide_null.push_back(L'b');

  std::string expected_null("a");
  expected_null.push_back('\0');
  expected_null.push_back('b');

  EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null));
}
105 
106 // We assume the test is running in a UTF8 locale.
TEST(SysStrings,SysNativeMBToWide)107 TEST(SysStrings, SysNativeMBToWide) {
108   ScopedLocale locale("en_US.utf-8");
109   EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world"));
110   EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
111   // >16 bits
112   EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80"));
113 
114   // Error case. When Windows finds an invalid UTF-8 character, it just skips
115   // it. This seems weird because it's inconsistent with the reverse conversion.
116   //
117   // This is what XP does, but Vista has different behavior, so we don't bother
118   // verifying it:
119   // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));
120 
121   // Test embedded NULLs.
122   std::string utf8_null("a");
123   utf8_null.push_back(0);
124   utf8_null.push_back('b');
125 
126   std::wstring expected_null(L"a");
127   expected_null.push_back(0);
128   expected_null.push_back('b');
129 
130   EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null));
131 }
132 
// Wide strings that must survive a wide -> multibyte -> wide round trip
// unchanged. Covers ASCII, several BMP scripts, and (when the wchar_t encoding
// macro is defined) code points above U+FFFF.
static const wchar_t* const kConvertRoundtripCases[] = {
  L"Google Video",
  // "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
  //  "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
  // "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
  // "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Test characters that take more than 16 bits. This will depend on whether
  // wchar_t is 16 or 32 bits. If neither macro is defined, these cases are
  // simply omitted.
#if defined(WCHAR_T_IS_UTF16)
  // U+10300 as a surrogate pair.
  L"\xd800\xdf00",
  // U+11D40 - U+11D44 as surrogate pairs.
  // NOTE(review): the original comment called these "Mathematical
  // Alphanumeric Symbols ... A,B,C,D,E", but that block starts at U+1D400;
  // the data encodes U+11D40.. - confirm which was intended.
  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
  // U+10300 as a single 32-bit unit.
  L"\x10300",
  // U+11D40 - U+11D44.
  // NOTE(review): the original comment called these "Mathematical
  // Alphanumeric Symbols ... A,B,C,D,E", but that block starts at U+1D400;
  // the data encodes U+11D40.. - confirm which was intended.
  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
#endif
};
159 
160 
// Round-trips every entry of kConvertRoundtripCases through the native
// multibyte conversions under an explicitly selected en_US.utf-8 locale.
// Each case is checked three ways:
//   1. SysWideToNativeMB followed by SysNativeMBToWide;
//   2. WideToUTF8 followed by SysNativeMBToWide;
//   3. SysWideToNativeMB followed by UTF8ToWide.
// Checks 2 and 3 cross-check the native conversions against the UTF-8
// converters (the original comment describes this as a double check through
// ICU), which is meaningful because the selected locale is UTF-8.
// Every expectation reports the failing case index.
TEST(SysStrings, SysNativeMBAndWide) {
  ScopedLocale locale("en_US.utf-8");
  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
    const std::wstring wide = kConvertRoundtripCases[i];

    // 1. Native multibyte there and back.
    const std::wstring native_trip =
        SysNativeMBToWide(SysWideToNativeMB(wide));
    EXPECT_EQ(wide.size(), native_trip.size()) << "case index " << i;
    EXPECT_EQ(wide, native_trip) << "case index " << i;

    // 2. UTF-8 encoder feeding the native decoder.
    const std::wstring utf8_to_native_trip =
        SysNativeMBToWide(WideToUTF8(wide));
    EXPECT_EQ(wide.size(), utf8_to_native_trip.size()) << "case index " << i;
    EXPECT_EQ(wide, utf8_to_native_trip) << "case index " << i;

    // 3. Native encoder feeding the UTF-8 decoder.
    const std::wstring native_to_utf8_trip =
        UTF8ToWide(SysWideToNativeMB(wide));
    EXPECT_EQ(wide.size(), native_to_utf8_trip.size()) << "case index " << i;
    EXPECT_EQ(wide, native_to_utf8_trip) << "case index " << i;
  }
}
185 #endif  // OS_LINUX
186 
187 }  // namespace base
188