1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/sys_string_conversions.h"
6
7 #include <wchar.h>
8
9 #include "base/strings/string_piece.h"
10 #include "base/strings/utf_string_conversions.h"
11
12 namespace base {
13
SysWideToUTF8(const std::wstring & wide)14 std::string SysWideToUTF8(const std::wstring& wide) {
15 // In theory this should be using the system-provided conversion rather
16 // than our ICU, but this will do for now.
17 return WideToUTF8(wide);
18 }
SysUTF8ToWide(const StringPiece & utf8)19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20 // In theory this should be using the system-provided conversion rather
21 // than our ICU, but this will do for now.
22 std::wstring out;
23 UTF8ToWide(utf8.data(), utf8.size(), &out);
24 return out;
25 }
26
27 #if defined(OS_CHROMEOS) || defined(OS_ANDROID)
28 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
29 // support and a better understanding of what calls these routines.
30
31 // ChromeOS always runs in UTF-8 locale.
SysWideToNativeMB(const std::wstring & wide)32 std::string SysWideToNativeMB(const std::wstring& wide) {
33 return WideToUTF8(wide);
34 }
35
SysNativeMBToWide(const StringPiece & native_mb)36 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
37 return SysUTF8ToWide(native_mb);
38 }
39
40 #else
41
// Converts |wide| to a multi-byte string, one character at a time, using
// wcrtomb(). Embedded NUL wide characters are carried over as single NUL
// bytes. Returns an empty string when |wide| is empty or when wcrtomb()
// reports an error for any character.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  const size_t wide_length = wide.size();

  // Pass 1: measure. Every character is converted into a scratch buffer
  // purely to learn how many bytes it needs; wcrtomb() does not compute the
  // output length when handed a NULL destination.
  mbstate_t state;
  memset(&state, 0, sizeof(state));
  size_t byte_count = 0;
  for (size_t pos = 0; pos < wide_length; ++pos) {
    const wchar_t wc = wide[pos];
    if (!wc) {
      // NUL is never passed to wcrtomb(), which treats it specially. It
      // simply takes up one output byte.
      ++byte_count;
      continue;
    }
    char scratch[16];
    const size_t written = wcrtomb(scratch, wc, &state);
    if (written == kConversionError)
      return std::string();
    // A zero-length result is accounted for as a single byte.
    byte_count += written ? written : 1;
  }

  if (!byte_count)
    return std::string();

  // Pass 2: convert for real, writing straight into the pre-sized (and
  // zero-filled) result. |out_pos| is the write cursor into the output.
  std::string out(byte_count, '\0');
  memset(&state, 0, sizeof(state));
  size_t out_pos = 0;
  for (size_t pos = 0; pos < wide_length; ++pos) {
    const wchar_t wc = wide[pos];
    if (!wc) {
      // The NUL byte is already present because the output is zero-filled.
      ++out_pos;
      continue;
    }
    const size_t written = wcrtomb(&out[out_pos], wc, &state);
    if (written == kConversionError)
      return std::string();
    out_pos += written ? written : 1;
  }

  return out;
}
101
SysNativeMBToWide(const StringPiece & native_mb)102 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
103 mbstate_t ps;
104
105 // Calculate the number of wide characters. We walk through the string
106 // without writing the output, counting the number of wide characters.
107 size_t num_out_chars = 0;
108 memset(&ps, 0, sizeof(ps));
109 for (size_t i = 0; i < native_mb.size(); ) {
110 const char* src = native_mb.data() + i;
111 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
112 switch (res) {
113 // Handle any errors and return an empty string.
114 case static_cast<size_t>(-2):
115 case static_cast<size_t>(-1):
116 return std::wstring();
117 break;
118 case 0:
119 // We hit an embedded null byte, keep going.
120 i += 1; // Fall through.
121 default:
122 i += res;
123 ++num_out_chars;
124 break;
125 }
126 }
127
128 if (num_out_chars == 0)
129 return std::wstring();
130
131 std::wstring out;
132 out.resize(num_out_chars);
133
134 memset(&ps, 0, sizeof(ps)); // Clear the shift state.
135 // We walk the input string again, with |i| tracking the index of the
136 // multi-byte input, and |j| tracking the wide output.
137 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
138 const char* src = native_mb.data() + i;
139 wchar_t* dst = &out[j];
140 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
141 switch (res) {
142 // Handle any errors and return an empty string.
143 case static_cast<size_t>(-2):
144 case static_cast<size_t>(-1):
145 return std::wstring();
146 break;
147 case 0:
148 i += 1; // Skip null byte.
149 break;
150 default:
151 i += res;
152 break;
153 }
154 }
155
156 return out;
157 }
158
159 #endif // OS_CHROMEOS
160
161 } // namespace base
162