1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/sys_string_conversions.h"
6
7 #include <wchar.h>
8
9 #include "base/string_piece.h"
10 #include "base/utf_string_conversions.h"
11
12 namespace base {
13
SysWideToUTF8(const std::wstring & wide)14 std::string SysWideToUTF8(const std::wstring& wide) {
15 // In theory this should be using the system-provided conversion rather
16 // than our ICU, but this will do for now.
17 return WideToUTF8(wide);
18 }
SysUTF8ToWide(const StringPiece & utf8)19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20 // In theory this should be using the system-provided conversion rather
21 // than our ICU, but this will do for now.
22 std::wstring out;
23 UTF8ToWide(utf8.data(), utf8.size(), &out);
24 return out;
25 }
26
27 #if defined(OS_CHROMEOS)
28
29 // ChromeOS always runs in UTF-8 locale.
SysWideToNativeMB(const std::wstring & wide)30 std::string SysWideToNativeMB(const std::wstring& wide) {
31 return WideToUTF8(wide);
32 }
33
SysNativeMBToWide(const StringPiece & native_mb)34 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
35 return SysUTF8ToWide(native_mb);
36 }
37
38 #else
39
// Converts |wide| to the multi-byte encoding of the current C locale, one
// character at a time, using wcrtomb().
//
// Embedded NUL characters are preserved: each one becomes a single NUL byte
// in the output. Returns an empty string when |wide| is empty or when
// wcrtomb() reports an error for any character.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);

  // Pass 1: measure how many output bytes are needed. Calling wcrtomb() with
  // a NULL destination does not calculate the output length, so every
  // character is converted into a scratch buffer and only the byte count is
  // kept.
  mbstate_t state;
  memset(&state, 0, sizeof(state));
  size_t byte_count = 0;
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    const wchar_t ch = *it;
    if (!ch) {
      // NUL is never handed to wcrtomb(), which treats it specially. It
      // occupies exactly one byte of output.
      ++byte_count;
      continue;
    }
    char scratch[16];
    const size_t written = wcrtomb(scratch, ch, &state);
    if (written == kConversionError)
      return std::string();
    // A zero-length result is accounted for as one byte, just like a NUL.
    byte_count += written ? written : 1;
  }

  if (!byte_count)
    return std::string();

  // The buffer starts out filled with NUL bytes, so positions that are merely
  // skipped over below already hold the right value.
  std::string native_mb(byte_count, '\0');

  // Pass 2: convert again from a fresh shift state, this time writing
  // straight into the output. |write_pos| is the next free output byte.
  memset(&state, 0, sizeof(state));
  size_t write_pos = 0;
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    const wchar_t ch = *it;
    if (!ch) {
      // Embedded NUL: the output byte is already zero, just step past it.
      ++write_pos;
      continue;
    }
    const size_t written = wcrtomb(&native_mb[write_pos], ch, &state);
    if (written == kConversionError)
      return std::string();
    write_pos += written ? written : 1;
  }

  return native_mb;
}
99
SysNativeMBToWide(const StringPiece & native_mb)100 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
101 mbstate_t ps;
102
103 // Calculate the number of wide characters. We walk through the string
104 // without writing the output, counting the number of wide characters.
105 size_t num_out_chars = 0;
106 memset(&ps, 0, sizeof(ps));
107 for (size_t i = 0; i < native_mb.size(); ) {
108 const char* src = native_mb.data() + i;
109 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
110 switch (res) {
111 // Handle any errors and return an empty string.
112 case static_cast<size_t>(-2):
113 case static_cast<size_t>(-1):
114 return std::wstring();
115 break;
116 case 0:
117 // We hit an embedded null byte, keep going.
118 i += 1; // Fall through.
119 default:
120 i += res;
121 ++num_out_chars;
122 break;
123 }
124 }
125
126 if (num_out_chars == 0)
127 return std::wstring();
128
129 std::wstring out;
130 out.resize(num_out_chars);
131
132 memset(&ps, 0, sizeof(ps)); // Clear the shift state.
133 // We walk the input string again, with |i| tracking the index of the
134 // multi-byte input, and |j| tracking the wide output.
135 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
136 const char* src = native_mb.data() + i;
137 wchar_t* dst = &out[j];
138 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
139 switch (res) {
140 // Handle any errors and return an empty string.
141 case static_cast<size_t>(-2):
142 case static_cast<size_t>(-1):
143 return std::wstring();
144 break;
145 case 0:
146 i += 1; // Skip null byte.
147 break;
148 default:
149 i += res;
150 break;
151 }
152 }
153
154 return out;
155 }
156
157 #endif // OS_CHROMEOS
158
159 } // namespace base
160