• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/sys_string_conversions.h"
6 
7 #include <wchar.h>
8 
9 #include "base/strings/string_piece.h"
10 #include "base/strings/utf_string_conversions.h"
11 
12 namespace base {
13 
SysWideToUTF8(const std::wstring & wide)14 std::string SysWideToUTF8(const std::wstring& wide) {
15   // In theory this should be using the system-provided conversion rather
16   // than our ICU, but this will do for now.
17   return WideToUTF8(wide);
18 }
SysUTF8ToWide(const StringPiece & utf8)19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20   // In theory this should be using the system-provided conversion rather
21   // than our ICU, but this will do for now.
22   std::wstring out;
23   UTF8ToWide(utf8.data(), utf8.size(), &out);
24   return out;
25 }
26 
27 #if defined(OS_CHROMEOS) || defined(OS_ANDROID)
28 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
29 // support and a better understanding of what calls these routines.
30 
31 // ChromeOS always runs in UTF-8 locale.
SysWideToNativeMB(const std::wstring & wide)32 std::string SysWideToNativeMB(const std::wstring& wide) {
33   return WideToUTF8(wide);
34 }
35 
SysNativeMBToWide(const StringPiece & native_mb)36 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
37   return SysUTF8ToWide(native_mb);
38 }
39 
40 #else
41 
// Converts |wide| to the multi-byte encoding of the current C locale, one
// character at a time, using wcrtomb().
//
// Embedded NUL wide characters are preserved as single NUL bytes in the
// output. Returns an empty string when |wide| is empty or when wcrtomb()
// reports an error for any character.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A zero-valued mbstate_t is the initial conversion state. Value
  // initialization zeroes it without relying on memset(), which this file
  // never declared via <string.h>.
  mbstate_t ps = mbstate_t();

  std::string out;
  // Each input character contributes at least one output byte, so this is a
  // lower bound that avoids most reallocations.
  out.reserve(wide.size());

  // Single pass: convert each character into a scratch buffer and append the
  // bytes, instead of converting everything once to count and again to write.
  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    // Scratch space for one converted character; wcrtomb() cannot be asked
    // for the length with a NULL output, so a real buffer is required.
    // NOTE(review): same fixed size as before -- confirm MB_CUR_MAX <= 16 on
    // all targets, or size this with MB_LEN_MAX from <limits.h>.
    char buf[16];
    // Skip NULs to avoid wcrtomb's special handling of them.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;
    if (res == static_cast<size_t>(-1)) {
      // Conversion error: the whole result is discarded.
      return std::string();
    }
    if (res == 0) {
      // Embedded NUL: emit exactly one zero byte and keep going.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
101 
SysNativeMBToWide(const StringPiece & native_mb)102 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
103   mbstate_t ps;
104 
105   // Calculate the number of wide characters.  We walk through the string
106   // without writing the output, counting the number of wide characters.
107   size_t num_out_chars = 0;
108   memset(&ps, 0, sizeof(ps));
109   for (size_t i = 0; i < native_mb.size(); ) {
110     const char* src = native_mb.data() + i;
111     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
112     switch (res) {
113       // Handle any errors and return an empty string.
114       case static_cast<size_t>(-2):
115       case static_cast<size_t>(-1):
116         return std::wstring();
117         break;
118       case 0:
119         // We hit an embedded null byte, keep going.
120         i += 1;  // Fall through.
121       default:
122         i += res;
123         ++num_out_chars;
124         break;
125     }
126   }
127 
128   if (num_out_chars == 0)
129     return std::wstring();
130 
131   std::wstring out;
132   out.resize(num_out_chars);
133 
134   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
135   // We walk the input string again, with |i| tracking the index of the
136   // multi-byte input, and |j| tracking the wide output.
137   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
138     const char* src = native_mb.data() + i;
139     wchar_t* dst = &out[j];
140     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
141     switch (res) {
142       // Handle any errors and return an empty string.
143       case static_cast<size_t>(-2):
144       case static_cast<size_t>(-1):
145         return std::wstring();
146         break;
147       case 0:
148         i += 1;  // Skip null byte.
149         break;
150       default:
151         i += res;
152         break;
153     }
154   }
155 
156   return out;
157 }
158 
159 #endif  // OS_CHROMEOS
160 
161 }  // namespace base
162