1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "base/strings/sys_string_conversions.h"
11
12 #include <stddef.h>
13 #include <string.h>
14 #include <wchar.h>
15
16 #include <string_view>
17
18 #include "base/strings/utf_string_conversions.h"
19 #include "build/build_config.h"
20
21 namespace base {
22
SysWideToUTF8(const std::wstring & wide)23 std::string SysWideToUTF8(const std::wstring& wide) {
24 // In theory this should be using the system-provided conversion rather
25 // than our ICU, but this will do for now.
26 return WideToUTF8(wide);
27 }
SysUTF8ToWide(std::string_view utf8)28 std::wstring SysUTF8ToWide(std::string_view utf8) {
29 // In theory this should be using the system-provided conversion rather
30 // than our ICU, but this will do for now.
31 std::wstring out;
32 UTF8ToWide(utf8.data(), utf8.size(), &out);
33 return out;
34 }
35
36 #if defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
// TODO(port): Consider dropping the IS_ANDROID case once Android has wcrtomb()
// support and we have a better understanding of what calls these routines.
39
SysWideToNativeMB(const std::wstring & wide)40 std::string SysWideToNativeMB(const std::wstring& wide) {
41 return WideToUTF8(wide);
42 }
43
SysNativeMBToWide(std::string_view native_mb)44 std::wstring SysNativeMBToWide(std::string_view native_mb) {
45 return SysUTF8ToWide(native_mb);
46 }
47
48 #else
49
// Converts |wide| to the multi-byte encoding of the current C locale using
// wcrtomb().
//
// Returns the converted bytes. Embedded NUL wide characters are preserved as
// single NUL bytes. If any character cannot be converted, the whole conversion
// is abandoned and an empty string is returned; an empty input also yields an
// empty string.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // Scratch space for the bytes of a single converted character. wcrtomb()
  // gives no way to bound its output, so the buffer must be at least as large
  // as the longest sequence the locale can produce.
  constexpr size_t kMaxBytesPerChar = 16;

  mbstate_t ps;
  memset(&ps, 0, sizeof(ps));  // Begin in the initial conversion state.

  std::string out;
  // Every wide character contributes at least one byte, so this is a lower
  // bound on the final size.
  out.reserve(wide.size());

  // Convert in a single pass, appending each character's bytes as they are
  // produced. This avoids a separate counting pass whose result would have to
  // stay exactly in sync with raw writes into the output buffer.
  for (const wchar_t src : wide) {
    char buf[kMaxBytesPerChar];
    // Never hand NUL to wcrtomb(): it gets special treatment there. It is
    // reported as a zero-length result and handled below instead.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;
    if (res == static_cast<size_t>(-1)) {
      // Unconvertible character: fail the whole conversion.
      return std::string();
    }
    if (res == 0) {
      // Embedded NUL: emit exactly one NUL byte and keep going.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
106
// Converts |native_mb|, interpreted in the multi-byte encoding of the current
// C locale, to a wide string using mbrtowc().
//
// Returns the converted wide characters. Embedded NUL bytes are preserved as
// NUL wide characters. If the input contains an invalid sequence or ends in
// the middle of a multi-byte character, the whole conversion is abandoned and
// an empty string is returned; an empty input also yields an empty string.
std::wstring SysNativeMBToWide(std::string_view native_mb) {
  mbstate_t ps;
  memset(&ps, 0, sizeof(ps));  // Begin in the initial conversion state.

  std::wstring out;
  // Each wide character consumes at least one input byte, so the byte count
  // is an upper bound on the number of wide characters produced.
  out.reserve(native_mb.size());

  // Convert in a single pass, appending each wide character as it is decoded.
  // This avoids a separate counting pass whose result would have to stay
  // exactly in sync with raw writes into the output buffer.
  for (size_t i = 0; i < native_mb.size();) {
    wchar_t wc = L'\0';
    const size_t res =
        mbrtowc(&wc, native_mb.data() + i, native_mb.size() - i, &ps);
    if (res == static_cast<size_t>(-1) || res == static_cast<size_t>(-2)) {
      // -1: invalid sequence; -2: input ends with an incomplete character.
      return std::wstring();
    }
    // A result of 0 means a NUL byte was decoded; it still occupies one byte
    // of input, which must be stepped over explicitly.
    i += (res == 0) ? 1 : res;
    out.push_back(wc);
  }

  return out;
}
162
163 #endif // defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
164
165 } // namespace base
166