• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/strings/sys_string_conversions.h"
11 
12 #include <stddef.h>
13 #include <string.h>
14 #include <wchar.h>
15 
16 #include <string_view>
17 
18 #include "base/strings/utf_string_conversions.h"
19 #include "build/build_config.h"
20 
21 namespace base {
22 
SysWideToUTF8(const std::wstring & wide)23 std::string SysWideToUTF8(const std::wstring& wide) {
24   // In theory this should be using the system-provided conversion rather
25   // than our ICU, but this will do for now.
26   return WideToUTF8(wide);
27 }
SysUTF8ToWide(std::string_view utf8)28 std::wstring SysUTF8ToWide(std::string_view utf8) {
29   // In theory this should be using the system-provided conversion rather
30   // than our ICU, but this will do for now.
31   std::wstring out;
32   UTF8ToWide(utf8.data(), utf8.size(), &out);
33   return out;
34 }
35 
36 #if defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
37 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
38 // support and a better understanding of what calls these routines.
39 
SysWideToNativeMB(const std::wstring & wide)40 std::string SysWideToNativeMB(const std::wstring& wide) {
41   return WideToUTF8(wide);
42 }
43 
SysNativeMBToWide(std::string_view native_mb)44 std::wstring SysNativeMBToWide(std::string_view native_mb) {
45   return SysUTF8ToWide(native_mb);
46 }
47 
48 #else
49 
// Converts |wide| to a multi-byte string, one wide character at a time, using
// wcrtomb().
//
// The conversion runs in two passes: the first measures how many output bytes
// are needed, the second writes them into a string of exactly that size.
// Embedded NUL wide characters are emitted as a single zero byte each.
//
// Returns an empty string if |wide| is empty or if wcrtomb() reports an error
// for any character.
std::string SysWideToNativeMB(const std::wstring& wide) {
  constexpr size_t kConversionError = static_cast<size_t>(-1);
  mbstate_t state;

  // Pass 1: measure the output without keeping any of the converted bytes.
  memset(&state, 0, sizeof(state));
  size_t out_len = 0;
  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t wc = wide[i];
    if (!wc) {
      // NUL is never handed to wcrtomb(), which treats it specially; it
      // simply occupies one output byte.
      ++out_len;
      continue;
    }

    // wcrtomb() does not compute the output length when given a null output
    // pointer, so convert into a throwaway buffer instead.
    char scratch[16];
    const size_t written = wcrtomb(scratch, wc, &state);
    if (written == kConversionError) {
      return std::string();
    }
    // A zero result is accounted for as one byte, same as an embedded NUL.
    out_len += (written == 0) ? 1 : written;
  }

  if (out_len == 0) {
    return std::string();
  }

  // The string starts out zero-filled, so NUL characters only require
  // advancing the write position in the second pass.
  std::string out(out_len, '\0');

  // Pass 2: convert for real, starting again from the initial shift state.
  // |pos| is the write offset into |out|.
  memset(&state, 0, sizeof(state));
  size_t pos = 0;
  for (const wchar_t wc : wide) {
    if (!wc) {
      ++pos;
      continue;
    }

    const size_t written = wcrtomb(&out[pos], wc, &state);
    if (written == kConversionError) {
      return std::string();
    }
    pos += (written == 0) ? 1 : written;
  }

  return out;
}
106 
// Converts the multi-byte string |native_mb| to a wide string, one character
// at a time, using mbrtowc().
//
// The conversion runs in two passes: the first counts how many wide characters
// the input decodes to, the second decodes them into a string of exactly that
// size. Embedded NUL bytes become NUL wide characters.
//
// Returns an empty string if |native_mb| is empty or if mbrtowc() reports an
// invalid (-1) or incomplete (-2) sequence anywhere in the input.
std::wstring SysNativeMBToWide(std::string_view native_mb) {
  constexpr size_t kInvalidSequence = static_cast<size_t>(-1);
  constexpr size_t kIncompleteSequence = static_cast<size_t>(-2);

  const char* const bytes = native_mb.data();
  const size_t byte_count = native_mb.size();
  mbstate_t state;

  // Pass 1: count the wide characters without storing any of them.
  memset(&state, 0, sizeof(state));
  size_t wide_len = 0;
  size_t in_pos = 0;
  while (in_pos < byte_count) {
    const size_t consumed =
        mbrtowc(nullptr, bytes + in_pos, byte_count - in_pos, &state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence) {
      return std::wstring();
    }
    // A zero result means an embedded NUL byte was decoded; step over that
    // single byte and keep going.
    in_pos += (consumed == 0) ? 1 : consumed;
    ++wide_len;
  }

  if (wide_len == 0) {
    return std::wstring();
  }

  std::wstring out(wide_len, L'\0');

  // Pass 2: decode for real, starting again from the initial shift state.
  // |in_pos| walks the multi-byte input while |out_pos| walks the wide output.
  memset(&state, 0, sizeof(state));
  in_pos = 0;
  size_t out_pos = 0;
  while (in_pos < byte_count) {
    const size_t consumed =
        mbrtowc(&out[out_pos], bytes + in_pos, byte_count - in_pos, &state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence) {
      return std::wstring();
    }
    in_pos += (consumed == 0) ? 1 : consumed;  // Zero: skip the NUL byte.
    ++out_pos;
  }

  return out;
}
162 
163 #endif  // defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
164 
165 }  // namespace base
166