1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/utf_string_conversions.h"
6
7 #include <stdint.h>
8
9 #include "base/strings/string_piece.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversion_utils.h"
12 #include "build/build_config.h"
13
14 namespace base {
15
16 namespace {
17
18 // Generalized Unicode converter -----------------------------------------------
19
// Converts a buffer of one Unicode character type into an STL string of
// another Unicode character type.
//
//   src     - start of the source buffer.
//   src_len - number of SRC_CHAR code units available at |src|.
//   output  - destination string. Every code point is handed to
//             WriteUnicodeCharacter() together with |output|; this function
//             itself never clears |output|, so callers prepare it beforehand.
//
// Returns true when every character was read successfully. Whenever
// ReadUnicodeCharacter() reports a failure, U+FFFD (the replacement character)
// is written in its place, conversion continues, and the final result is
// false.
//
// Inputs longer than INT32_MAX code units cannot be represented in the 32-bit
// length/index used below. They are rejected up front (returns false and
// leaves |output| untouched) rather than having their length silently
// truncated, which previously could skip the whole input and still report
// success.
template <typename SRC_CHAR, typename DEST_STRING>
bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
  // ICU requires 32-bit numbers, so make sure the length actually fits.
  if (src_len > static_cast<size_t>(INT32_MAX))
    return false;

  bool success = true;
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  // ReadUnicodeCharacter() receives the address of |i|, so it can move the
  // index itself; the loop increment then steps to the next position.
  for (int32_t i = 0; i < src_len32; i++) {
    uint32_t code_point;
    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      WriteUnicodeCharacter(code_point, output);
    } else {
      // Unreadable input: emit the replacement character and remember that
      // the conversion was lossy.
      WriteUnicodeCharacter(0xFFFD, output);
      success = false;
    }
  }

  return success;
}
43
44 } // namespace
45
46 // UTF-8 <-> Wide --------------------------------------------------------------
47
WideToUTF8(const wchar_t * src,size_t src_len,std::string * output)48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49 if (IsStringASCII(std::wstring(src, src_len))) {
50 output->assign(src, src + src_len);
51 return true;
52 } else {
53 PrepareForUTF8Output(src, src_len, output);
54 return ConvertUnicode(src, src_len, output);
55 }
56 }
57
WideToUTF8(const std::wstring & wide)58 std::string WideToUTF8(const std::wstring& wide) {
59 if (IsStringASCII(wide)) {
60 return std::string(wide.data(), wide.data() + wide.length());
61 }
62
63 std::string ret;
64 PrepareForUTF8Output(wide.data(), wide.length(), &ret);
65 ConvertUnicode(wide.data(), wide.length(), &ret);
66 return ret;
67 }
68
UTF8ToWide(const char * src,size_t src_len,std::wstring * output)69 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
70 if (IsStringASCII(StringPiece(src, src_len))) {
71 output->assign(src, src + src_len);
72 return true;
73 } else {
74 PrepareForUTF16Or32Output(src, src_len, output);
75 return ConvertUnicode(src, src_len, output);
76 }
77 }
78
UTF8ToWide(StringPiece utf8)79 std::wstring UTF8ToWide(StringPiece utf8) {
80 if (IsStringASCII(utf8)) {
81 return std::wstring(utf8.begin(), utf8.end());
82 }
83
84 std::wstring ret;
85 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
86 ConvertUnicode(utf8.data(), utf8.length(), &ret);
87 return ret;
88 }
89
90 // UTF-16 <-> Wide -------------------------------------------------------------
91
92 #if defined(WCHAR_T_IS_UTF16)
93
94 // When wide == UTF-16, then conversions are a NOP.
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)95 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
96 output->assign(src, src_len);
97 return true;
98 }
99
WideToUTF16(const std::wstring & wide)100 string16 WideToUTF16(const std::wstring& wide) {
101 return wide;
102 }
103
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)104 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
105 output->assign(src, src_len);
106 return true;
107 }
108
UTF16ToWide(const string16 & utf16)109 std::wstring UTF16ToWide(const string16& utf16) {
110 return utf16;
111 }
112
113 #elif defined(WCHAR_T_IS_UTF32)
114
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)115 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
116 output->clear();
117 // Assume that normally we won't have any non-BMP characters so the counts
118 // will be the same.
119 output->reserve(src_len);
120 return ConvertUnicode(src, src_len, output);
121 }
122
WideToUTF16(const std::wstring & wide)123 string16 WideToUTF16(const std::wstring& wide) {
124 string16 ret;
125 WideToUTF16(wide.data(), wide.length(), &ret);
126 return ret;
127 }
128
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)129 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
130 output->clear();
131 // Assume that normally we won't have any non-BMP characters so the counts
132 // will be the same.
133 output->reserve(src_len);
134 return ConvertUnicode(src, src_len, output);
135 }
136
UTF16ToWide(const string16 & utf16)137 std::wstring UTF16ToWide(const string16& utf16) {
138 std::wstring ret;
139 UTF16ToWide(utf16.data(), utf16.length(), &ret);
140 return ret;
141 }
142
143 #endif // defined(WCHAR_T_IS_UTF32)
144
145 // UTF16 <-> UTF8 --------------------------------------------------------------
146
147 #if defined(WCHAR_T_IS_UTF32)
148
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)149 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
150 if (IsStringASCII(StringPiece(src, src_len))) {
151 output->assign(src, src + src_len);
152 return true;
153 } else {
154 PrepareForUTF16Or32Output(src, src_len, output);
155 return ConvertUnicode(src, src_len, output);
156 }
157 }
158
UTF8ToUTF16(StringPiece utf8)159 string16 UTF8ToUTF16(StringPiece utf8) {
160 if (IsStringASCII(utf8)) {
161 return string16(utf8.begin(), utf8.end());
162 }
163
164 string16 ret;
165 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
166 // Ignore the success flag of this call, it will do the best it can for
167 // invalid input, which is what we want here.
168 ConvertUnicode(utf8.data(), utf8.length(), &ret);
169 return ret;
170 }
171
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)172 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
173 if (IsStringASCII(StringPiece16(src, src_len))) {
174 output->assign(src, src + src_len);
175 return true;
176 } else {
177 PrepareForUTF8Output(src, src_len, output);
178 return ConvertUnicode(src, src_len, output);
179 }
180 }
181
UTF16ToUTF8(StringPiece16 utf16)182 std::string UTF16ToUTF8(StringPiece16 utf16) {
183 std::string ret;
184 // Ignore the success flag of this call, it will do the best it can for
185 // invalid input, which is what we want here.
186 UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
187 return ret;
188 }
189
190 #elif defined(WCHAR_T_IS_UTF16)
191 // Easy case since we can use the "wide" versions we already wrote above.
192
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)193 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
194 return UTF8ToWide(src, src_len, output);
195 }
196
UTF8ToUTF16(StringPiece utf8)197 string16 UTF8ToUTF16(StringPiece utf8) {
198 return UTF8ToWide(utf8);
199 }
200
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)201 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
202 return WideToUTF8(src, src_len, output);
203 }
204
UTF16ToUTF8(StringPiece16 utf16)205 std::string UTF16ToUTF8(StringPiece16 utf16) {
206 if (IsStringASCII(utf16))
207 return std::string(utf16.data(), utf16.data() + utf16.length());
208
209 std::string ret;
210 PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
211 ConvertUnicode(utf16.data(), utf16.length(), &ret);
212 return ret;
213 }
214
215 #endif
216
ASCIIToUTF16(StringPiece ascii)217 string16 ASCIIToUTF16(StringPiece ascii) {
218 DCHECK(IsStringASCII(ascii)) << ascii;
219 return string16(ascii.begin(), ascii.end());
220 }
221
UTF16ToASCII(StringPiece16 utf16)222 std::string UTF16ToASCII(StringPiece16 utf16) {
223 DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
224 return std::string(utf16.begin(), utf16.end());
225 }
226
227 } // namespace base
228