1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/utf_string_conversions.h"
6
7 #include "base/string_piece.h"
8 #include "base/string_util.h"
9 #include "base/utf_string_conversion_utils.h"
10
11 using base::PrepareForUTF8Output;
12 using base::PrepareForUTF16Or32Output;
13 using base::ReadUnicodeCharacter;
14 using base::WriteUnicodeCharacter;
15
16 namespace {
17
18 // Generalized Unicode converter -----------------------------------------------
19
20 // Converts the given source Unicode character type to the given destination
21 // Unicode character type as a STL string. The given input buffer and size
22 // determine the source, and the given output STL string will be replaced by
23 // the result.
24 template<typename SRC_CHAR, typename DEST_STRING>
ConvertUnicode(const SRC_CHAR * src,size_t src_len,DEST_STRING * output)25 bool ConvertUnicode(const SRC_CHAR* src,
26 size_t src_len,
27 DEST_STRING* output) {
28 // ICU requires 32-bit numbers.
29 bool success = true;
30 int32 src_len32 = static_cast<int32>(src_len);
31 for (int32 i = 0; i < src_len32; i++) {
32 uint32 code_point;
33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
34 WriteUnicodeCharacter(code_point, output);
35 } else {
36 WriteUnicodeCharacter(0xFFFD, output);
37 success = false;
38 }
39 }
40
41 return success;
42 }
43
44 } // namespace
45
46 // UTF-8 <-> Wide --------------------------------------------------------------
47
WideToUTF8(const wchar_t * src,size_t src_len,std::string * output)48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49 PrepareForUTF8Output(src, src_len, output);
50 return ConvertUnicode(src, src_len, output);
51 }
52
WideToUTF8(const std::wstring & wide)53 std::string WideToUTF8(const std::wstring& wide) {
54 std::string ret;
55 // Ignore the success flag of this call, it will do the best it can for
56 // invalid input, which is what we want here.
57 WideToUTF8(wide.data(), wide.length(), &ret);
58 return ret;
59 }
60
UTF8ToWide(const char * src,size_t src_len,std::wstring * output)61 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
62 PrepareForUTF16Or32Output(src, src_len, output);
63 return ConvertUnicode(src, src_len, output);
64 }
65
UTF8ToWide(const base::StringPiece & utf8)66 std::wstring UTF8ToWide(const base::StringPiece& utf8) {
67 std::wstring ret;
68 UTF8ToWide(utf8.data(), utf8.length(), &ret);
69 return ret;
70 }
71
72 // UTF-16 <-> Wide -------------------------------------------------------------
73
74 #if defined(WCHAR_T_IS_UTF16)
75
76 // When wide == UTF-16, then conversions are a NOP.
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)77 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
78 output->assign(src, src_len);
79 return true;
80 }
81
WideToUTF16(const std::wstring & wide)82 string16 WideToUTF16(const std::wstring& wide) {
83 return wide;
84 }
85
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)86 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
87 output->assign(src, src_len);
88 return true;
89 }
90
UTF16ToWide(const string16 & utf16)91 std::wstring UTF16ToWide(const string16& utf16) {
92 return utf16;
93 }
94
95 #elif defined(WCHAR_T_IS_UTF32)
96
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)97 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
98 output->clear();
99 // Assume that normally we won't have any non-BMP characters so the counts
100 // will be the same.
101 output->reserve(src_len);
102 return ConvertUnicode(src, src_len, output);
103 }
104
WideToUTF16(const std::wstring & wide)105 string16 WideToUTF16(const std::wstring& wide) {
106 string16 ret;
107 WideToUTF16(wide.data(), wide.length(), &ret);
108 return ret;
109 }
110
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)111 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
112 output->clear();
113 // Assume that normally we won't have any non-BMP characters so the counts
114 // will be the same.
115 output->reserve(src_len);
116 return ConvertUnicode(src, src_len, output);
117 }
118
UTF16ToWide(const string16 & utf16)119 std::wstring UTF16ToWide(const string16& utf16) {
120 std::wstring ret;
121 UTF16ToWide(utf16.data(), utf16.length(), &ret);
122 return ret;
123 }
124
125 #endif // defined(WCHAR_T_IS_UTF32)
126
127 // UTF16 <-> UTF8 --------------------------------------------------------------
128
129 #if defined(WCHAR_T_IS_UTF32)
130
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)131 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
132 PrepareForUTF16Or32Output(src, src_len, output);
133 return ConvertUnicode(src, src_len, output);
134 }
135
UTF8ToUTF16(const base::StringPiece & utf8)136 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
137 string16 ret;
138 // Ignore the success flag of this call, it will do the best it can for
139 // invalid input, which is what we want here.
140 UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
141 return ret;
142 }
143
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)144 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
145 PrepareForUTF8Output(src, src_len, output);
146 return ConvertUnicode(src, src_len, output);
147 }
148
UTF16ToUTF8(const string16 & utf16)149 std::string UTF16ToUTF8(const string16& utf16) {
150 std::string ret;
151 // Ignore the success flag of this call, it will do the best it can for
152 // invalid input, which is what we want here.
153 UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
154 return ret;
155 }
156
157 #elif defined(WCHAR_T_IS_UTF16)
158 // Easy case since we can use the "wide" versions we already wrote above.
159
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)160 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
161 return UTF8ToWide(src, src_len, output);
162 }
163
UTF8ToUTF16(const base::StringPiece & utf8)164 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
165 return UTF8ToWide(utf8);
166 }
167
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)168 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
169 return WideToUTF8(src, src_len, output);
170 }
171
UTF16ToUTF8(const string16 & utf16)172 std::string UTF16ToUTF8(const string16& utf16) {
173 return WideToUTF8(utf16);
174 }
175
176 #endif
177
ASCIIToWide(const base::StringPiece & ascii)178 std::wstring ASCIIToWide(const base::StringPiece& ascii) {
179 DCHECK(IsStringASCII(ascii)) << ascii;
180 return std::wstring(ascii.begin(), ascii.end());
181 }
182
ASCIIToUTF16(const base::StringPiece & ascii)183 string16 ASCIIToUTF16(const base::StringPiece& ascii) {
184 DCHECK(IsStringASCII(ascii)) << ascii;
185 return string16(ascii.begin(), ascii.end());
186 }
187