• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/utf_string_conversions.h"
6 
7 #include "base/string_piece.h"
8 #include "base/string_util.h"
9 #include "base/utf_string_conversion_utils.h"
10 
11 using base::PrepareForUTF8Output;
12 using base::PrepareForUTF16Or32Output;
13 using base::ReadUnicodeCharacter;
14 using base::WriteUnicodeCharacter;
15 
16 namespace {
17 
18 // Generalized Unicode converter -----------------------------------------------
19 
20 // Converts the given source Unicode character type to the given destination
21 // Unicode character type as a STL string. The given input buffer and size
22 // determine the source, and the given output STL string will be replaced by
23 // the result.
24 template<typename SRC_CHAR, typename DEST_STRING>
ConvertUnicode(const SRC_CHAR * src,size_t src_len,DEST_STRING * output)25 bool ConvertUnicode(const SRC_CHAR* src,
26                     size_t src_len,
27                     DEST_STRING* output) {
28   // ICU requires 32-bit numbers.
29   bool success = true;
30   int32 src_len32 = static_cast<int32>(src_len);
31   for (int32 i = 0; i < src_len32; i++) {
32     uint32 code_point;
33     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
34       WriteUnicodeCharacter(code_point, output);
35     } else {
36       WriteUnicodeCharacter(0xFFFD, output);
37       success = false;
38     }
39   }
40 
41   return success;
42 }
43 
44 }  // namespace
45 
46 // UTF-8 <-> Wide --------------------------------------------------------------
47 
WideToUTF8(const wchar_t * src,size_t src_len,std::string * output)48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49   PrepareForUTF8Output(src, src_len, output);
50   return ConvertUnicode(src, src_len, output);
51 }
52 
WideToUTF8(const std::wstring & wide)53 std::string WideToUTF8(const std::wstring& wide) {
54   std::string ret;
55   // Ignore the success flag of this call, it will do the best it can for
56   // invalid input, which is what we want here.
57   WideToUTF8(wide.data(), wide.length(), &ret);
58   return ret;
59 }
60 
UTF8ToWide(const char * src,size_t src_len,std::wstring * output)61 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
62   PrepareForUTF16Or32Output(src, src_len, output);
63   return ConvertUnicode(src, src_len, output);
64 }
65 
UTF8ToWide(const base::StringPiece & utf8)66 std::wstring UTF8ToWide(const base::StringPiece& utf8) {
67   std::wstring ret;
68   UTF8ToWide(utf8.data(), utf8.length(), &ret);
69   return ret;
70 }
71 
72 // UTF-16 <-> Wide -------------------------------------------------------------
73 
74 #if defined(WCHAR_T_IS_UTF16)
75 
76 // When wide == UTF-16, then conversions are a NOP.
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)77 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
78   output->assign(src, src_len);
79   return true;
80 }
81 
WideToUTF16(const std::wstring & wide)82 string16 WideToUTF16(const std::wstring& wide) {
83   return wide;
84 }
85 
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)86 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
87   output->assign(src, src_len);
88   return true;
89 }
90 
UTF16ToWide(const string16 & utf16)91 std::wstring UTF16ToWide(const string16& utf16) {
92   return utf16;
93 }
94 
95 #elif defined(WCHAR_T_IS_UTF32)
96 
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)97 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
98   output->clear();
99   // Assume that normally we won't have any non-BMP characters so the counts
100   // will be the same.
101   output->reserve(src_len);
102   return ConvertUnicode(src, src_len, output);
103 }
104 
WideToUTF16(const std::wstring & wide)105 string16 WideToUTF16(const std::wstring& wide) {
106   string16 ret;
107   WideToUTF16(wide.data(), wide.length(), &ret);
108   return ret;
109 }
110 
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)111 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
112   output->clear();
113   // Assume that normally we won't have any non-BMP characters so the counts
114   // will be the same.
115   output->reserve(src_len);
116   return ConvertUnicode(src, src_len, output);
117 }
118 
UTF16ToWide(const string16 & utf16)119 std::wstring UTF16ToWide(const string16& utf16) {
120   std::wstring ret;
121   UTF16ToWide(utf16.data(), utf16.length(), &ret);
122   return ret;
123 }
124 
125 #endif  // defined(WCHAR_T_IS_UTF32)
126 
127 // UTF16 <-> UTF8 --------------------------------------------------------------
128 
129 #if defined(WCHAR_T_IS_UTF32)
130 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)131 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
132   PrepareForUTF16Or32Output(src, src_len, output);
133   return ConvertUnicode(src, src_len, output);
134 }
135 
UTF8ToUTF16(const base::StringPiece & utf8)136 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
137   string16 ret;
138   // Ignore the success flag of this call, it will do the best it can for
139   // invalid input, which is what we want here.
140   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
141   return ret;
142 }
143 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)144 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
145   PrepareForUTF8Output(src, src_len, output);
146   return ConvertUnicode(src, src_len, output);
147 }
148 
UTF16ToUTF8(const string16 & utf16)149 std::string UTF16ToUTF8(const string16& utf16) {
150   std::string ret;
151   // Ignore the success flag of this call, it will do the best it can for
152   // invalid input, which is what we want here.
153   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
154   return ret;
155 }
156 
157 #elif defined(WCHAR_T_IS_UTF16)
158 // Easy case since we can use the "wide" versions we already wrote above.
159 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)160 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
161   return UTF8ToWide(src, src_len, output);
162 }
163 
UTF8ToUTF16(const base::StringPiece & utf8)164 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
165   return UTF8ToWide(utf8);
166 }
167 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)168 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
169   return WideToUTF8(src, src_len, output);
170 }
171 
UTF16ToUTF8(const string16 & utf16)172 std::string UTF16ToUTF8(const string16& utf16) {
173   return WideToUTF8(utf16);
174 }
175 
176 #endif
177 
ASCIIToWide(const base::StringPiece & ascii)178 std::wstring ASCIIToWide(const base::StringPiece& ascii) {
179   DCHECK(IsStringASCII(ascii)) << ascii;
180   return std::wstring(ascii.begin(), ascii.end());
181 }
182 
ASCIIToUTF16(const base::StringPiece & ascii)183 string16 ASCIIToUTF16(const base::StringPiece& ascii) {
184   DCHECK(IsStringASCII(ascii)) << ascii;
185   return string16(ascii.begin(), ascii.end());
186 }
187