• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/utf_string_conversions.h"
6 
7 #include "base/strings/string_piece.h"
8 #include "base/strings/string_util.h"
9 #include "base/strings/utf_string_conversion_utils.h"
10 
11 namespace base {
12 
13 namespace {
14 
15 // Generalized Unicode converter -----------------------------------------------
16 
17 // Converts the given source Unicode character type to the given destination
18 // Unicode character type as a STL string. The given input buffer and size
19 // determine the source, and the given output STL string will be replaced by
20 // the result.
21 template<typename SRC_CHAR, typename DEST_STRING>
ConvertUnicode(const SRC_CHAR * src,size_t src_len,DEST_STRING * output)22 bool ConvertUnicode(const SRC_CHAR* src,
23                     size_t src_len,
24                     DEST_STRING* output) {
25   // ICU requires 32-bit numbers.
26   bool success = true;
27   int32 src_len32 = static_cast<int32>(src_len);
28   for (int32 i = 0; i < src_len32; i++) {
29     uint32 code_point;
30     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
31       WriteUnicodeCharacter(code_point, output);
32     } else {
33       WriteUnicodeCharacter(0xFFFD, output);
34       success = false;
35     }
36   }
37 
38   return success;
39 }
40 
41 }  // namespace
42 
43 // UTF-8 <-> Wide --------------------------------------------------------------
44 
WideToUTF8(const wchar_t * src,size_t src_len,std::string * output)45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
46   PrepareForUTF8Output(src, src_len, output);
47   return ConvertUnicode(src, src_len, output);
48 }
49 
WideToUTF8(const std::wstring & wide)50 std::string WideToUTF8(const std::wstring& wide) {
51   std::string ret;
52   // Ignore the success flag of this call, it will do the best it can for
53   // invalid input, which is what we want here.
54   WideToUTF8(wide.data(), wide.length(), &ret);
55   return ret;
56 }
57 
UTF8ToWide(const char * src,size_t src_len,std::wstring * output)58 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
59   PrepareForUTF16Or32Output(src, src_len, output);
60   return ConvertUnicode(src, src_len, output);
61 }
62 
UTF8ToWide(const StringPiece & utf8)63 std::wstring UTF8ToWide(const StringPiece& utf8) {
64   std::wstring ret;
65   UTF8ToWide(utf8.data(), utf8.length(), &ret);
66   return ret;
67 }
68 
69 // UTF-16 <-> Wide -------------------------------------------------------------
70 
71 #if defined(WCHAR_T_IS_UTF16)
72 
73 // When wide == UTF-16, then conversions are a NOP.
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)74 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
75   output->assign(src, src_len);
76   return true;
77 }
78 
WideToUTF16(const std::wstring & wide)79 string16 WideToUTF16(const std::wstring& wide) {
80   return wide;
81 }
82 
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)83 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
84   output->assign(src, src_len);
85   return true;
86 }
87 
UTF16ToWide(const string16 & utf16)88 std::wstring UTF16ToWide(const string16& utf16) {
89   return utf16;
90 }
91 
92 #elif defined(WCHAR_T_IS_UTF32)
93 
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)94 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
95   output->clear();
96   // Assume that normally we won't have any non-BMP characters so the counts
97   // will be the same.
98   output->reserve(src_len);
99   return ConvertUnicode(src, src_len, output);
100 }
101 
WideToUTF16(const std::wstring & wide)102 string16 WideToUTF16(const std::wstring& wide) {
103   string16 ret;
104   WideToUTF16(wide.data(), wide.length(), &ret);
105   return ret;
106 }
107 
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)108 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
109   output->clear();
110   // Assume that normally we won't have any non-BMP characters so the counts
111   // will be the same.
112   output->reserve(src_len);
113   return ConvertUnicode(src, src_len, output);
114 }
115 
UTF16ToWide(const string16 & utf16)116 std::wstring UTF16ToWide(const string16& utf16) {
117   std::wstring ret;
118   UTF16ToWide(utf16.data(), utf16.length(), &ret);
119   return ret;
120 }
121 
122 #endif  // defined(WCHAR_T_IS_UTF32)
123 
124 // UTF16 <-> UTF8 --------------------------------------------------------------
125 
126 #if defined(WCHAR_T_IS_UTF32)
127 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)128 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
129   PrepareForUTF16Or32Output(src, src_len, output);
130   return ConvertUnicode(src, src_len, output);
131 }
132 
UTF8ToUTF16(const StringPiece & utf8)133 string16 UTF8ToUTF16(const StringPiece& utf8) {
134   string16 ret;
135   // Ignore the success flag of this call, it will do the best it can for
136   // invalid input, which is what we want here.
137   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
138   return ret;
139 }
140 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)141 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
142   PrepareForUTF8Output(src, src_len, output);
143   return ConvertUnicode(src, src_len, output);
144 }
145 
UTF16ToUTF8(const string16 & utf16)146 std::string UTF16ToUTF8(const string16& utf16) {
147   std::string ret;
148   // Ignore the success flag of this call, it will do the best it can for
149   // invalid input, which is what we want here.
150   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
151   return ret;
152 }
153 
154 #elif defined(WCHAR_T_IS_UTF16)
155 // Easy case since we can use the "wide" versions we already wrote above.
156 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)157 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
158   return UTF8ToWide(src, src_len, output);
159 }
160 
UTF8ToUTF16(const StringPiece & utf8)161 string16 UTF8ToUTF16(const StringPiece& utf8) {
162   return UTF8ToWide(utf8);
163 }
164 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)165 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
166   return WideToUTF8(src, src_len, output);
167 }
168 
UTF16ToUTF8(const string16 & utf16)169 std::string UTF16ToUTF8(const string16& utf16) {
170   return WideToUTF8(utf16);
171 }
172 
173 #endif
174 
ASCIIToWide(const StringPiece & ascii)175 std::wstring ASCIIToWide(const StringPiece& ascii) {
176   DCHECK(IsStringASCII(ascii)) << ascii;
177   return std::wstring(ascii.begin(), ascii.end());
178 }
179 
ASCIIToUTF16(const StringPiece & ascii)180 string16 ASCIIToUTF16(const StringPiece& ascii) {
181   DCHECK(IsStringASCII(ascii)) << ascii;
182   return string16(ascii.begin(), ascii.end());
183 }
184 
UTF16ToASCII(const string16 & utf16)185 std::string UTF16ToASCII(const string16& utf16) {
186   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
187   return std::string(utf16.begin(), utf16.end());
188 }
189 
190 }  // namespace base
191