• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ppapi/shared_impl/private/ppb_char_set_shared.h"
6 
7 #include <algorithm>
8 
9 #include "base/i18n/icu_string_conversions.h"
10 #include "ppapi/c/dev/ppb_memory_dev.h"
11 #include "ppapi/thunk/thunk.h"
12 #include "third_party/icu/source/common/unicode/ucnv.h"
13 #include "third_party/icu/source/common/unicode/ucnv_cb.h"
14 #include "third_party/icu/source/common/unicode/ucnv_err.h"
15 #include "third_party/icu/source/common/unicode/ustring.h"
16 
17 namespace ppapi {
18 
19 namespace {
20 
DeprecatedToConversionError(PP_CharSet_ConversionError on_error)21 PP_CharSet_Trusted_ConversionError DeprecatedToConversionError(
22     PP_CharSet_ConversionError on_error) {
23   switch (on_error) {
24     case PP_CHARSET_CONVERSIONERROR_SKIP:
25       return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP;
26     case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
27       return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE;
28     case PP_CHARSET_CONVERSIONERROR_FAIL:
29     default:
30       return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL;
31   }
32 }
33 
34 // Converts the given PP error handling behavior to the version in base,
35 // placing the result in |*result| and returning true on success. Returns false
36 // if the enum is invalid.
PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error,base::OnStringConversionError::Type * result)37 bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error,
38                              base::OnStringConversionError::Type* result) {
39   switch (on_error) {
40     case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL:
41       *result = base::OnStringConversionError::FAIL;
42       return true;
43     case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP:
44       *result = base::OnStringConversionError::SKIP;
45       return true;
46     case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE:
47       *result = base::OnStringConversionError::SUBSTITUTE;
48       return true;
49     default:
50       return false;
51   }
52 }
53 
54 }  // namespace
55 
56 // static
57 // The "substitution" behavior of this function does not match the
58 // implementation in base, so we partially duplicate the code from
59 // icu_string_conversions.cc with the correct error handling setup required
60 // by the PPAPI interface.
UTF16ToCharSetDeprecated(const uint16_t * utf16,uint32_t utf16_len,const char * output_char_set,PP_CharSet_ConversionError deprecated_on_error,uint32_t * output_length)61 char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated(
62     const uint16_t* utf16,
63     uint32_t utf16_len,
64     const char* output_char_set,
65     PP_CharSet_ConversionError deprecated_on_error,
66     uint32_t* output_length) {
67   *output_length = 0;
68   PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
69       deprecated_on_error);
70 
71   // Compute required length.
72   uint32_t required_length = 0;
73   UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL,
74                  &required_length);
75 
76   // Our output is null terminated, so need one more byte.
77   char* ret_buf = static_cast<char*>(
78       thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1));
79 
80   // Do the conversion into the buffer.
81   PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error,
82                                   ret_buf, &required_length);
83   if (result == PP_FALSE) {
84     thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
85     return NULL;
86   }
87   ret_buf[required_length] = 0;  // Null terminate.
88   *output_length = required_length;
89   return ret_buf;
90 }
91 
92 // static
UTF16ToCharSet(const uint16_t utf16[],uint32_t utf16_len,const char * output_char_set,PP_CharSet_Trusted_ConversionError on_error,char * output_buffer,uint32_t * output_length)93 PP_Bool PPB_CharSet_Shared::UTF16ToCharSet(
94     const uint16_t utf16[],
95     uint32_t utf16_len,
96     const char* output_char_set,
97     PP_CharSet_Trusted_ConversionError on_error,
98     char* output_buffer,
99     uint32_t* output_length) {
100   if (!utf16 || !output_char_set || !output_length) {
101     *output_length = 0;
102     return PP_FALSE;
103   }
104 
105   UErrorCode status = U_ZERO_ERROR;
106   UConverter* converter = ucnv_open(output_char_set, &status);
107   if (!U_SUCCESS(status)) {
108     *output_length = 0;
109     return PP_FALSE;
110   }
111 
112   // Setup our error handler.
113   switch (on_error) {
114     case PP_CHARSET_CONVERSIONERROR_FAIL:
115       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
116                             NULL, NULL, &status);
117       break;
118     case PP_CHARSET_CONVERSIONERROR_SKIP:
119       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
120                             NULL, NULL, &status);
121       break;
122     case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: {
123       // ICU sets the substitution char for some character sets (like latin1)
124       // to be the ASCII "substitution character" (26). We want to use '?'
125       // instead for backwards-compat with Windows behavior.
126       char subst_chars[32];
127       int8_t subst_chars_len = 32;
128       ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status);
129       if (subst_chars_len == 1 && subst_chars[0] == 26) {
130         // Override to the question mark character if possible. When using
131         // setSubstString, the input is a Unicode character. The function will
132         // try to convert it to the destination character set and fail if that
133         // can not be converted to the destination character set.
134         //
135         // We just ignore any failure. If the dest char set has no
136         // representation for '?', then we'll just stick to the ICU default
137         // substitution character.
138         UErrorCode subst_status = U_ZERO_ERROR;
139         UChar question_mark = '?';
140         ucnv_setSubstString(converter, &question_mark, 1, &subst_status);
141       }
142 
143       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
144                             NULL, NULL, &status);
145       break;
146     }
147     default:
148       *output_length = 0;
149       ucnv_close(converter);
150       return PP_FALSE;
151   }
152 
153   // ucnv_fromUChars returns required size not including terminating null.
154   *output_length = static_cast<uint32_t>(ucnv_fromUChars(
155       converter, output_buffer, output_buffer ? *output_length : 0,
156       reinterpret_cast<const UChar*>(utf16), utf16_len, &status));
157 
158   ucnv_close(converter);
159   if (status == U_BUFFER_OVERFLOW_ERROR) {
160     // Don't treat this as a fatal error since we need to return the string
161     // size.
162     return PP_TRUE;
163   } else if (!U_SUCCESS(status)) {
164     *output_length = 0;
165     return PP_FALSE;
166   }
167   return PP_TRUE;
168 }
169 
170 // static
CharSetToUTF16Deprecated(const char * input,uint32_t input_len,const char * input_char_set,PP_CharSet_ConversionError deprecated_on_error,uint32_t * output_length)171 uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated(
172     const char* input,
173     uint32_t input_len,
174     const char* input_char_set,
175     PP_CharSet_ConversionError deprecated_on_error,
176     uint32_t* output_length) {
177   *output_length = 0;
178   PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
179       deprecated_on_error);
180 
181   // Compute required length.
182   uint32_t required_length = 0;
183   CharSetToUTF16(input, input_len, input_char_set, on_error, NULL,
184                  &required_length);
185 
186   // Our output is null terminated, so need one more byte.
187   uint16_t* ret_buf = static_cast<uint16_t*>(
188       thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(
189           (required_length + 1) * sizeof(uint16_t)));
190 
191   // Do the conversion into the buffer.
192   PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error,
193                                   ret_buf, &required_length);
194   if (result == PP_FALSE) {
195     thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
196     return NULL;
197   }
198   ret_buf[required_length] = 0;  // Null terminate.
199   *output_length = required_length;
200   return ret_buf;
201 }
202 
CharSetToUTF16(const char * input,uint32_t input_len,const char * input_char_set,PP_CharSet_Trusted_ConversionError on_error,uint16_t * output_buffer,uint32_t * output_utf16_length)203 PP_Bool PPB_CharSet_Shared::CharSetToUTF16(
204     const char* input,
205     uint32_t input_len,
206     const char* input_char_set,
207     PP_CharSet_Trusted_ConversionError on_error,
208     uint16_t* output_buffer,
209     uint32_t* output_utf16_length) {
210   if (!input || !input_char_set || !output_utf16_length) {
211     *output_utf16_length = 0;
212     return PP_FALSE;
213   }
214 
215   base::OnStringConversionError::Type base_on_error;
216   if (!PPToBaseConversionError(on_error, &base_on_error)) {
217     *output_utf16_length = 0;
218     return PP_FALSE;  // Invalid enum value.
219   }
220 
221   // We can convert this call to the implementation in base to avoid code
222   // duplication, although this does introduce an extra copy of the data.
223   base::string16 output;
224   if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set,
225                              base_on_error, &output)) {
226     *output_utf16_length = 0;
227     return PP_FALSE;
228   }
229 
230   if (output_buffer) {
231     memcpy(output_buffer, output.c_str(),
232            std::min(*output_utf16_length, static_cast<uint32_t>(output.size()))
233            * sizeof(uint16_t));
234   }
235   *output_utf16_length = static_cast<uint32_t>(output.size());
236   return PP_TRUE;
237 }
238 
239 }  // namespace ppapi
240