1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ppapi/tests/test_char_set.h"
6
7 #include "ppapi/c/dev/ppb_char_set_dev.h"
8 #include "ppapi/c/trusted/ppb_char_set_trusted.h"
9 #include "ppapi/cpp/dev/memory_dev.h"
10 #include "ppapi/cpp/module.h"
11 #include "ppapi/tests/testing_instance.h"
12
13 REGISTER_TEST_CASE(CharSet);
14
TestCharSet(TestingInstance * instance)15 TestCharSet::TestCharSet(TestingInstance* instance)
16 : TestCase(instance),
17 char_set_interface_(NULL) {
18 }
19
Init()20 bool TestCharSet::Init() {
21 char_set_interface_ = static_cast<const PPB_CharSet_Dev*>(
22 pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE));
23 char_set_trusted_interface_ = static_cast<const PPB_CharSet_Trusted*>(
24 pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE));
25 return char_set_interface_ && char_set_trusted_interface_;
26 }
27
RunTests(const std::string & filter)28 void TestCharSet::RunTests(const std::string& filter) {
29 RUN_TEST(UTF16ToCharSetDeprecated, filter);
30 RUN_TEST(UTF16ToCharSet, filter);
31 RUN_TEST(CharSetToUTF16Deprecated, filter);
32 RUN_TEST(CharSetToUTF16, filter);
33 RUN_TEST(GetDefaultCharSet, filter);
34 }
35
36 // TODO(brettw) remove this when the old interface is removed.
TestUTF16ToCharSetDeprecated()37 std::string TestCharSet::TestUTF16ToCharSetDeprecated() {
38 // Empty string.
39 std::vector<uint16_t> utf16;
40 utf16.push_back(0);
41 uint32_t utf8result_len = 0;
42 pp::Memory_Dev memory;
43 char* utf8result = char_set_interface_->UTF16ToCharSet(
44 instance_->pp_instance(), &utf16[0], 0, "latin1",
45 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
46 ASSERT_TRUE(utf8result);
47 ASSERT_TRUE(utf8result[0] == 0);
48 ASSERT_TRUE(utf8result_len == 0);
49 memory.MemFree(utf8result);
50
51 // Try round-tripping some English & Chinese from UTF-8 through UTF-16
52 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
53 utf16 = UTF8ToUTF16(utf8source);
54 utf8result = char_set_interface_->UTF16ToCharSet(
55 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
56 "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
57 ASSERT_TRUE(utf8source == std::string(utf8result, utf8result_len));
58 memory.MemFree(utf8result);
59
60 // Test an un-encodable character with various modes.
61 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
62
63 // Fail mode.
64 utf8result_len = 1234; // Test that this gets 0'ed on failure.
65 utf8result = char_set_interface_->UTF16ToCharSet(
66 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
67 "latin1", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
68 ASSERT_TRUE(utf8result_len == 0);
69 ASSERT_TRUE(utf8result == NULL);
70
71 // Skip mode.
72 utf8result = char_set_interface_->UTF16ToCharSet(
73 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
74 "latin1", PP_CHARSET_CONVERSIONERROR_SKIP, &utf8result_len);
75 ASSERT_TRUE(utf8result_len == 2);
76 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == 'i' &&
77 utf8result[2] == 0);
78 memory.MemFree(utf8result);
79
80 // Substitute mode.
81 utf8result = char_set_interface_->UTF16ToCharSet(
82 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
83 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
84 ASSERT_TRUE(utf8result_len == 3);
85 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == '?' &&
86 utf8result[2] == 'i' && utf8result[3] == 0);
87 memory.MemFree(utf8result);
88
89 // Try some invalid input encoding.
90 utf16.clear();
91 utf16.push_back(0xD800); // High surrogate.
92 utf16.push_back('A'); // Not a low surrogate.
93 utf8result = char_set_interface_->UTF16ToCharSet(
94 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
95 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
96 ASSERT_TRUE(utf8result_len == 2);
97 ASSERT_TRUE(utf8result[0] == '?' && utf8result[1] == 'A' &&
98 utf8result[2] == 0);
99 memory.MemFree(utf8result);
100
101 // Invalid encoding name.
102 utf8result = char_set_interface_->UTF16ToCharSet(
103 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
104 "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
105 ASSERT_TRUE(!utf8result);
106 ASSERT_TRUE(utf8result_len == 0);
107
108 PASS();
109 }
110
TestUTF16ToCharSet()111 std::string TestCharSet::TestUTF16ToCharSet() {
112 // Empty string.
113 std::vector<uint16_t> utf16;
114 utf16.push_back(0);
115 std::string output_buffer;
116 output_buffer.resize(1);
117 uint32_t utf8result_len = 0;
118 PP_Bool result = char_set_trusted_interface_->UTF16ToCharSet(
119 &utf16[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
120 &output_buffer[0], &utf8result_len);
121 ASSERT_TRUE(result == PP_TRUE);
122 ASSERT_TRUE(utf8result_len == 0);
123
124 // No output buffer returns length of string.
125 utf16 = UTF8ToUTF16("hello");
126 utf8result_len = 0;
127 result = char_set_trusted_interface_->UTF16ToCharSet(
128 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
129 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, NULL, &utf8result_len);
130 ASSERT_TRUE(result == PP_TRUE);
131 ASSERT_TRUE(utf8result_len == 5);
132
133 // Giving too small of a buffer just fills in that many items and gives us
134 // the desired size.
135 output_buffer.resize(100);
136 utf8result_len = 2;
137 output_buffer[utf8result_len] = '$'; // Barrier character.
138 result = char_set_trusted_interface_->UTF16ToCharSet(
139 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
140 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
141 &output_buffer[0], &utf8result_len);
142 ASSERT_TRUE(result == PP_TRUE);
143 ASSERT_TRUE(utf8result_len == 5);
144 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'e' &&
145 output_buffer[2] == '$');
146
147 // Try round-tripping some English & Chinese from UTF-8 through UTF-16
148 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
149 utf16 = UTF8ToUTF16(utf8source);
150 output_buffer.resize(100);
151 utf8result_len = static_cast<uint32_t>(output_buffer.size());
152 result = char_set_trusted_interface_->UTF16ToCharSet(
153 &utf16[0], static_cast<uint32_t>(utf16.size()),
154 "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
155 &output_buffer[0], &utf8result_len);
156 ASSERT_TRUE(result == PP_TRUE);
157 output_buffer.resize(utf8result_len);
158 ASSERT_TRUE(utf8source == output_buffer);
159
160 // Test an un-encodable character with various modes.
161 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
162
163 // Fail mode, size should get 0'ed on failure.
164 output_buffer.resize(100);
165 utf8result_len = static_cast<uint32_t>(output_buffer.size());
166 result = char_set_trusted_interface_->UTF16ToCharSet(
167 &utf16[0], static_cast<uint32_t>(utf16.size()),
168 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
169 &output_buffer[0], &utf8result_len);
170 ASSERT_TRUE(result == PP_FALSE);
171 ASSERT_TRUE(utf8result_len == 0);
172
173 // Skip mode.
174 output_buffer.resize(100);
175 utf8result_len = static_cast<uint32_t>(output_buffer.size());
176 result = char_set_trusted_interface_->UTF16ToCharSet(
177 &utf16[0], static_cast<uint32_t>(utf16.size()),
178 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
179 &output_buffer[0], &utf8result_len);
180 ASSERT_TRUE(result == PP_TRUE);
181 ASSERT_TRUE(utf8result_len == 2);
182 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'i');
183
184 // Substitute mode.
185 output_buffer.resize(100);
186 utf8result_len = static_cast<uint32_t>(output_buffer.size());
187 result = char_set_trusted_interface_->UTF16ToCharSet(
188 &utf16[0], static_cast<uint32_t>(utf16.size()),
189 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
190 &output_buffer[0], &utf8result_len);
191 ASSERT_TRUE(result == PP_TRUE);
192 ASSERT_TRUE(utf8result_len == 3);
193 output_buffer.resize(utf8result_len);
194 ASSERT_TRUE(output_buffer == "h?i");
195
196 // Try some invalid input encoding.
197 output_buffer.resize(100);
198 utf8result_len = static_cast<uint32_t>(output_buffer.size());
199 utf16.clear();
200 utf16.push_back(0xD800); // High surrogate.
201 utf16.push_back('A'); // Not a low surrogate.
202 result = char_set_trusted_interface_->UTF16ToCharSet(
203 &utf16[0], static_cast<uint32_t>(utf16.size()),
204 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
205 &output_buffer[0], &utf8result_len);
206 ASSERT_TRUE(result == PP_TRUE);
207 ASSERT_TRUE(utf8result_len == 2);
208 ASSERT_TRUE(output_buffer[0] == '?' && output_buffer[1] == 'A');
209
210 // Invalid encoding name.
211 output_buffer.resize(100);
212 utf8result_len = static_cast<uint32_t>(output_buffer.size());
213 result = char_set_trusted_interface_->UTF16ToCharSet(
214 &utf16[0], static_cast<uint32_t>(utf16.size()),
215 "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
216 &output_buffer[0], &utf8result_len);
217 ASSERT_TRUE(result == PP_FALSE);
218 ASSERT_TRUE(utf8result_len == 0);
219
220 PASS();
221 }
222
223 // TODO(brettw) remove this when the old interface is removed.
TestCharSetToUTF16Deprecated()224 std::string TestCharSet::TestCharSetToUTF16Deprecated() {
225 pp::Memory_Dev memory;
226
227 // Empty string.
228 uint32_t utf16result_len;
229 uint16_t* utf16result = char_set_interface_->CharSetToUTF16(
230 instance_->pp_instance(), "", 0, "latin1",
231 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
232 ASSERT_TRUE(utf16result);
233 ASSERT_TRUE(utf16result_len == 0);
234 ASSERT_TRUE(utf16result[0] == 0);
235 memory.MemFree(utf16result);
236
237 // Basic Latin1.
238 char latin1[] = "H\xef";
239 utf16result = char_set_interface_->CharSetToUTF16(
240 instance_->pp_instance(), latin1, 2, "latin1",
241 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
242 ASSERT_TRUE(utf16result);
243 ASSERT_TRUE(utf16result_len == 2);
244 ASSERT_TRUE(utf16result[0] == 'H' && utf16result[1] == 0xef &&
245 utf16result[2] == 0);
246 memory.MemFree(utf16result);
247
248 // Invalid input encoding with FAIL.
249 char badutf8[] = "A\xe4Z";
250 utf16result = char_set_interface_->CharSetToUTF16(
251 instance_->pp_instance(), badutf8, 3, "utf8",
252 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
253 ASSERT_TRUE(!utf16result);
254 ASSERT_TRUE(utf16result_len == 0);
255 memory.MemFree(utf16result);
256
257 // Invalid input with SKIP.
258 utf16result = char_set_interface_->CharSetToUTF16(
259 instance_->pp_instance(), badutf8, 3, "utf8",
260 PP_CHARSET_CONVERSIONERROR_SKIP, &utf16result_len);
261 ASSERT_TRUE(utf16result);
262 ASSERT_TRUE(utf16result_len == 2);
263 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 'Z' &&
264 utf16result[2] == 0);
265 memory.MemFree(utf16result);
266
267 // Invalid input with SUBSTITUTE.
268 utf16result = char_set_interface_->CharSetToUTF16(
269 instance_->pp_instance(), badutf8, 3, "utf8",
270 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
271 ASSERT_TRUE(utf16result);
272 ASSERT_TRUE(utf16result_len == 3);
273 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 0xFFFD &&
274 utf16result[2] == 'Z' && utf16result[3] == 0);
275 memory.MemFree(utf16result);
276
277 // Invalid encoding name.
278 utf16result = char_set_interface_->CharSetToUTF16(
279 instance_->pp_instance(), badutf8, 3, "poopiepants",
280 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
281 ASSERT_TRUE(!utf16result);
282 ASSERT_TRUE(utf16result_len == 0);
283 memory.MemFree(utf16result);
284
285 PASS();
286 }
287
TestCharSetToUTF16()288 std::string TestCharSet::TestCharSetToUTF16() {
289 std::vector<uint16_t> output_buffer;
290 output_buffer.resize(100);
291
292 // Empty string.
293 output_buffer.resize(100);
294 uint32_t utf16result_len = static_cast<uint32_t>(output_buffer.size());
295 PP_Bool result = char_set_trusted_interface_->CharSetToUTF16(
296 "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
297 &output_buffer[0], &utf16result_len);
298 ASSERT_TRUE(result);
299 ASSERT_TRUE(utf16result_len == 0);
300 ASSERT_TRUE(output_buffer[0] == 0);
301
302 // Basic Latin1.
303 output_buffer.resize(100);
304 utf16result_len = static_cast<uint32_t>(output_buffer.size());
305 char latin1[] = "H\xef";
306 result = char_set_trusted_interface_->CharSetToUTF16(
307 latin1, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
308 &output_buffer[0], &utf16result_len);
309 ASSERT_TRUE(result);
310 ASSERT_TRUE(utf16result_len == 2);
311 ASSERT_TRUE(output_buffer[0] == 'H' && output_buffer[1] == 0xef);
312
313 // Invalid input encoding with FAIL.
314 output_buffer.resize(100);
315 utf16result_len = static_cast<uint32_t>(output_buffer.size());
316 char badutf8[] = "A\xe4Z";
317 result = char_set_trusted_interface_->CharSetToUTF16(
318 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
319 &output_buffer[0], &utf16result_len);
320 ASSERT_TRUE(!result);
321 ASSERT_TRUE(utf16result_len == 0);
322
323 // Invalid input with SKIP.
324 output_buffer.resize(100);
325 utf16result_len = static_cast<uint32_t>(output_buffer.size());
326 result = char_set_trusted_interface_->CharSetToUTF16(
327 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
328 &output_buffer[0], &utf16result_len);
329 ASSERT_TRUE(result);
330 ASSERT_TRUE(utf16result_len == 2);
331 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 'Z');
332
333 // Invalid input with SUBSTITUTE.
334 output_buffer.resize(100);
335 utf16result_len = static_cast<uint32_t>(output_buffer.size());
336 result = char_set_trusted_interface_->CharSetToUTF16(
337 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
338 &output_buffer[0], &utf16result_len);
339 ASSERT_TRUE(result);
340 ASSERT_TRUE(utf16result_len == 3);
341 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 0xFFFD &&
342 output_buffer[2] == 'Z');
343
344 // Invalid encoding name.
345 output_buffer.resize(100);
346 utf16result_len = static_cast<uint32_t>(output_buffer.size());
347 result = char_set_trusted_interface_->CharSetToUTF16(
348 badutf8, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
349 &output_buffer[0], &utf16result_len);
350 ASSERT_TRUE(!result);
351 ASSERT_TRUE(utf16result_len == 0);
352
353 PASS();
354 }
355
TestGetDefaultCharSet()356 std::string TestCharSet::TestGetDefaultCharSet() {
357 // Test invalid instance.
358 pp::Var result(pp::PASS_REF, char_set_interface_->GetDefaultCharSet(0));
359 ASSERT_TRUE(result.is_undefined());
360
361 // Just make sure the default char set is a nonempty string.
362 result = pp::Var(pp::PASS_REF,
363 char_set_interface_->GetDefaultCharSet(instance_->pp_instance()));
364 ASSERT_TRUE(result.is_string());
365 ASSERT_FALSE(result.AsString().empty());
366
367 PASS();
368 }
369
UTF8ToUTF16(const std::string & utf8)370 std::vector<uint16_t> TestCharSet::UTF8ToUTF16(const std::string& utf8) {
371 uint32_t result_len = 0;
372 uint16_t* result = char_set_interface_->CharSetToUTF16(
373 instance_->pp_instance(), utf8.c_str(),
374 static_cast<uint32_t>(utf8.size()),
375 "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &result_len);
376
377 std::vector<uint16_t> result_vector;
378 if (!result)
379 return result_vector;
380
381 result_vector.assign(result, &result[result_len]);
382 pp::Memory_Dev memory;
383 memory.MemFree(result);
384 return result_vector;
385 }
386