/*
 * Copyright © 2011 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
26
#include "hb-test.h"
28
/* Unit tests for hb-buffer.h */
30
31
/* The sample text "ab", U+20000, "defg" in three encodings.  The fixture
 * and the empty-buffer test add each array minus its first and last code
 * unit. */

/* UTF-8: the ten bytes fill the array exactly, so no terminating NUL is
 * stored.  \360\240\200\200 is the four-byte encoding of U+20000. */
static const char utf8[10] = "ab\360\240\200\200defg";
/* UTF-16: U+20000 is the surrogate pair 0xD840 0xDC00. */
static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
/* UTF-32: one element per code point. */
static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
35
36
/* Selects how fixture_init() populates the buffer for a test flavor.
 * The values are also used as indices into buffer_names[]. */
typedef enum {
  BUFFER_EMPTY,
  BUFFER_ONE_BY_ONE,
  BUFFER_UTF32,
  BUFFER_UTF16,
  BUFFER_UTF8,
  BUFFER_NUM_TYPES,   /* count of the flavors above; not a flavor itself */
} buffer_type_t;
45
/* Human-readable flavor names, in the same order as buffer_type_t;
 * main() passes them as the flavor name when registering fixture tests. */
static const char *buffer_names[] = {
  "empty",
  "one-by-one",
  "utf32",
  "utf16",
  "utf8"
};
53
54 typedef struct
55 {
56 hb_buffer_t *buffer;
57 } fixture_t;
58
59 static void
fixture_init(fixture_t * fixture,gconstpointer user_data)60 fixture_init (fixture_t *fixture, gconstpointer user_data)
61 {
62 hb_buffer_t *b;
63 unsigned int i;
64
65 b = fixture->buffer = hb_buffer_create ();
66
67 switch (GPOINTER_TO_INT (user_data))
68 {
69 case BUFFER_EMPTY:
70 break;
71
72 case BUFFER_ONE_BY_ONE:
73 for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
74 hb_buffer_add (b, utf32[i], i);
75 break;
76
77 case BUFFER_UTF32:
78 hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
79 break;
80
81 case BUFFER_UTF16:
82 hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
83 break;
84
85 case BUFFER_UTF8:
86 hb_buffer_add_utf8 (b, utf8, G_N_ELEMENTS (utf8), 1, G_N_ELEMENTS (utf8) - 2);
87 break;
88
89 default:
90 g_assert_not_reached ();
91 }
92 }
93
94 static void
fixture_finish(fixture_t * fixture,gconstpointer user_data HB_UNUSED)95 fixture_finish (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
96 {
97 hb_buffer_destroy (fixture->buffer);
98 }
99
100
101 static void
test_buffer_properties(fixture_t * fixture,gconstpointer user_data HB_UNUSED)102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
103 {
104 hb_buffer_t *b = fixture->buffer;
105 hb_unicode_funcs_t *ufuncs;
106
107 /* test default properties */
108
109 g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
110 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
111 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
112 g_assert (hb_buffer_get_language (b) == NULL);
113
114
115 /* test property changes are retained */
116 ufuncs = hb_unicode_funcs_create (NULL);
117 hb_buffer_set_unicode_funcs (b, ufuncs);
118 hb_unicode_funcs_destroy (ufuncs);
119 g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
120
121 hb_buffer_set_direction (b, HB_DIRECTION_RTL);
122 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
123
124 hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
125 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
126
127 hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
128 g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
129
130 hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
131 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
132
133 hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
134 g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
135
136
137 /* test clear_contents clears all these properties: */
138
139 hb_buffer_clear_contents (b);
140
141 g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
142 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
143 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
144 g_assert (hb_buffer_get_language (b) == NULL);
145
146 /* but not these: */
147
148 g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
149 g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
150
151
152 /* test reset clears all properties */
153
154 hb_buffer_set_direction (b, HB_DIRECTION_RTL);
155 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
156
157 hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
158 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
159
160 hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
161 g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
162
163 hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
164 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
165
166 hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
167 g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
168
169 hb_buffer_reset (b);
170
171 g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
172 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
173 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
174 g_assert (hb_buffer_get_language (b) == NULL);
175 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
176 g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
177 }
178
179 static void
test_buffer_contents(fixture_t * fixture,gconstpointer user_data)180 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
181 {
182 hb_buffer_t *b = fixture->buffer;
183 unsigned int i, len, len2;
184 buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
185 hb_glyph_info_t *glyphs;
186
187 if (buffer_type == BUFFER_EMPTY) {
188 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
189 return;
190 }
191
192 len = hb_buffer_get_length (b);
193 hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
194 glyphs = hb_buffer_get_glyph_infos (b, &len2);
195 g_assert_cmpint (len, ==, len2);
196 g_assert_cmpint (len, ==, 5);
197
198 for (i = 0; i < len; i++) {
199 g_assert_cmphex (glyphs[i].mask, ==, 0);
200 g_assert_cmphex (glyphs[i].var1.u32, ==, 0);
201 g_assert_cmphex (glyphs[i].var2.u32, ==, 0);
202 }
203
204 for (i = 0; i < len; i++) {
205 unsigned int cluster;
206 cluster = 1+i;
207 if (i >= 2) {
208 if (buffer_type == BUFFER_UTF16)
209 cluster++;
210 else if (buffer_type == BUFFER_UTF8)
211 cluster += 3;
212 }
213 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
214 g_assert_cmphex (glyphs[i].cluster, ==, cluster);
215 }
216
217 /* reverse, test, and reverse back */
218
219 hb_buffer_reverse (b);
220 for (i = 0; i < len; i++)
221 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
222
223 hb_buffer_reverse (b);
224 for (i = 0; i < len; i++)
225 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
226
227 /* reverse_clusters works same as reverse for now since each codepoint is
228 * in its own cluster */
229
230 hb_buffer_reverse_clusters (b);
231 for (i = 0; i < len; i++)
232 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
233
234 hb_buffer_reverse_clusters (b);
235 for (i = 0; i < len; i++)
236 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
237
238 /* now form a cluster and test again */
239 glyphs[2].cluster = glyphs[1].cluster;
240
241 /* reverse, test, and reverse back */
242
243 hb_buffer_reverse (b);
244 for (i = 0; i < len; i++)
245 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
246
247 hb_buffer_reverse (b);
248 for (i = 0; i < len; i++)
249 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
250
251 /* reverse_clusters twice still should return the original string,
252 * but when applied once, the 1-2 cluster should be retained. */
253
254 hb_buffer_reverse_clusters (b);
255 for (i = 0; i < len; i++) {
256 unsigned int j = len-1-i;
257 if (j == 1)
258 j = 2;
259 else if (j == 2)
260 j = 1;
261 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
262 }
263
264 hb_buffer_reverse_clusters (b);
265 for (i = 0; i < len; i++)
266 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
267
268
269 /* test setting length */
270
271 /* enlarge */
272 g_assert (hb_buffer_set_length (b, 10));
273 glyphs = hb_buffer_get_glyph_infos (b, NULL);
274 g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
275 for (i = 0; i < 5; i++)
276 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
277 for (i = 5; i < 10; i++)
278 g_assert_cmphex (glyphs[i].codepoint, ==, 0);
279 /* shrink */
280 g_assert (hb_buffer_set_length (b, 3));
281 glyphs = hb_buffer_get_glyph_infos (b, NULL);
282 g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
283 for (i = 0; i < 3; i++)
284 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
285
286
287 g_assert (hb_buffer_allocation_successful (b));
288
289
290 /* test reset clears content */
291
292 hb_buffer_reset (b);
293 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
294 }
295
296 static void
test_buffer_positions(fixture_t * fixture,gconstpointer user_data HB_UNUSED)297 test_buffer_positions (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
298 {
299 hb_buffer_t *b = fixture->buffer;
300 unsigned int i, len, len2;
301 hb_glyph_position_t *positions;
302
303 /* Without shaping, positions should all be zero */
304 len = hb_buffer_get_length (b);
305 hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
306 positions = hb_buffer_get_glyph_positions (b, &len2);
307 g_assert_cmpint (len, ==, len2);
308 for (i = 0; i < len; i++) {
309 g_assert_cmpint (0, ==, positions[i].x_advance);
310 g_assert_cmpint (0, ==, positions[i].y_advance);
311 g_assert_cmpint (0, ==, positions[i].x_offset);
312 g_assert_cmpint (0, ==, positions[i].y_offset);
313 g_assert_cmpint (0, ==, positions[i].var.i32);
314 }
315
316 /* test reset clears content */
317 hb_buffer_reset (b);
318 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
319 }
320
321 static void
test_buffer_allocation(fixture_t * fixture,gconstpointer user_data HB_UNUSED)322 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
323 {
324 hb_buffer_t *b = fixture->buffer;
325
326 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
327
328 g_assert (hb_buffer_pre_allocate (b, 100));
329 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
330 g_assert (hb_buffer_allocation_successful (b));
331
332 /* lets try a huge allocation, make sure it fails */
333 g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
334 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
335 g_assert (!hb_buffer_allocation_successful (b));
336
337 /* small one again */
338 g_assert (hb_buffer_pre_allocate (b, 50));
339 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
340 g_assert (!hb_buffer_allocation_successful (b));
341
342 hb_buffer_reset (b);
343 g_assert (hb_buffer_allocation_successful (b));
344
345 /* all allocation and size */
346 g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
347 g_assert (!hb_buffer_allocation_successful (b));
348
349 hb_buffer_reset (b);
350 g_assert (hb_buffer_allocation_successful (b));
351
352 /* technically, this one can actually pass on 64bit machines, but
353 * I'm doubtful that any malloc allows 4GB allocations at a time.
354 * But let's only enable it on a 32-bit machine. */
355 if (sizeof (long) == 4) {
356 g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
357 g_assert (!hb_buffer_allocation_successful (b));
358 }
359
360 hb_buffer_reset (b);
361 g_assert (hb_buffer_allocation_successful (b));
362 }
363
364
365 typedef struct {
366 const char utf8[8];
367 const uint32_t codepoints[8];
368 } utf8_conversion_test_t;
369
370 /* note: we skip the first and last byte when adding to buffer */
371 static const utf8_conversion_test_t utf8_conversion_tests[] = {
372 {"a\303\207", {(hb_codepoint_t) -1}},
373 {"a\303\207b", {0xC7}},
374 {"ab\303cd", {'b', (hb_codepoint_t) -1, 'c'}},
375 {"ab\303\302\301cd", {'b', (hb_codepoint_t) -1, (hb_codepoint_t) -1, (hb_codepoint_t) -1, 'c'}}
376 };
377
378 static void
test_buffer_utf8_conversion(void)379 test_buffer_utf8_conversion (void)
380 {
381 hb_buffer_t *b;
382 unsigned int chars, i, j, len;
383
384 b = hb_buffer_create ();
385 hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
386
387 for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
388 {
389 unsigned int bytes;
390 hb_glyph_info_t *glyphs;
391 const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
392 char *escaped;
393
394 escaped = g_strescape (test->utf8, NULL);
395 g_test_message ("UTF-8 test #%d: %s", i, escaped);
396 g_free (escaped);
397
398 bytes = strlen (test->utf8);
399 for (chars = 0; test->codepoints[chars]; chars++)
400 ;
401
402 hb_buffer_clear_contents (b);
403 hb_buffer_add_utf8 (b, test->utf8, bytes, 1, bytes - 2);
404
405 glyphs = hb_buffer_get_glyph_infos (b, &len);
406 g_assert_cmpint (len, ==, chars);
407 for (j = 0; j < chars; j++)
408 g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
409 }
410
411 hb_buffer_destroy (b);
412 }
413
414
415
416 /* Following test table is adapted from glib/glib/tests/utf8-validate.c
417 * with relicensing permission from Matthias Clasen. */
418
419 typedef struct {
420 const char *utf8;
421 int max_len;
422 unsigned int offset;
423 gboolean valid;
424 } utf8_validity_test_t;
425
426 static const utf8_validity_test_t utf8_validity_tests[] = {
427 /* some tests to check max_len handling */
428 /* length 1 */
429 { "abcde", -1, 5, TRUE },
430 { "abcde", 3, 3, TRUE },
431 { "abcde", 5, 5, TRUE },
432 /* length 2 */
433 { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
434 { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE },
435 { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE },
436 { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE },
437 { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE },
438 { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE },
439 { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE },
440 /* length 3 */
441 { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
442 { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE },
443 { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE },
444 { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE },
445 { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE },
446 { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
447 { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
448
449 /* examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
450 /* greek 'kosme' */
451 { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
452 /* first sequence of each length */
453 { "\x00", -1, 0, TRUE },
454 { "\xc2\x80", -1, 2, TRUE },
455 { "\xe0\xa0\x80", -1, 3, TRUE },
456 { "\xf0\x90\x80\x80", -1, 4, TRUE },
457 { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
458 { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
459 /* last sequence of each length */
460 { "\x7f", -1, 1, TRUE },
461 { "\xdf\xbf", -1, 2, TRUE },
462 { "\xef\xbf\xbf", -1, 0, TRUE },
463 { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
464 { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
465 { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
466 { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
467 { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
468 /* other boundary conditions */
469 { "\xed\x9f\xbf", -1, 3, TRUE },
470 { "\xed\xa0\x80", -1, 0, FALSE },
471 { "\xed\xbf\xbf", -1, 0, FALSE },
472 { "\xee\x80\x80", -1, 3, TRUE },
473 { "\xef\xbf\xbd", -1, 3, TRUE },
474 { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
475 /* malformed sequences */
476 /* continuation bytes */
477 { "\x80", -1, 0, FALSE },
478 { "\xbf", -1, 0, FALSE },
479 { "\x80\xbf", -1, 0, FALSE },
480 { "\x80\xbf\x80", -1, 0, FALSE },
481 { "\x80\xbf\x80\xbf", -1, 0, FALSE },
482 { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
483 { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
484 { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
485
486 /* all possible continuation byte */
487 { "\x80", -1, 0, FALSE },
488 { "\x81", -1, 0, FALSE },
489 { "\x82", -1, 0, FALSE },
490 { "\x83", -1, 0, FALSE },
491 { "\x84", -1, 0, FALSE },
492 { "\x85", -1, 0, FALSE },
493 { "\x86", -1, 0, FALSE },
494 { "\x87", -1, 0, FALSE },
495 { "\x88", -1, 0, FALSE },
496 { "\x89", -1, 0, FALSE },
497 { "\x8a", -1, 0, FALSE },
498 { "\x8b", -1, 0, FALSE },
499 { "\x8c", -1, 0, FALSE },
500 { "\x8d", -1, 0, FALSE },
501 { "\x8e", -1, 0, FALSE },
502 { "\x8f", -1, 0, FALSE },
503 { "\x90", -1, 0, FALSE },
504 { "\x91", -1, 0, FALSE },
505 { "\x92", -1, 0, FALSE },
506 { "\x93", -1, 0, FALSE },
507 { "\x94", -1, 0, FALSE },
508 { "\x95", -1, 0, FALSE },
509 { "\x96", -1, 0, FALSE },
510 { "\x97", -1, 0, FALSE },
511 { "\x98", -1, 0, FALSE },
512 { "\x99", -1, 0, FALSE },
513 { "\x9a", -1, 0, FALSE },
514 { "\x9b", -1, 0, FALSE },
515 { "\x9c", -1, 0, FALSE },
516 { "\x9d", -1, 0, FALSE },
517 { "\x9e", -1, 0, FALSE },
518 { "\x9f", -1, 0, FALSE },
519 { "\xa0", -1, 0, FALSE },
520 { "\xa1", -1, 0, FALSE },
521 { "\xa2", -1, 0, FALSE },
522 { "\xa3", -1, 0, FALSE },
523 { "\xa4", -1, 0, FALSE },
524 { "\xa5", -1, 0, FALSE },
525 { "\xa6", -1, 0, FALSE },
526 { "\xa7", -1, 0, FALSE },
527 { "\xa8", -1, 0, FALSE },
528 { "\xa9", -1, 0, FALSE },
529 { "\xaa", -1, 0, FALSE },
530 { "\xab", -1, 0, FALSE },
531 { "\xac", -1, 0, FALSE },
532 { "\xad", -1, 0, FALSE },
533 { "\xae", -1, 0, FALSE },
534 { "\xaf", -1, 0, FALSE },
535 { "\xb0", -1, 0, FALSE },
536 { "\xb1", -1, 0, FALSE },
537 { "\xb2", -1, 0, FALSE },
538 { "\xb3", -1, 0, FALSE },
539 { "\xb4", -1, 0, FALSE },
540 { "\xb5", -1, 0, FALSE },
541 { "\xb6", -1, 0, FALSE },
542 { "\xb7", -1, 0, FALSE },
543 { "\xb8", -1, 0, FALSE },
544 { "\xb9", -1, 0, FALSE },
545 { "\xba", -1, 0, FALSE },
546 { "\xbb", -1, 0, FALSE },
547 { "\xbc", -1, 0, FALSE },
548 { "\xbd", -1, 0, FALSE },
549 { "\xbe", -1, 0, FALSE },
550 { "\xbf", -1, 0, FALSE },
551 /* lone start characters */
552 { "\xc0\x20", -1, 0, FALSE },
553 { "\xc1\x20", -1, 0, FALSE },
554 { "\xc2\x20", -1, 0, FALSE },
555 { "\xc3\x20", -1, 0, FALSE },
556 { "\xc4\x20", -1, 0, FALSE },
557 { "\xc5\x20", -1, 0, FALSE },
558 { "\xc6\x20", -1, 0, FALSE },
559 { "\xc7\x20", -1, 0, FALSE },
560 { "\xc8\x20", -1, 0, FALSE },
561 { "\xc9\x20", -1, 0, FALSE },
562 { "\xca\x20", -1, 0, FALSE },
563 { "\xcb\x20", -1, 0, FALSE },
564 { "\xcc\x20", -1, 0, FALSE },
565 { "\xcd\x20", -1, 0, FALSE },
566 { "\xce\x20", -1, 0, FALSE },
567 { "\xcf\x20", -1, 0, FALSE },
568 { "\xd0\x20", -1, 0, FALSE },
569 { "\xd1\x20", -1, 0, FALSE },
570 { "\xd2\x20", -1, 0, FALSE },
571 { "\xd3\x20", -1, 0, FALSE },
572 { "\xd4\x20", -1, 0, FALSE },
573 { "\xd5\x20", -1, 0, FALSE },
574 { "\xd6\x20", -1, 0, FALSE },
575 { "\xd7\x20", -1, 0, FALSE },
576 { "\xd8\x20", -1, 0, FALSE },
577 { "\xd9\x20", -1, 0, FALSE },
578 { "\xda\x20", -1, 0, FALSE },
579 { "\xdb\x20", -1, 0, FALSE },
580 { "\xdc\x20", -1, 0, FALSE },
581 { "\xdd\x20", -1, 0, FALSE },
582 { "\xde\x20", -1, 0, FALSE },
583 { "\xdf\x20", -1, 0, FALSE },
584 { "\xe0\x20", -1, 0, FALSE },
585 { "\xe1\x20", -1, 0, FALSE },
586 { "\xe2\x20", -1, 0, FALSE },
587 { "\xe3\x20", -1, 0, FALSE },
588 { "\xe4\x20", -1, 0, FALSE },
589 { "\xe5\x20", -1, 0, FALSE },
590 { "\xe6\x20", -1, 0, FALSE },
591 { "\xe7\x20", -1, 0, FALSE },
592 { "\xe8\x20", -1, 0, FALSE },
593 { "\xe9\x20", -1, 0, FALSE },
594 { "\xea\x20", -1, 0, FALSE },
595 { "\xeb\x20", -1, 0, FALSE },
596 { "\xec\x20", -1, 0, FALSE },
597 { "\xed\x20", -1, 0, FALSE },
598 { "\xee\x20", -1, 0, FALSE },
599 { "\xef\x20", -1, 0, FALSE },
600 { "\xf0\x20", -1, 0, FALSE },
601 { "\xf1\x20", -1, 0, FALSE },
602 { "\xf2\x20", -1, 0, FALSE },
603 { "\xf3\x20", -1, 0, FALSE },
604 { "\xf4\x20", -1, 0, FALSE },
605 { "\xf5\x20", -1, 0, FALSE },
606 { "\xf6\x20", -1, 0, FALSE },
607 { "\xf7\x20", -1, 0, FALSE },
608 { "\xf8\x20", -1, 0, FALSE },
609 { "\xf9\x20", -1, 0, FALSE },
610 { "\xfa\x20", -1, 0, FALSE },
611 { "\xfb\x20", -1, 0, FALSE },
612 { "\xfc\x20", -1, 0, FALSE },
613 { "\xfd\x20", -1, 0, FALSE },
614 /* missing continuation bytes */
615 { "\x20\xc0", -1, 1, FALSE },
616 { "\x20\xe0\x80", -1, 1, FALSE },
617 { "\x20\xf0\x80\x80", -1, 1, FALSE },
618 { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
619 { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
620 { "\x20\xdf", -1, 1, FALSE },
621 { "\x20\xef\xbf", -1, 1, FALSE },
622 { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
623 { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
624 { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
625 /* impossible bytes */
626 { "\x20\xfe\x20", -1, 1, FALSE },
627 { "\x20\xff\x20", -1, 1, FALSE },
628 /* overlong sequences */
629 { "\x20\xc0\xaf\x20", -1, 1, FALSE },
630 { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
631 { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
632 { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
633 { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
634 { "\x20\xc1\xbf\x20", -1, 1, FALSE },
635 { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
636 { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
637 { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
638 { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
639 { "\x20\xc0\x80\x20", -1, 1, FALSE },
640 { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
641 { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
642 { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
643 { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
644 /* illegal code positions */
645 { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
646 { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
647 { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
648 { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
649 { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
650 { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
651 { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
652 { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
653 { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
654 { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
655 { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
656 { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
657 { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
658 { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
659 { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
660 #if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
661 { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
662 { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
663 #endif
664 { "", -1, 0, TRUE }
665 };
666
667 static void
test_buffer_utf8_validity(void)668 test_buffer_utf8_validity (void)
669 {
670 hb_buffer_t *b;
671 unsigned int i;
672
673 b = hb_buffer_create ();
674 hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
675
676 for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
677 {
678 const utf8_validity_test_t *test = &utf8_validity_tests[i];
679 unsigned int text_bytes, segment_bytes, j, len;
680 hb_glyph_info_t *glyphs;
681 char *escaped;
682
683 escaped = g_strescape (test->utf8, NULL);
684 g_test_message ("UTF-8 test #%d: %s", i, escaped);
685 g_free (escaped);
686
687 text_bytes = strlen (test->utf8);
688 if (test->max_len == -1)
689 segment_bytes = text_bytes;
690 else
691 segment_bytes = test->max_len;
692
693 hb_buffer_clear_contents (b);
694 hb_buffer_add_utf8 (b, test->utf8, text_bytes, 0, segment_bytes);
695
696 glyphs = hb_buffer_get_glyph_infos (b, &len);
697 for (j = 0; j < len; j++)
698 if (glyphs[j].codepoint == (hb_codepoint_t) -1)
699 break;
700
701 g_assert (test->valid ? j == len : j < len);
702 if (!test->valid)
703 g_assert (glyphs[j].cluster == test->offset);
704 }
705
706 hb_buffer_destroy (b);
707 }
708
709
710 typedef struct {
711 const uint16_t utf16[8];
712 const uint32_t codepoints[8];
713 } utf16_conversion_test_t;
714
715 /* note: we skip the first and last item from utf16 when adding to buffer */
716 static const utf16_conversion_test_t utf16_conversion_tests[] = {
717 {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
718 {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
719 {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -1}},
720 {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -1}},
721 {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -1, 0x61}},
722 {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -1}},
723 {{0x41, 0x61}, {0}}
724 };
725
726 static void
test_buffer_utf16_conversion(void)727 test_buffer_utf16_conversion (void)
728 {
729 hb_buffer_t *b;
730 unsigned int i;
731
732 b = hb_buffer_create ();
733 hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
734
735 for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
736 {
737 const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
738 unsigned int u_len, chars, j, len;
739 hb_glyph_info_t *glyphs;
740
741 g_test_message ("UTF-16 test #%d", i);
742
743 for (u_len = 0; test->utf16[u_len]; u_len++)
744 ;
745 for (chars = 0; test->codepoints[chars]; chars++)
746 ;
747
748 hb_buffer_clear_contents (b);
749 hb_buffer_add_utf16 (b, test->utf16, u_len, 1, u_len - 2);
750
751 glyphs = hb_buffer_get_glyph_infos (b, &len);
752 g_assert_cmpint (len, ==, chars);
753 for (j = 0; j < chars; j++)
754 g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
755 }
756
757 hb_buffer_destroy (b);
758 }
759
760
761 typedef struct {
762 const uint32_t utf32[8];
763 const uint32_t codepoints[8];
764 } utf32_conversion_test_t;
765
766 /* note: we skip the first and last item from utf32 when adding to buffer */
767 static const utf32_conversion_test_t utf32_conversion_tests[] = {
768 {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, (hb_codepoint_t) -3, (hb_codepoint_t) -3}},
769 {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
770 {{0x41, 0xD800, 0xDF02, 0x61}, {(hb_codepoint_t) -3, (hb_codepoint_t) -3}},
771 {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -3}},
772 {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -3}},
773 {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -3, 0x61}},
774 {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -3}},
775 {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
776 {{0x41, 0x110000, 0x61}, {(hb_codepoint_t) -3}},
777 {{0x41, 0x61}, {0}}
778 };
779
780 static void
test_buffer_utf32_conversion(void)781 test_buffer_utf32_conversion (void)
782 {
783 hb_buffer_t *b;
784 unsigned int i;
785
786 b = hb_buffer_create ();
787 hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
788
789 for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
790 {
791 const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
792 unsigned int u_len, chars, j, len;
793 hb_glyph_info_t *glyphs;
794
795 g_test_message ("UTF-32 test #%d", i);
796
797 for (u_len = 0; test->utf32[u_len]; u_len++)
798 ;
799 for (chars = 0; test->codepoints[chars]; chars++)
800 ;
801
802 hb_buffer_clear_contents (b);
803 hb_buffer_add_utf32 (b, test->utf32, u_len, 1, u_len - 2);
804
805 glyphs = hb_buffer_get_glyph_infos (b, &len);
806 g_assert_cmpint (len, ==, chars);
807 for (j = 0; j < chars; j++)
808 g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
809 }
810
811 hb_buffer_destroy (b);
812 }
813
814
815 static void
test_empty(hb_buffer_t * b)816 test_empty (hb_buffer_t *b)
817 {
818 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
819 g_assert (!hb_buffer_get_glyph_infos (b, NULL));
820 g_assert (!hb_buffer_get_glyph_positions (b, NULL));
821 }
822
823 static void
test_buffer_empty(void)824 test_buffer_empty (void)
825 {
826 hb_buffer_t *b = hb_buffer_get_empty ();
827
828 g_assert (hb_buffer_get_empty ());
829 g_assert (hb_buffer_get_empty () == b);
830
831 g_assert (!hb_buffer_allocation_successful (b));
832
833 test_empty (b);
834
835 hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
836
837 test_empty (b);
838
839 hb_buffer_reverse (b);
840 hb_buffer_reverse_clusters (b);
841
842 g_assert (!hb_buffer_set_length (b, 10));
843
844 test_empty (b);
845
846 g_assert (hb_buffer_set_length (b, 0));
847
848 test_empty (b);
849
850 g_assert (!hb_buffer_allocation_successful (b));
851
852 hb_buffer_reset (b);
853
854 test_empty (b);
855
856 g_assert (!hb_buffer_allocation_successful (b));
857 }
858
859 typedef struct {
860 const char *contents;
861 hb_buffer_serialize_format_t format;
862 unsigned int num_items;
863 hb_bool_t success;
864 } serialization_test_t;
865
866 static const serialization_test_t serialization_tests[] = {
867 { "<U+0640=0|U+0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 2, 1 },
868 { "[{\"u\":1600,\"cl\":0},{\"u\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 2, 1 },
869
870 /* Mixed glyphs/Unicodes -> parse fail */
871 { "[{\"u\":1600,\"cl\":0},{\"g\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 0, 0 },
872 { "<U+0640=0|uni0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 0, 0 },
873 };
874
875 static void
test_buffer_serialize_deserialize(void)876 test_buffer_serialize_deserialize (void)
877 {
878 hb_buffer_t *b;
879 unsigned int i;
880
881 for (i = 0; i < G_N_ELEMENTS (serialization_tests); i++)
882 {
883 unsigned int consumed;
884 char round_trip[1024];
885
886 b = hb_buffer_create ();
887 hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
888
889 const serialization_test_t *test = &serialization_tests[i];
890 g_test_message ("serialize test #%d", i);
891
892 (void) hb_buffer_deserialize_unicode (b, test->contents, -1, NULL, test->format);
893
894 // Expected parse failure, got one, don't round-trip
895 if (test->success != 0)
896 {
897 unsigned int num_glyphs = hb_buffer_get_length (b);
898 g_assert_cmpint (num_glyphs, ==, test->num_items);
899
900 hb_buffer_serialize_unicode (b, 0, num_glyphs, round_trip,
901 sizeof(round_trip), &consumed, test->format,
902 HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
903 g_assert_cmpstr (round_trip, ==, test->contents);
904 }
905
906 hb_buffer_destroy (b);
907
908 }
909
910 char test[1024];
911 unsigned int consumed;
912 hb_buffer_t *indeterminate = hb_buffer_get_empty ();
913 hb_buffer_serialize (indeterminate, 0, (unsigned) -1,
914 test, sizeof(test), &consumed, NULL,
915 HB_BUFFER_SERIALIZE_FORMAT_JSON,
916 HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
917 g_assert_cmpstr ( test, ==, "[]");
918
919 hb_buffer_serialize (indeterminate, 0, (unsigned) - 1,
920 test, sizeof(test), &consumed, NULL,
921 HB_BUFFER_SERIALIZE_FORMAT_TEXT,
922 HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
923 g_assert_cmpstr ( test, ==, "!!");
924
925 }
926
927 int
main(int argc,char ** argv)928 main (int argc, char **argv)
929 {
930 unsigned int i;
931
932 hb_test_init (&argc, &argv);
933
934 for (i = 0; i < BUFFER_NUM_TYPES; i++)
935 {
936 const void *buffer_type = GINT_TO_POINTER (i);
937 const char *buffer_name = buffer_names[i];
938
939 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
940 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
941 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
942 }
943
944 hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
945
946 hb_test_add (test_buffer_utf8_conversion);
947 hb_test_add (test_buffer_utf8_validity);
948 hb_test_add (test_buffer_utf16_conversion);
949 hb_test_add (test_buffer_utf32_conversion);
950 hb_test_add (test_buffer_empty);
951 hb_test_add (test_buffer_serialize_deserialize);
952
953 return hb_test_run();
954 }
955