• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2011  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb-test.h"
28 
29 /* Unit tests for hb-buffer.h */
30 
31 
/* The same sample text in three encodings: "ab", U+20000, "defg".
 * U+20000 occupies four bytes in utf8 and a surrogate pair in utf16.
 * utf8 is sized to exactly the 10 content bytes, so no terminating NUL is
 * stored; callers always pass its length explicitly. */
static const char utf8[10] = "ab\360\240\200\200defg";
static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
35 
36 
/* Buffer flavors: selects how fixture_init() populates the buffer under
 * test.  BUFFER_NUM_TYPES is the number of flavors, not a flavor itself,
 * and must stay last. */
typedef enum {
  BUFFER_EMPTY,
  BUFFER_ONE_BY_ONE,
  BUFFER_UTF32,
  BUFFER_UTF16,
  BUFFER_UTF8,
  BUFFER_NUM_TYPES,
} buffer_type_t;
45 
/* Flavor names, indexed by buffer_type_t.  main() reads buffer_names[i] for
 * every i below BUFFER_NUM_TYPES, so this list must have one entry per
 * flavor, in enum order. */
static const char *buffer_names[] = {
  "empty",
  "one-by-one",
  "utf32",
  "utf16",
  "utf8"
};
53 
/* Per-test fixture: the buffer created by fixture_init() and destroyed by
 * fixture_finish(). */
typedef struct
{
  hb_buffer_t *buffer;
} fixture_t;
58 
59 static void
fixture_init(fixture_t * fixture,gconstpointer user_data)60 fixture_init (fixture_t *fixture, gconstpointer user_data)
61 {
62   hb_buffer_t *b;
63   unsigned int i;
64 
65   b = fixture->buffer = hb_buffer_create ();
66 
67   switch (GPOINTER_TO_INT (user_data))
68   {
69     case BUFFER_EMPTY:
70       break;
71 
72     case BUFFER_ONE_BY_ONE:
73       for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
74       hb_buffer_add (b, utf32[i], i);
75       break;
76 
77     case BUFFER_UTF32:
78       hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
79       break;
80 
81     case BUFFER_UTF16:
82       hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
83       break;
84 
85     case BUFFER_UTF8:
86       hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
87       break;
88 
89     default:
90       g_assert_not_reached ();
91   }
92 }
93 
94 static void
fixture_finish(fixture_t * fixture,gconstpointer user_data HB_UNUSED)95 fixture_finish (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
96 {
97   hb_buffer_destroy (fixture->buffer);
98 }
99 
100 
101 static void
test_buffer_properties(fixture_t * fixture,gconstpointer user_data HB_UNUSED)102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
103 {
104   hb_buffer_t *b = fixture->buffer;
105   hb_unicode_funcs_t *ufuncs;
106 
107   /* test default properties */
108 
109   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
110   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
111   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
112   g_assert (hb_buffer_get_language (b) == NULL);
113 
114 
115   /* test property changes are retained */
116   ufuncs = hb_unicode_funcs_create (NULL);
117   hb_buffer_set_unicode_funcs (b, ufuncs);
118   hb_unicode_funcs_destroy (ufuncs);
119   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
120 
121   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
122   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
123 
124   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
125   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
126 
127   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
128   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
129 
130   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
131   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
132 
133   hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
134   g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
135 
136 
137   /* test clear_contents clears all these properties: */
138 
139   hb_buffer_clear_contents (b);
140 
141   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
142   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
143   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
144   g_assert (hb_buffer_get_language (b) == NULL);
145 
146   /* but not these: */
147 
148   g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
149   g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
150 
151 
152   /* test reset clears all properties */
153 
154   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
155   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
156 
157   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
158   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
159 
160   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
161   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
162 
163   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
164   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
165 
166   hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
167   g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
168 
169   hb_buffer_reset (b);
170 
171   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
172   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
173   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
174   g_assert (hb_buffer_get_language (b) == NULL);
175   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
176   g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
177 }
178 
179 static void
test_buffer_contents(fixture_t * fixture,gconstpointer user_data)180 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
181 {
182   hb_buffer_t *b = fixture->buffer;
183   unsigned int i, len, len2;
184   buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
185   hb_glyph_info_t *glyphs;
186 
187   if (buffer_type == BUFFER_EMPTY) {
188     g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
189     return;
190   }
191 
192   len = hb_buffer_get_length (b);
193   hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
194   glyphs = hb_buffer_get_glyph_infos (b, &len2);
195   g_assert_cmpint (len, ==, len2);
196   g_assert_cmpint (len, ==, 5);
197 
198   for (i = 0; i < len; i++) {
199     g_assert_cmphex (glyphs[i].mask,      ==, 0);
200     g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
201     g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
202   }
203 
204   for (i = 0; i < len; i++) {
205     unsigned int cluster;
206     cluster = 1+i;
207     if (i >= 2) {
208       if (buffer_type == BUFFER_UTF16)
209 	cluster++;
210       else if (buffer_type == BUFFER_UTF8)
211 	cluster += 3;
212     }
213     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
214     g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
215   }
216 
217   /* reverse, test, and reverse back */
218 
219   hb_buffer_reverse (b);
220   for (i = 0; i < len; i++)
221     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
222 
223   hb_buffer_reverse (b);
224   for (i = 0; i < len; i++)
225     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
226 
227   /* reverse_clusters works same as reverse for now since each codepoint is
228    * in its own cluster */
229 
230   hb_buffer_reverse_clusters (b);
231   for (i = 0; i < len; i++)
232     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
233 
234   hb_buffer_reverse_clusters (b);
235   for (i = 0; i < len; i++)
236     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
237 
238   /* now form a cluster and test again */
239   glyphs[2].cluster = glyphs[1].cluster;
240 
241   /* reverse, test, and reverse back */
242 
243   hb_buffer_reverse (b);
244   for (i = 0; i < len; i++)
245     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
246 
247   hb_buffer_reverse (b);
248   for (i = 0; i < len; i++)
249     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
250 
251   /* reverse_clusters twice still should return the original string,
252    * but when applied once, the 1-2 cluster should be retained. */
253 
254   hb_buffer_reverse_clusters (b);
255   for (i = 0; i < len; i++) {
256     unsigned int j = len-1-i;
257     if (j == 1)
258       j = 2;
259     else if (j == 2)
260       j = 1;
261     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
262   }
263 
264   hb_buffer_reverse_clusters (b);
265   for (i = 0; i < len; i++)
266     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
267 
268 
269   /* test setting length */
270 
271   /* enlarge */
272   g_assert (hb_buffer_set_length (b, 10));
273   glyphs = hb_buffer_get_glyph_infos (b, NULL);
274   g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
275   for (i = 0; i < 5; i++)
276     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
277   for (i = 5; i < 10; i++)
278     g_assert_cmphex (glyphs[i].codepoint, ==, 0);
279   /* shrink */
280   g_assert (hb_buffer_set_length (b, 3));
281   glyphs = hb_buffer_get_glyph_infos (b, NULL);
282   g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
283   for (i = 0; i < 3; i++)
284     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
285 
286 
287   g_assert (hb_buffer_allocation_successful (b));
288 
289 
290   /* test reset clears content */
291 
292   hb_buffer_reset (b);
293   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
294 }
295 
296 static void
test_buffer_positions(fixture_t * fixture,gconstpointer user_data HB_UNUSED)297 test_buffer_positions (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
298 {
299   hb_buffer_t *b = fixture->buffer;
300   unsigned int i, len, len2;
301   hb_glyph_position_t *positions;
302 
303   /* Without shaping, positions should all be zero */
304   len = hb_buffer_get_length (b);
305   hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
306   positions = hb_buffer_get_glyph_positions (b, &len2);
307   g_assert_cmpint (len, ==, len2);
308   for (i = 0; i < len; i++) {
309     g_assert_cmpint (0, ==, positions[i].x_advance);
310     g_assert_cmpint (0, ==, positions[i].y_advance);
311     g_assert_cmpint (0, ==, positions[i].x_offset);
312     g_assert_cmpint (0, ==, positions[i].y_offset);
313     g_assert_cmpint (0, ==, positions[i].var.i32);
314   }
315 
316   /* test reset clears content */
317   hb_buffer_reset (b);
318   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
319 }
320 
321 static void
test_buffer_allocation(fixture_t * fixture,gconstpointer user_data HB_UNUSED)322 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
323 {
324   hb_buffer_t *b = fixture->buffer;
325 
326   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
327 
328   g_assert (hb_buffer_pre_allocate (b, 100));
329   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
330   g_assert (hb_buffer_allocation_successful (b));
331 
332   /* lets try a huge allocation, make sure it fails */
333   g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
334   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
335   g_assert (!hb_buffer_allocation_successful (b));
336 
337   /* small one again */
338   g_assert (hb_buffer_pre_allocate (b, 50));
339   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
340   g_assert (!hb_buffer_allocation_successful (b));
341 
342   hb_buffer_reset (b);
343   g_assert (hb_buffer_allocation_successful (b));
344 
345   /* all allocation and size  */
346   g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
347   g_assert (!hb_buffer_allocation_successful (b));
348 
349   hb_buffer_reset (b);
350   g_assert (hb_buffer_allocation_successful (b));
351 
352   /* technically, this one can actually pass on 64bit machines, but
353    * I'm doubtful that any malloc allows 4GB allocations at a time.
354    * But let's only enable it on a 32-bit machine. */
355   if (sizeof (long) == 4) {
356     g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
357     g_assert (!hb_buffer_allocation_successful (b));
358   }
359 
360   hb_buffer_reset (b);
361   g_assert (hb_buffer_allocation_successful (b));
362 }
363 
364 
/* One UTF-8 decoding case: NUL-terminated input bytes and the codepoints
 * expected in the buffer.  codepoints is terminated by a 0 entry (unused
 * trailing slots are zero-initialized); the test counts entries up to the
 * first 0. */
typedef struct {
  const char utf8[8];
  const uint32_t codepoints[8];
} utf8_conversion_test_t;

/* note: we skip the first and last byte when adding to buffer.
 * (hb_codepoint_t) -1 is the replacement codepoint the test installs, so it
 * marks positions where an undecodable sequence is expected. */
static const utf8_conversion_test_t utf8_conversion_tests[] = {
  {"a\303\207", {(hb_codepoint_t) -1}},
  {"a\303\207b", {0xC7}},
  {"ab\303cd", {'b', (hb_codepoint_t) -1, 'c'}},
  {"ab\303\302\301cd", {'b', (hb_codepoint_t) -1, (hb_codepoint_t) -1, (hb_codepoint_t) -1, 'c'}}
};
377 
378 static void
test_buffer_utf8_conversion(void)379 test_buffer_utf8_conversion (void)
380 {
381   hb_buffer_t *b;
382   unsigned int chars, i, j, len;
383 
384   b = hb_buffer_create ();
385   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
386 
387   for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
388   {
389     unsigned int bytes;
390     hb_glyph_info_t *glyphs;
391     const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
392     char *escaped;
393 
394     escaped = g_strescape (test->utf8, NULL);
395     g_test_message ("UTF-8 test #%d: %s", i, escaped);
396     g_free (escaped);
397 
398     bytes = strlen (test->utf8);
399     for (chars = 0; test->codepoints[chars]; chars++)
400       ;
401 
402     hb_buffer_clear_contents (b);
403     hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
404 
405     glyphs = hb_buffer_get_glyph_infos (b, &len);
406     g_assert_cmpint (len, ==, chars);
407     for (j = 0; j < chars; j++)
408       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
409   }
410 
411   hb_buffer_destroy (b);
412 }
413 
414 
415 
416 /* Following test table is adapted from glib/glib/tests/utf8-validate.c
417  * with relicensing permission from Matthias Clasen. */
418 
/* One UTF-8 validity case, as consumed by test_buffer_utf8_validity(). */
typedef struct {
  const char *utf8;     /* NUL-terminated input bytes */
  int max_len;          /* number of bytes to add, or -1 for the whole string */
  unsigned int offset;  /* expected cluster of the first replacement glyph;
                         * only checked when valid is FALSE */
  gboolean valid;       /* TRUE if no replacement glyph may appear */
} utf8_validity_test_t;
425 
/* Rows are { utf8, max_len, offset, valid }.  The offset column is only
 * compared by the test for rows whose valid column is FALSE. */
static const utf8_validity_test_t utf8_validity_tests[] = {
  /* some tests to check max_len handling */
  /* length 1 */
  { "abcde", -1, 5, TRUE },
  { "abcde", 3, 3, TRUE },
  { "abcde", 5, 5, TRUE },
  /* length 2 */
  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
  /* length 3 */
  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },

  /* examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
  /* greek 'kosme' */
  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
  /* first sequence of each length */
  { "\x00", -1, 0, TRUE },
  { "\xc2\x80", -1, 2, TRUE },
  { "\xe0\xa0\x80", -1, 3, TRUE },
  { "\xf0\x90\x80\x80", -1, 4, TRUE },
  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
  /* last sequence of each length */
  { "\x7f", -1, 1, TRUE },
  { "\xdf\xbf", -1, 2, TRUE },
  { "\xef\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  /* other boundary conditions */
  { "\xed\x9f\xbf", -1, 3, TRUE },
  { "\xed\xa0\x80", -1, 0, FALSE },
  { "\xed\xbf\xbf", -1, 0, FALSE },
  { "\xee\x80\x80", -1, 3, TRUE },
  { "\xef\xbf\xbd", -1, 3, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  /* malformed sequences */
  /* continuation bytes */
  { "\x80", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  { "\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },

  /* all possible continuation byte */
  { "\x80", -1, 0, FALSE },
  { "\x81", -1, 0, FALSE },
  { "\x82", -1, 0, FALSE },
  { "\x83", -1, 0, FALSE },
  { "\x84", -1, 0, FALSE },
  { "\x85", -1, 0, FALSE },
  { "\x86", -1, 0, FALSE },
  { "\x87", -1, 0, FALSE },
  { "\x88", -1, 0, FALSE },
  { "\x89", -1, 0, FALSE },
  { "\x8a", -1, 0, FALSE },
  { "\x8b", -1, 0, FALSE },
  { "\x8c", -1, 0, FALSE },
  { "\x8d", -1, 0, FALSE },
  { "\x8e", -1, 0, FALSE },
  { "\x8f", -1, 0, FALSE },
  { "\x90", -1, 0, FALSE },
  { "\x91", -1, 0, FALSE },
  { "\x92", -1, 0, FALSE },
  { "\x93", -1, 0, FALSE },
  { "\x94", -1, 0, FALSE },
  { "\x95", -1, 0, FALSE },
  { "\x96", -1, 0, FALSE },
  { "\x97", -1, 0, FALSE },
  { "\x98", -1, 0, FALSE },
  { "\x99", -1, 0, FALSE },
  { "\x9a", -1, 0, FALSE },
  { "\x9b", -1, 0, FALSE },
  { "\x9c", -1, 0, FALSE },
  { "\x9d", -1, 0, FALSE },
  { "\x9e", -1, 0, FALSE },
  { "\x9f", -1, 0, FALSE },
  { "\xa0", -1, 0, FALSE },
  { "\xa1", -1, 0, FALSE },
  { "\xa2", -1, 0, FALSE },
  { "\xa3", -1, 0, FALSE },
  { "\xa4", -1, 0, FALSE },
  { "\xa5", -1, 0, FALSE },
  { "\xa6", -1, 0, FALSE },
  { "\xa7", -1, 0, FALSE },
  { "\xa8", -1, 0, FALSE },
  { "\xa9", -1, 0, FALSE },
  { "\xaa", -1, 0, FALSE },
  { "\xab", -1, 0, FALSE },
  { "\xac", -1, 0, FALSE },
  { "\xad", -1, 0, FALSE },
  { "\xae", -1, 0, FALSE },
  { "\xaf", -1, 0, FALSE },
  { "\xb0", -1, 0, FALSE },
  { "\xb1", -1, 0, FALSE },
  { "\xb2", -1, 0, FALSE },
  { "\xb3", -1, 0, FALSE },
  { "\xb4", -1, 0, FALSE },
  { "\xb5", -1, 0, FALSE },
  { "\xb6", -1, 0, FALSE },
  { "\xb7", -1, 0, FALSE },
  { "\xb8", -1, 0, FALSE },
  { "\xb9", -1, 0, FALSE },
  { "\xba", -1, 0, FALSE },
  { "\xbb", -1, 0, FALSE },
  { "\xbc", -1, 0, FALSE },
  { "\xbd", -1, 0, FALSE },
  { "\xbe", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  /* lone start characters */
  { "\xc0\x20", -1, 0, FALSE },
  { "\xc1\x20", -1, 0, FALSE },
  { "\xc2\x20", -1, 0, FALSE },
  { "\xc3\x20", -1, 0, FALSE },
  { "\xc4\x20", -1, 0, FALSE },
  { "\xc5\x20", -1, 0, FALSE },
  { "\xc6\x20", -1, 0, FALSE },
  { "\xc7\x20", -1, 0, FALSE },
  { "\xc8\x20", -1, 0, FALSE },
  { "\xc9\x20", -1, 0, FALSE },
  { "\xca\x20", -1, 0, FALSE },
  { "\xcb\x20", -1, 0, FALSE },
  { "\xcc\x20", -1, 0, FALSE },
  { "\xcd\x20", -1, 0, FALSE },
  { "\xce\x20", -1, 0, FALSE },
  { "\xcf\x20", -1, 0, FALSE },
  { "\xd0\x20", -1, 0, FALSE },
  { "\xd1\x20", -1, 0, FALSE },
  { "\xd2\x20", -1, 0, FALSE },
  { "\xd3\x20", -1, 0, FALSE },
  { "\xd4\x20", -1, 0, FALSE },
  { "\xd5\x20", -1, 0, FALSE },
  { "\xd6\x20", -1, 0, FALSE },
  { "\xd7\x20", -1, 0, FALSE },
  { "\xd8\x20", -1, 0, FALSE },
  { "\xd9\x20", -1, 0, FALSE },
  { "\xda\x20", -1, 0, FALSE },
  { "\xdb\x20", -1, 0, FALSE },
  { "\xdc\x20", -1, 0, FALSE },
  { "\xdd\x20", -1, 0, FALSE },
  { "\xde\x20", -1, 0, FALSE },
  { "\xdf\x20", -1, 0, FALSE },
  { "\xe0\x20", -1, 0, FALSE },
  { "\xe1\x20", -1, 0, FALSE },
  { "\xe2\x20", -1, 0, FALSE },
  { "\xe3\x20", -1, 0, FALSE },
  { "\xe4\x20", -1, 0, FALSE },
  { "\xe5\x20", -1, 0, FALSE },
  { "\xe6\x20", -1, 0, FALSE },
  { "\xe7\x20", -1, 0, FALSE },
  { "\xe8\x20", -1, 0, FALSE },
  { "\xe9\x20", -1, 0, FALSE },
  { "\xea\x20", -1, 0, FALSE },
  { "\xeb\x20", -1, 0, FALSE },
  { "\xec\x20", -1, 0, FALSE },
  { "\xed\x20", -1, 0, FALSE },
  { "\xee\x20", -1, 0, FALSE },
  { "\xef\x20", -1, 0, FALSE },
  { "\xf0\x20", -1, 0, FALSE },
  { "\xf1\x20", -1, 0, FALSE },
  { "\xf2\x20", -1, 0, FALSE },
  { "\xf3\x20", -1, 0, FALSE },
  { "\xf4\x20", -1, 0, FALSE },
  { "\xf5\x20", -1, 0, FALSE },
  { "\xf6\x20", -1, 0, FALSE },
  { "\xf7\x20", -1, 0, FALSE },
  { "\xf8\x20", -1, 0, FALSE },
  { "\xf9\x20", -1, 0, FALSE },
  { "\xfa\x20", -1, 0, FALSE },
  { "\xfb\x20", -1, 0, FALSE },
  { "\xfc\x20", -1, 0, FALSE },
  { "\xfd\x20", -1, 0, FALSE },
  /* missing continuation bytes */
  { "\x20\xc0", -1, 1, FALSE },
  { "\x20\xe0\x80", -1, 1, FALSE },
  { "\x20\xf0\x80\x80", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xdf", -1, 1, FALSE },
  { "\x20\xef\xbf", -1, 1, FALSE },
  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
  /* impossible bytes */
  { "\x20\xfe\x20", -1, 1, FALSE },
  { "\x20\xff\x20", -1, 1, FALSE },
  /* overlong sequences */
  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xc0\x80\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
  /* illegal code positions */
  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
#if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
#endif
  { "", -1, 0, TRUE }
};
666 
667 static void
test_buffer_utf8_validity(void)668 test_buffer_utf8_validity (void)
669 {
670   hb_buffer_t *b;
671   unsigned int i;
672 
673   b = hb_buffer_create ();
674   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
675 
676   for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
677   {
678     const utf8_validity_test_t *test = &utf8_validity_tests[i];
679     unsigned int text_bytes, segment_bytes, j, len;
680     hb_glyph_info_t *glyphs;
681     char *escaped;
682 
683     escaped = g_strescape (test->utf8, NULL);
684     g_test_message ("UTF-8 test #%d: %s", i, escaped);
685     g_free (escaped);
686 
687     text_bytes = strlen (test->utf8);
688     if (test->max_len == -1)
689       segment_bytes = text_bytes;
690     else
691       segment_bytes = test->max_len;
692 
693     hb_buffer_clear_contents (b);
694     hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
695 
696     glyphs = hb_buffer_get_glyph_infos (b, &len);
697     for (j = 0; j < len; j++)
698       if (glyphs[j].codepoint == (hb_codepoint_t) -1)
699 	break;
700 
701     g_assert (test->valid ? j == len : j < len);
702     if (!test->valid)
703       g_assert (glyphs[j].cluster == test->offset);
704   }
705 
706   hb_buffer_destroy (b);
707 }
708 
709 
/* One UTF-16 decoding case: input code units and the codepoints expected in
 * the buffer.  Both arrays are terminated by a 0 entry (unused trailing
 * slots are zero-initialized); the test measures them up to the first 0. */
typedef struct {
  const uint16_t utf16[8];
  const uint32_t codepoints[8];
} utf16_conversion_test_t;

/* note: we skip the first and last item from utf16 when adding to buffer.
 * (hb_codepoint_t) -1 is the replacement codepoint the test installs, so it
 * marks positions where an undecodable code unit is expected. */
static const utf16_conversion_test_t utf16_conversion_tests[] = {
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
  {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -1}},
  {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -1}},
  {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -1, 0x61}},
  {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -1}},
  {{0x41, 0x61}, {0}}
};
725 
726 static void
test_buffer_utf16_conversion(void)727 test_buffer_utf16_conversion (void)
728 {
729   hb_buffer_t *b;
730   unsigned int i;
731 
732   b = hb_buffer_create ();
733   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
734 
735   for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
736   {
737     const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
738     unsigned int u_len, chars, j, len;
739     hb_glyph_info_t *glyphs;
740 
741     g_test_message ("UTF-16 test #%d", i);
742 
743     for (u_len = 0; test->utf16[u_len]; u_len++)
744       ;
745     for (chars = 0; test->codepoints[chars]; chars++)
746       ;
747 
748     hb_buffer_clear_contents (b);
749     hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
750 
751     glyphs = hb_buffer_get_glyph_infos (b, &len);
752     g_assert_cmpint (len, ==, chars);
753     for (j = 0; j < chars; j++)
754       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
755   }
756 
757   hb_buffer_destroy (b);
758 }
759 
760 
/* One UTF-32 validation case: input values and the codepoints expected in
 * the buffer.  Both arrays are terminated by a 0 entry (unused trailing
 * slots are zero-initialized); the test measures them up to the first 0. */
typedef struct {
  const uint32_t utf32[8];
  const uint32_t codepoints[8];
} utf32_conversion_test_t;

/* note: we skip the first and last item from utf32 when adding to buffer.
 * (hb_codepoint_t) -3 is the replacement codepoint the test installs, so it
 * marks positions where the input value is expected to be rejected
 * (surrogate values and 0x110000 in the rows below). */
static const utf32_conversion_test_t utf32_conversion_tests[] = {
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, (hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  {{0x41, 0xD800, 0xDF02, 0x61}, {(hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -3}},
  {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -3}},
  {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -3, 0x61}},
  {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -3}},
  {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
  {{0x41, 0x110000, 0x61}, {(hb_codepoint_t) -3}},
  {{0x41, 0x61}, {0}}
};
779 
780 static void
test_buffer_utf32_conversion(void)781 test_buffer_utf32_conversion (void)
782 {
783   hb_buffer_t *b;
784   unsigned int i;
785 
786   b = hb_buffer_create ();
787   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
788 
789   for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
790   {
791     const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
792     unsigned int u_len, chars, j, len;
793     hb_glyph_info_t *glyphs;
794 
795     g_test_message ("UTF-32 test #%d", i);
796 
797     for (u_len = 0; test->utf32[u_len]; u_len++)
798       ;
799     for (chars = 0; test->codepoints[chars]; chars++)
800       ;
801 
802     hb_buffer_clear_contents (b);
803     hb_buffer_add_utf32 (b, test->utf32, u_len,  1, u_len - 2);
804 
805     glyphs = hb_buffer_get_glyph_infos (b, &len);
806     g_assert_cmpint (len, ==, chars);
807     for (j = 0; j < chars; j++)
808       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
809   }
810 
811   hb_buffer_destroy (b);
812 }
813 
814 
815 static void
test_empty(hb_buffer_t * b)816 test_empty (hb_buffer_t *b)
817 {
818   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
819   g_assert (!hb_buffer_get_glyph_infos (b, NULL));
820   g_assert (!hb_buffer_get_glyph_positions (b, NULL));
821 }
822 
823 static void
test_buffer_empty(void)824 test_buffer_empty (void)
825 {
826   hb_buffer_t *b = hb_buffer_get_empty ();
827 
828   g_assert (hb_buffer_get_empty ());
829   g_assert (hb_buffer_get_empty () == b);
830 
831   g_assert (!hb_buffer_allocation_successful (b));
832 
833   test_empty (b);
834 
835   hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
836 
837   test_empty (b);
838 
839   hb_buffer_reverse (b);
840   hb_buffer_reverse_clusters (b);
841 
842   g_assert (!hb_buffer_set_length (b, 10));
843 
844   test_empty (b);
845 
846   g_assert (hb_buffer_set_length (b, 0));
847 
848   test_empty (b);
849 
850   g_assert (!hb_buffer_allocation_successful (b));
851 
852   hb_buffer_reset (b);
853 
854   test_empty (b);
855 
856   g_assert (!hb_buffer_allocation_successful (b));
857 }
858 
/* One serialization round-trip case, as consumed by
 * test_buffer_serialize_deserialize(). */
typedef struct {
  const char *contents;                 /* serialized text fed to the deserializer */
  hb_buffer_serialize_format_t format;  /* format used to parse and re-serialize */
  unsigned int num_items;               /* expected buffer length after parsing;
                                         * only checked when success is non-zero */
  hb_bool_t success;                    /* non-zero: parsing is expected to work
                                         * and the round trip is verified */
} serialization_test_t;

static const serialization_test_t serialization_tests[] = {
  { "<U+0640=0|U+0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 2, 1 },
  { "[{\"u\":1600,\"cl\":0},{\"u\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 2, 1 },

  /* Mixed glyphs/Unicodes -> parse fail */
  { "[{\"u\":1600,\"cl\":0},{\"g\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 0, 0 },
  { "<U+0640=0|uni0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 0, 0 },
};
874 
875 static void
test_buffer_serialize_deserialize(void)876 test_buffer_serialize_deserialize (void)
877 {
878   hb_buffer_t *b;
879   unsigned int i;
880 
881   for (i = 0; i < G_N_ELEMENTS (serialization_tests); i++)
882   {
883     unsigned int consumed;
884     char round_trip[1024];
885 
886     b = hb_buffer_create ();
887     hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
888 
889     const serialization_test_t *test = &serialization_tests[i];
890     g_test_message ("serialize test #%d", i);
891 
892     (void) hb_buffer_deserialize_unicode (b, test->contents, -1, NULL, test->format);
893 
894     // Expected parse failure, got one, don't round-trip
895     if (test->success != 0)
896     {
897       unsigned int num_glyphs = hb_buffer_get_length (b);
898       g_assert_cmpint (num_glyphs, ==, test->num_items);
899 
900       hb_buffer_serialize_unicode (b, 0, num_glyphs, round_trip,
901 				   sizeof(round_trip), &consumed, test->format,
902 				   HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
903       g_assert_cmpstr (round_trip, ==, test->contents);
904     }
905 
906     hb_buffer_destroy (b);
907 
908   }
909 
910   char test[1024];
911   unsigned int consumed;
912   hb_buffer_t *indeterminate = hb_buffer_get_empty ();
913   hb_buffer_serialize (indeterminate, 0, (unsigned) -1,
914 		       test, sizeof(test), &consumed, NULL,
915 		       HB_BUFFER_SERIALIZE_FORMAT_JSON,
916 		       HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
917   g_assert_cmpstr ( test, ==, "[]");
918 
919   hb_buffer_serialize (indeterminate, 0, (unsigned) - 1,
920 		       test, sizeof(test), &consumed, NULL,
921 		       HB_BUFFER_SERIALIZE_FORMAT_TEXT,
922 		       HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
923   g_assert_cmpstr ( test, ==, "!!");
924 
925 }
926 
/* Test program entry point: registers every test in this file and runs
 * them.  The hb_test_* helpers are not defined in this file (they
 * presumably come from hb-test.h -- confirm there how the `fixture`
 * argument and the test function names are expanded). */
int
main (int argc, char **argv)
{
  unsigned int i;

  hb_test_init (&argc, &argv);

  /* The properties, contents and positions tests are registered once per
   * buffer flavor; the flavor is passed to the fixture as user data. */
  for (i = 0; i < BUFFER_NUM_TYPES; i++)
  {
    const void *buffer_type = GINT_TO_POINTER (i);
    const char *buffer_name = buffer_names[i];

    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
    hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
  }

  /* The allocation test is registered only with the empty flavor. */
  hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);

  /* Tests that create their own buffers and need no fixture. */
  hb_test_add (test_buffer_utf8_conversion);
  hb_test_add (test_buffer_utf8_validity);
  hb_test_add (test_buffer_utf16_conversion);
  hb_test_add (test_buffer_utf32_conversion);
  hb_test_add (test_buffer_empty);
  hb_test_add (test_buffer_serialize_deserialize);

  return hb_test_run();
}
955