1 /*
2 * Copyright © 2018 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod
25 */
26
27 #include "hb-subset.hh"
28 #include "hb-set.hh"
29 #include "hb-utf.hh"
30 /**
31 * hb_subset_input_create_or_fail:
32 *
33 * Creates a new subset input object.
34 *
35 * Return value: (transfer full): New subset input, or `NULL` if failed. Destroy
36 * with hb_subset_input_destroy().
37 *
38 * Since: 1.8.0
39 **/
40 hb_subset_input_t *
hb_subset_input_create_or_fail(void)41 hb_subset_input_create_or_fail (void)
42 {
43 hb_subset_input_t *input = hb_object_create<hb_subset_input_t>();
44
45 if (unlikely (!input))
46 return nullptr;
47
48 for (auto& set : input->sets_iter ())
49 set = hb_set_create ();
50
51 input->axes_location = hb_hashmap_create<hb_tag_t, float> ();
52 #ifdef HB_EXPERIMENTAL_API
53 input->name_table_overrides = hb_hashmap_create<hb_ot_name_record_ids_t, hb_bytes_t> ();
54 #endif
55
56 if (!input->axes_location ||
57 #ifdef HB_EXPERIMENTAL_API
58 !input->name_table_overrides ||
59 #endif
60 input->in_error ())
61 {
62 hb_subset_input_destroy (input);
63 return nullptr;
64 }
65
66 input->flags = HB_SUBSET_FLAGS_DEFAULT;
67
68 hb_set_add_range (input->sets.name_ids, 0, 6);
69 hb_set_add (input->sets.name_languages, 0x0409);
70
71 hb_tag_t default_drop_tables[] = {
72 // Layout disabled by default
73 HB_TAG ('m', 'o', 'r', 'x'),
74 HB_TAG ('m', 'o', 'r', 't'),
75 HB_TAG ('k', 'e', 'r', 'x'),
76 HB_TAG ('k', 'e', 'r', 'n'),
77
78 // Copied from fontTools:
79 HB_TAG ('B', 'A', 'S', 'E'),
80 HB_TAG ('J', 'S', 'T', 'F'),
81 HB_TAG ('D', 'S', 'I', 'G'),
82 HB_TAG ('E', 'B', 'D', 'T'),
83 HB_TAG ('E', 'B', 'L', 'C'),
84 HB_TAG ('E', 'B', 'S', 'C'),
85 HB_TAG ('S', 'V', 'G', ' '),
86 HB_TAG ('P', 'C', 'L', 'T'),
87 HB_TAG ('L', 'T', 'S', 'H'),
88 // Graphite tables
89 HB_TAG ('F', 'e', 'a', 't'),
90 HB_TAG ('G', 'l', 'a', 't'),
91 HB_TAG ('G', 'l', 'o', 'c'),
92 HB_TAG ('S', 'i', 'l', 'f'),
93 HB_TAG ('S', 'i', 'l', 'l'),
94 };
95 input->sets.drop_tables->add_array (default_drop_tables, ARRAY_LENGTH (default_drop_tables));
96
97 hb_tag_t default_no_subset_tables[] = {
98 HB_TAG ('a', 'v', 'a', 'r'),
99 HB_TAG ('g', 'a', 's', 'p'),
100 HB_TAG ('c', 'v', 't', ' '),
101 HB_TAG ('f', 'p', 'g', 'm'),
102 HB_TAG ('p', 'r', 'e', 'p'),
103 HB_TAG ('V', 'D', 'M', 'X'),
104 HB_TAG ('D', 'S', 'I', 'G'),
105 HB_TAG ('M', 'V', 'A', 'R'),
106 HB_TAG ('c', 'v', 'a', 'r'),
107 };
108 input->sets.no_subset_tables->add_array (default_no_subset_tables,
109 ARRAY_LENGTH (default_no_subset_tables));
110
111 //copied from _layout_features_groups in fonttools
112 hb_tag_t default_layout_features[] = {
113 // default shaper
114 // common
115 HB_TAG ('r', 'v', 'r', 'n'),
116 HB_TAG ('c', 'c', 'm', 'p'),
117 HB_TAG ('l', 'i', 'g', 'a'),
118 HB_TAG ('l', 'o', 'c', 'l'),
119 HB_TAG ('m', 'a', 'r', 'k'),
120 HB_TAG ('m', 'k', 'm', 'k'),
121 HB_TAG ('r', 'l', 'i', 'g'),
122
123 //fractions
124 HB_TAG ('f', 'r', 'a', 'c'),
125 HB_TAG ('n', 'u', 'm', 'r'),
126 HB_TAG ('d', 'n', 'o', 'm'),
127
128 //horizontal
129 HB_TAG ('c', 'a', 'l', 't'),
130 HB_TAG ('c', 'l', 'i', 'g'),
131 HB_TAG ('c', 'u', 'r', 's'),
132 HB_TAG ('k', 'e', 'r', 'n'),
133 HB_TAG ('r', 'c', 'l', 't'),
134
135 //vertical
136 HB_TAG ('v', 'a', 'l', 't'),
137 HB_TAG ('v', 'e', 'r', 't'),
138 HB_TAG ('v', 'k', 'r', 'n'),
139 HB_TAG ('v', 'p', 'a', 'l'),
140 HB_TAG ('v', 'r', 't', '2'),
141
142 //ltr
143 HB_TAG ('l', 't', 'r', 'a'),
144 HB_TAG ('l', 't', 'r', 'm'),
145
146 //rtl
147 HB_TAG ('r', 't', 'l', 'a'),
148 HB_TAG ('r', 't', 'l', 'm'),
149
150 //random
151 HB_TAG ('r', 'a', 'n', 'd'),
152
153 //justify
154 HB_TAG ('j', 'a', 'l', 't'), // HarfBuzz doesn't use; others might
155
156 //private
157 HB_TAG ('H', 'a', 'r', 'f'),
158 HB_TAG ('H', 'A', 'R', 'F'),
159 HB_TAG ('B', 'u', 'z', 'z'),
160 HB_TAG ('B', 'U', 'Z', 'Z'),
161
162 //shapers
163
164 //arabic
165 HB_TAG ('i', 'n', 'i', 't'),
166 HB_TAG ('m', 'e', 'd', 'i'),
167 HB_TAG ('f', 'i', 'n', 'a'),
168 HB_TAG ('i', 's', 'o', 'l'),
169 HB_TAG ('m', 'e', 'd', '2'),
170 HB_TAG ('f', 'i', 'n', '2'),
171 HB_TAG ('f', 'i', 'n', '3'),
172 HB_TAG ('c', 's', 'w', 'h'),
173 HB_TAG ('m', 's', 'e', 't'),
174 HB_TAG ('s', 't', 'c', 'h'),
175
176 //hangul
177 HB_TAG ('l', 'j', 'm', 'o'),
178 HB_TAG ('v', 'j', 'm', 'o'),
179 HB_TAG ('t', 'j', 'm', 'o'),
180
181 //tibetan
182 HB_TAG ('a', 'b', 'v', 's'),
183 HB_TAG ('b', 'l', 'w', 's'),
184 HB_TAG ('a', 'b', 'v', 'm'),
185 HB_TAG ('b', 'l', 'w', 'm'),
186
187 //indic
188 HB_TAG ('n', 'u', 'k', 't'),
189 HB_TAG ('a', 'k', 'h', 'n'),
190 HB_TAG ('r', 'p', 'h', 'f'),
191 HB_TAG ('r', 'k', 'r', 'f'),
192 HB_TAG ('p', 'r', 'e', 'f'),
193 HB_TAG ('b', 'l', 'w', 'f'),
194 HB_TAG ('h', 'a', 'l', 'f'),
195 HB_TAG ('a', 'b', 'v', 'f'),
196 HB_TAG ('p', 's', 't', 'f'),
197 HB_TAG ('c', 'f', 'a', 'r'),
198 HB_TAG ('v', 'a', 't', 'u'),
199 HB_TAG ('c', 'j', 'c', 't'),
200 HB_TAG ('i', 'n', 'i', 't'),
201 HB_TAG ('p', 'r', 'e', 's'),
202 HB_TAG ('a', 'b', 'v', 's'),
203 HB_TAG ('b', 'l', 'w', 's'),
204 HB_TAG ('p', 's', 't', 's'),
205 HB_TAG ('h', 'a', 'l', 'n'),
206 HB_TAG ('d', 'i', 's', 't'),
207 HB_TAG ('a', 'b', 'v', 'm'),
208 HB_TAG ('b', 'l', 'w', 'm'),
209 };
210
211 input->sets.layout_features->add_array (default_layout_features, ARRAY_LENGTH (default_layout_features));
212
213 input->sets.layout_scripts->invert (); // Default to all scripts.
214
215 if (input->in_error ())
216 {
217 hb_subset_input_destroy (input);
218 return nullptr;
219 }
220 return input;
221 }
222
223 /**
224 * hb_subset_input_reference: (skip)
225 * @input: a #hb_subset_input_t object.
226 *
227 * Increases the reference count on @input.
228 *
229 * Return value: @input.
230 *
231 * Since: 1.8.0
232 **/
233 hb_subset_input_t *
hb_subset_input_reference(hb_subset_input_t * input)234 hb_subset_input_reference (hb_subset_input_t *input)
235 {
236 return hb_object_reference (input);
237 }
238
239 /**
240 * hb_subset_input_destroy:
241 * @input: a #hb_subset_input_t object.
242 *
243 * Decreases the reference count on @input, and if it reaches zero, destroys
244 * @input, freeing all memory.
245 *
246 * Since: 1.8.0
247 **/
248 void
hb_subset_input_destroy(hb_subset_input_t * input)249 hb_subset_input_destroy (hb_subset_input_t *input)
250 {
251 if (!hb_object_destroy (input)) return;
252
253 for (hb_set_t* set : input->sets_iter ())
254 hb_set_destroy (set);
255
256 hb_hashmap_destroy (input->axes_location);
257
258 #ifdef HB_EXPERIMENTAL_API
259 if (input->name_table_overrides)
260 {
261 for (auto _ : *input->name_table_overrides)
262 _.second.fini ();
263 }
264 hb_hashmap_destroy (input->name_table_overrides);
265 #endif
266
267 hb_free (input);
268 }
269
270 /**
271 * hb_subset_input_unicode_set:
272 * @input: a #hb_subset_input_t object.
273 *
274 * Gets the set of Unicode code points to retain, the caller should modify the
275 * set as needed.
276 *
277 * Return value: (transfer none): pointer to the #hb_set_t of Unicode code
278 * points.
279 *
280 * Since: 1.8.0
281 **/
282 HB_EXTERN hb_set_t *
hb_subset_input_unicode_set(hb_subset_input_t * input)283 hb_subset_input_unicode_set (hb_subset_input_t *input)
284 {
285 return input->sets.unicodes;
286 }
287
288 /**
289 * hb_subset_input_glyph_set:
290 * @input: a #hb_subset_input_t object.
291 *
292 * Gets the set of glyph IDs to retain, the caller should modify the set as
293 * needed.
294 *
295 * Return value: (transfer none): pointer to the #hb_set_t of glyph IDs.
296 *
297 * Since: 1.8.0
298 **/
299 HB_EXTERN hb_set_t *
hb_subset_input_glyph_set(hb_subset_input_t * input)300 hb_subset_input_glyph_set (hb_subset_input_t *input)
301 {
302 return input->sets.glyphs;
303 }
304
305 /**
306 * hb_subset_input_set:
307 * @input: a #hb_subset_input_t object.
308 * @set_type: a #hb_subset_sets_t set type.
309 *
310 * Gets the set of the specified type.
311 *
312 * Return value: (transfer none): pointer to the #hb_set_t of the specified type.
313 *
314 * Since: 2.9.1
315 **/
316 HB_EXTERN hb_set_t *
hb_subset_input_set(hb_subset_input_t * input,hb_subset_sets_t set_type)317 hb_subset_input_set (hb_subset_input_t *input, hb_subset_sets_t set_type)
318 {
319 return input->sets_iter () [set_type];
320 }
321
322 /**
323 * hb_subset_input_get_flags:
324 * @input: a #hb_subset_input_t object.
325 *
326 * Gets all of the subsetting flags in the input object.
327 *
328 * Return value: the subsetting flags bit field.
329 *
330 * Since: 2.9.0
331 **/
332 HB_EXTERN hb_subset_flags_t
hb_subset_input_get_flags(hb_subset_input_t * input)333 hb_subset_input_get_flags (hb_subset_input_t *input)
334 {
335 return (hb_subset_flags_t) input->flags;
336 }
337
338 /**
339 * hb_subset_input_set_flags:
340 * @input: a #hb_subset_input_t object.
341 * @value: bit field of flags
342 *
343 * Sets all of the flags in the input object to the values specified by the bit
344 * field.
345 *
346 * Since: 2.9.0
347 **/
348 HB_EXTERN void
hb_subset_input_set_flags(hb_subset_input_t * input,unsigned value)349 hb_subset_input_set_flags (hb_subset_input_t *input,
350 unsigned value)
351 {
352 input->flags = (hb_subset_flags_t) value;
353 }
354
355 /**
356 * hb_subset_input_set_user_data: (skip)
357 * @input: a #hb_subset_input_t object.
358 * @key: The user-data key to set
359 * @data: A pointer to the user data
360 * @destroy: (nullable): A callback to call when @data is not needed anymore
361 * @replace: Whether to replace an existing data with the same key
362 *
363 * Attaches a user-data key/data pair to the given subset input object.
364 *
365 * Return value: `true` if success, `false` otherwise
366 *
367 * Since: 2.9.0
368 **/
369 hb_bool_t
hb_subset_input_set_user_data(hb_subset_input_t * input,hb_user_data_key_t * key,void * data,hb_destroy_func_t destroy,hb_bool_t replace)370 hb_subset_input_set_user_data (hb_subset_input_t *input,
371 hb_user_data_key_t *key,
372 void * data,
373 hb_destroy_func_t destroy,
374 hb_bool_t replace)
375 {
376 return hb_object_set_user_data (input, key, data, destroy, replace);
377 }
378
379 /**
380 * hb_subset_input_get_user_data: (skip)
381 * @input: a #hb_subset_input_t object.
382 * @key: The user-data key to query
383 *
384 * Fetches the user data associated with the specified key,
385 * attached to the specified subset input object.
386 *
387 * Return value: (transfer none): A pointer to the user data
388 *
389 * Since: 2.9.0
390 **/
391 void *
hb_subset_input_get_user_data(const hb_subset_input_t * input,hb_user_data_key_t * key)392 hb_subset_input_get_user_data (const hb_subset_input_t *input,
393 hb_user_data_key_t *key)
394 {
395 return hb_object_get_user_data (input, key);
396 }
397
398 #ifndef HB_NO_VAR
399 /**
400 * hb_subset_input_pin_axis_to_default: (skip)
401 * @input: a #hb_subset_input_t object.
402 * @axis_tag: Tag of the axis to be pinned
403 *
404 * Pin an axis to its default location in the given subset input object.
405 *
406 * Currently only works for fonts with 'glyf' tables. CFF and CFF2 is not
407 * yet supported. Additionally all axes in a font must be pinned.
408 *
409 * Return value: `true` if success, `false` otherwise
410 *
411 * Since: 6.0.0
412 **/
413 HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_to_default(hb_subset_input_t * input,hb_face_t * face,hb_tag_t axis_tag)414 hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
415 hb_face_t *face,
416 hb_tag_t axis_tag)
417 {
418 hb_ot_var_axis_info_t axis_info;
419 if (!hb_ot_var_find_axis_info (face, axis_tag, &axis_info))
420 return false;
421
422 return input->axes_location->set (axis_tag, axis_info.default_value);
423 }
424
425 /**
426 * hb_subset_input_pin_axis_location: (skip)
427 * @input: a #hb_subset_input_t object.
428 * @axis_tag: Tag of the axis to be pinned
429 * @axis_value: Location on the axis to be pinned at
430 *
431 * Pin an axis to a fixed location in the given subset input object.
432 *
433 * Currently only works for fonts with 'glyf' tables. CFF and CFF2 is not
434 * yet supported. Additionally all axes in a font must be pinned.
435 *
436 * Return value: `true` if success, `false` otherwise
437 *
438 * Since: 6.0.0
439 **/
440 HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_location(hb_subset_input_t * input,hb_face_t * face,hb_tag_t axis_tag,float axis_value)441 hb_subset_input_pin_axis_location (hb_subset_input_t *input,
442 hb_face_t *face,
443 hb_tag_t axis_tag,
444 float axis_value)
445 {
446 hb_ot_var_axis_info_t axis_info;
447 if (!hb_ot_var_find_axis_info (face, axis_tag, &axis_info))
448 return false;
449
450 float val = hb_clamp(axis_value, axis_info.min_value, axis_info.max_value);
451 return input->axes_location->set (axis_tag, val);
452 }
453 #endif
454
455 /**
456 * hb_subset_preprocess:
457 * @source: a #hb_face_t object.
458 *
459 * Preprocesses the face and attaches data that will be needed by the
460 * subsetter. Future subsetting operations can then use the precomputed data
461 * to speed up the subsetting operation.
462 *
463 * See [subset-preprocessing](https://github.com/harfbuzz/harfbuzz/blob/main/docs/subset-preprocessing.md)
464 * for more information.
465 *
466 * Note: the preprocessed face may contain sub-blobs that reference the memory
467 * backing the source #hb_face_t. Therefore in the case that this memory is not
468 * owned by the source face you will need to ensure that memory lives
469 * as long as the returned #hb_face_t.
470 *
471 * Returns: a new #hb_face_t.
472 *
473 * Since: 6.0.0
474 **/
475
476 HB_EXTERN hb_face_t *
hb_subset_preprocess(hb_face_t * source)477 hb_subset_preprocess (hb_face_t *source)
478 {
479 hb_subset_input_t* input = hb_subset_input_create_or_fail ();
480 if (!input)
481 return source;
482
483 hb_set_clear (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
484 hb_set_invert (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
485
486 hb_set_clear (hb_subset_input_set(input, HB_SUBSET_SETS_GLYPH_INDEX));
487 hb_set_invert (hb_subset_input_set(input, HB_SUBSET_SETS_GLYPH_INDEX));
488
489 hb_set_clear (hb_subset_input_set(input,
490 HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
491 hb_set_invert (hb_subset_input_set(input,
492 HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
493
494 hb_set_clear (hb_subset_input_set(input,
495 HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
496 hb_set_invert (hb_subset_input_set(input,
497 HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
498
499 hb_set_clear (hb_subset_input_set(input,
500 HB_SUBSET_SETS_NAME_ID));
501 hb_set_invert (hb_subset_input_set(input,
502 HB_SUBSET_SETS_NAME_ID));
503
504 hb_set_clear (hb_subset_input_set(input,
505 HB_SUBSET_SETS_NAME_LANG_ID));
506 hb_set_invert (hb_subset_input_set(input,
507 HB_SUBSET_SETS_NAME_LANG_ID));
508
509 hb_subset_input_set_flags(input,
510 HB_SUBSET_FLAGS_NOTDEF_OUTLINE |
511 HB_SUBSET_FLAGS_GLYPH_NAMES |
512 HB_SUBSET_FLAGS_RETAIN_GIDS |
513 HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES);
514 input->attach_accelerator_data = true;
515
516 // Always use long loca in the preprocessed version. This allows
517 // us to store the glyph bytes unpadded which allows the future subset
518 // operation to run faster by skipping the trim padding step.
519 input->force_long_loca = true;
520
521 hb_face_t* new_source = hb_subset_or_fail (source, input);
522 hb_subset_input_destroy (input);
523
524 if (!new_source) {
525 DEBUG_MSG (SUBSET, nullptr, "Preprocessing failed due to subset failure.");
526 return source;
527 }
528
529 return new_source;
530 }
531
532 #ifdef HB_EXPERIMENTAL_API
533 /**
534 * hb_subset_input_override_name_table:
535 * @input: a #hb_subset_input_t object.
536 * @name_id: name_id of a nameRecord
537 * @platform_id: platform ID of a nameRecord
538 * @encoding_id: encoding ID of a nameRecord
539 * @language_id: language ID of a nameRecord
540 * @name_str: pointer to name string new value or null to indicate should remove
541 * @str_len: the size of @name_str, or -1 if it is `NULL`-terminated
542 *
543 * Override the name string of the NameRecord identified by name_id,
544 * platform_id, encoding_id and language_id. If a record with that name_id
545 * doesn't exist, create it and insert to the name table.
546 *
547 * Note: for mac platform, we only support name_str with all ascii characters,
548 * name_str with non-ascii characters will be ignored.
549 *
550 * Since: EXPERIMENTAL
551 **/
552 HB_EXTERN hb_bool_t
hb_subset_input_override_name_table(hb_subset_input_t * input,hb_ot_name_id_t name_id,unsigned platform_id,unsigned encoding_id,unsigned language_id,const char * name_str,int str_len)553 hb_subset_input_override_name_table (hb_subset_input_t *input,
554 hb_ot_name_id_t name_id,
555 unsigned platform_id,
556 unsigned encoding_id,
557 unsigned language_id,
558 const char *name_str,
559 int str_len /* -1 means nul-terminated */)
560 {
561 if (!name_str)
562 {
563 str_len = 0;
564 }
565 else if (str_len == -1)
566 {
567 str_len = strlen (name_str);
568 }
569
570 hb_bytes_t name_bytes (nullptr, 0);
571 if (str_len)
572 {
573 if (platform_id == 1)
574 {
575 const uint8_t *src = reinterpret_cast<const uint8_t*> (name_str);
576 const uint8_t *src_end = src + str_len;
577
578 hb_codepoint_t unicode;
579 const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
580 while (src < src_end)
581 {
582 src = hb_utf8_t::next (src, src_end, &unicode, replacement);
583 if (unicode >= 0x0080u)
584 {
585 printf ("Non-ascii character detected, ignored...This API supports acsii characters only for mac platform\n");
586 return false;
587 }
588 }
589 }
590 char *override_name = (char *) hb_malloc (str_len);
591 if (unlikely (!override_name)) return false;
592
593 hb_memcpy (override_name, name_str, str_len);
594 name_bytes = hb_bytes_t (override_name, str_len);
595 }
596 input->name_table_overrides->set (hb_ot_name_record_ids_t (platform_id, encoding_id, language_id, name_id), name_bytes);
597 return true;
598 }
599
600 #endif
601