• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Adobe Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Adobe Author(s): Michiharu Ariza
25  */
26 
27 #ifndef HB_SUBSET_CFF_COMMON_HH
28 #define HB_SUBSET_CFF_COMMON_HH
29 
30 #include "hb.hh"
31 
32 #include "hb-subset-plan.hh"
33 #include "hb-cff-interp-cs-common.hh"
34 
35 namespace CFF {
36 
37 /* Used for writing a temporary charstring */
38 struct str_encoder_t
39 {
str_encoder_tCFF::str_encoder_t40   str_encoder_t (str_buff_t &buff_)
41     : buff (buff_) {}
42 
resetCFF::str_encoder_t43   void reset () { buff.reset (); }
44 
encode_byteCFF::str_encoder_t45   void encode_byte (unsigned char b)
46   {
47     if (likely ((signed) buff.length < buff.allocated))
48       buff.arrayZ[buff.length++] = b;
49     else
50       buff.push (b);
51   }
52 
encode_intCFF::str_encoder_t53   void encode_int (int v)
54   {
55     if ((-1131 <= v) && (v <= 1131))
56     {
57       if ((-107 <= v) && (v <= 107))
58 	encode_byte (v + 139);
59       else if (v > 0)
60       {
61 	v -= 108;
62 	encode_byte ((v >> 8) + OpCode_TwoBytePosInt0);
63 	encode_byte (v & 0xFF);
64       }
65       else
66       {
67 	v = -v - 108;
68 	encode_byte ((v >> 8) + OpCode_TwoByteNegInt0);
69 	encode_byte (v & 0xFF);
70       }
71     }
72     else
73     {
74       if (unlikely (v < -32768))
75 	v = -32768;
76       else if (unlikely (v > 32767))
77 	v = 32767;
78       encode_byte (OpCode_shortint);
79       encode_byte ((v >> 8) & 0xFF);
80       encode_byte (v & 0xFF);
81     }
82   }
83 
84   // Encode number for CharString
encode_num_csCFF::str_encoder_t85   void encode_num_cs (const number_t& n)
86   {
87     if (n.in_int_range ())
88     {
89       encode_int (n.to_int ());
90     }
91     else
92     {
93       int32_t v = n.to_fixed ();
94       encode_byte (OpCode_fixedcs);
95       encode_byte ((v >> 24) & 0xFF);
96       encode_byte ((v >> 16) & 0xFF);
97       encode_byte ((v >> 8) & 0xFF);
98       encode_byte (v & 0xFF);
99     }
100   }
101 
102   // Encode number for TopDict / Private
encode_num_tpCFF::str_encoder_t103   void encode_num_tp (const number_t& n)
104   {
105     if (n.in_int_range ())
106     {
107       // TODO longint
108       encode_int (n.to_int ());
109     }
110     else
111     {
112       // Sigh. BCD
113       // https://learn.microsoft.com/en-us/typography/opentype/spec/cff2#table-5-nibble-definitions
114       double v = n.to_real ();
115       encode_byte (OpCode_BCD);
116 
117       // Based on:
118       // https://github.com/fonttools/fonttools/blob/97ed3a61cde03e17b8be36f866192fbd56f1d1a7/Lib/fontTools/misc/psCharStrings.py#L265-L294
119 
120       char buf[16];
121       /* FontTools has the following comment:
122        *
123        * # Note: 14 decimal digits seems to be the limitation for CFF real numbers
124        * # in macOS. However, we use 8 here to match the implementation of AFDKO.
125        *
126        * We use 8 here to match FontTools X-).
127        */
128 
129       hb_locale_t clocale HB_UNUSED;
130       hb_locale_t oldlocale HB_UNUSED;
131       oldlocale = hb_uselocale (clocale = newlocale (LC_ALL_MASK, "C", NULL));
132       snprintf (buf, sizeof (buf), "%.8G", v);
133       (void) hb_uselocale (((void) freelocale (clocale), oldlocale));
134 
135       char *s = buf;
136       if (s[0] == '0' && s[1] == '.')
137 	s++;
138       else if (s[0] == '-' && s[1] == '0' && s[2] == '.')
139       {
140 	s[1] = '-';
141 	s++;
142       }
143       hb_vector_t<char> nibbles;
144       while (*s)
145       {
146 	char c = s[0];
147 	s++;
148 
149 	switch (c)
150 	{
151 	  case 'E':
152 	  {
153 	    char c2 = *s;
154 	    if (c2 == '-')
155 	    {
156 	      s++;
157 	      nibbles.push (0x0C); // E-
158 	      continue;
159 	    }
160 	    if (c2 == '+')
161 	      s++;
162 	    nibbles.push (0x0B); // E
163 	    continue;
164 	  }
165 
166 	  case '.': case ',': // Comma for some European locales in case no uselocale available.
167 	    nibbles.push (0x0A); // .
168 	    continue;
169 
170 	  case '-':
171 	    nibbles.push (0x0E); // .
172 	    continue;
173 	}
174 
175 	nibbles.push (c - '0');
176       }
177       nibbles.push (0x0F);
178       if (nibbles.length % 2)
179 	nibbles.push (0x0F);
180 
181       unsigned count = nibbles.length;
182       for (unsigned i = 0; i < count; i += 2)
183         encode_byte ((nibbles[i] << 4) | nibbles[i+1]);
184     }
185   }
186 
encode_opCFF::str_encoder_t187   void encode_op (op_code_t op)
188   {
189     if (Is_OpCode_ESC (op))
190     {
191       encode_byte (OpCode_escape);
192       encode_byte (Unmake_OpCode_ESC (op));
193     }
194     else
195       encode_byte (op);
196   }
197 
copy_strCFF::str_encoder_t198   void copy_str (const unsigned char *str, unsigned length)
199   {
200     assert ((signed) (buff.length + length) <= buff.allocated);
201     hb_memcpy (buff.arrayZ + buff.length, str, length);
202     buff.length += length;
203   }
204 
in_errorCFF::str_encoder_t205   bool in_error () const { return buff.in_error (); }
206 
207   protected:
208 
209   str_buff_t &buff;
210 };
211 
212 struct cff_sub_table_info_t {
cff_sub_table_info_tCFF::cff_sub_table_info_t213   cff_sub_table_info_t ()
214     : fd_array_link (0),
215       char_strings_link (0)
216   {
217     fd_select.init ();
218   }
219 
220   table_info_t     fd_select;
221   objidx_t     	   fd_array_link;
222   objidx_t     	   char_strings_link;
223 };
224 
225 template <typename OPSTR=op_str_t>
226 struct cff_top_dict_op_serializer_t : op_serializer_t
227 {
serializeCFF::cff_top_dict_op_serializer_t228   bool serialize (hb_serialize_context_t *c,
229 		  const OPSTR &opstr,
230 		  const cff_sub_table_info_t &info) const
231   {
232     TRACE_SERIALIZE (this);
233 
234     switch (opstr.op)
235     {
236       case OpCode_CharStrings:
237 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.char_strings_link, whence_t::Absolute));
238 
239       case OpCode_FDArray:
240 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_array_link, whence_t::Absolute));
241 
242       case OpCode_FDSelect:
243 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_select.link, whence_t::Absolute));
244 
245       default:
246 	return_trace (copy_opstr (c, opstr));
247     }
248     return_trace (true);
249   }
250 };
251 
252 struct cff_font_dict_op_serializer_t : op_serializer_t
253 {
serializeCFF::cff_font_dict_op_serializer_t254   bool serialize (hb_serialize_context_t *c,
255 		  const op_str_t &opstr,
256 		  const table_info_t &privateDictInfo) const
257   {
258     TRACE_SERIALIZE (this);
259 
260     if (opstr.op == OpCode_Private)
261     {
262       /* serialize the private dict size & offset as 2-byte & 4-byte integers */
263       return_trace (UnsizedByteStr::serialize_int2 (c, privateDictInfo.size) &&
264 		    Dict::serialize_link4_op (c, opstr.op, privateDictInfo.link, whence_t::Absolute));
265     }
266     else
267     {
268       unsigned char *d = c->allocate_size<unsigned char> (opstr.length);
269       if (unlikely (!d)) return_trace (false);
270       /* Faster than hb_memcpy for small strings. */
271       for (unsigned i = 0; i < opstr.length; i++)
272 	d[i] = opstr.ptr[i];
273       //hb_memcpy (d, opstr.ptr, opstr.length);
274     }
275     return_trace (true);
276   }
277 };
278 
279 struct flatten_param_t
280 {
281   str_buff_t     &flatStr;
282   bool	drop_hints;
283   const hb_subset_plan_t *plan;
284 };
285 
286 template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
287 struct subr_flattener_t
288 {
subr_flattener_tCFF::subr_flattener_t289   subr_flattener_t (const ACC &acc_,
290 		    const hb_subset_plan_t *plan_)
291 		   : acc (acc_), plan (plan_) {}
292 
flattenCFF::subr_flattener_t293   bool flatten (str_buff_vec_t &flat_charstrings)
294   {
295     unsigned count = plan->num_output_glyphs ();
296     if (!flat_charstrings.resize_exact (count))
297       return false;
298     for (unsigned int i = 0; i < count; i++)
299     {
300       hb_codepoint_t  glyph;
301       if (!plan->old_gid_for_new_gid (i, &glyph))
302       {
303 	/* add an endchar only charstring for a missing glyph if CFF1 */
304 	if (endchar_op != OpCode_Invalid) flat_charstrings[i].push (endchar_op);
305 	continue;
306       }
307       const hb_ubytes_t str = (*acc.charStrings)[glyph];
308       unsigned int fd = acc.fdSelect->get_fd (glyph);
309       if (unlikely (fd >= acc.fdCount))
310 	return false;
311 
312 
313       ENV env (str, acc, fd,
314 	       plan->normalized_coords.arrayZ, plan->normalized_coords.length);
315       cs_interpreter_t<ENV, OPSET, flatten_param_t> interp (env);
316       flatten_param_t  param = {
317         flat_charstrings.arrayZ[i],
318         (bool) (plan->flags & HB_SUBSET_FLAGS_NO_HINTING),
319 	plan
320       };
321       if (unlikely (!interp.interpret (param)))
322 	return false;
323     }
324     return true;
325   }
326 
327   const ACC &acc;
328   const hb_subset_plan_t *plan;
329 };
330 
331 struct subr_closures_t
332 {
subr_closures_tCFF::subr_closures_t333   subr_closures_t (unsigned int fd_count) : global_closure (), local_closures ()
334   {
335     local_closures.resize_exact (fd_count);
336   }
337 
resetCFF::subr_closures_t338   void reset ()
339   {
340     global_closure.clear();
341     for (unsigned int i = 0; i < local_closures.length; i++)
342       local_closures[i].clear();
343   }
344 
in_errorCFF::subr_closures_t345   bool in_error () const { return local_closures.in_error (); }
346   hb_set_t  global_closure;
347   hb_vector_t<hb_set_t> local_closures;
348 };
349 
350 struct parsed_cs_op_t : op_str_t
351 {
parsed_cs_op_tCFF::parsed_cs_op_t352   parsed_cs_op_t (unsigned int subr_num_ = 0) :
353     subr_num (subr_num_) {}
354 
is_hintingCFF::parsed_cs_op_t355   bool is_hinting () const { return hinting_flag; }
set_hintingCFF::parsed_cs_op_t356   void set_hinting ()       { hinting_flag = true; }
357 
358   /* The layout of this struct is designed to fit within the
359    * padding of op_str_t! */
360 
361   protected:
362   bool	  hinting_flag = false;
363 
364   public:
365   uint16_t subr_num;
366 };
367 
368 struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t>
369 {
parsed_cs_str_tCFF::parsed_cs_str_t370   parsed_cs_str_t () :
371     parsed (false),
372     hint_dropped (false),
373     has_prefix_ (false),
374     has_calls_ (false)
375   {
376     SUPER::init ();
377   }
378 
add_opCFF::parsed_cs_str_t379   void add_op (op_code_t op, const byte_str_ref_t& str_ref)
380   {
381     if (!is_parsed ())
382       SUPER::add_op (op, str_ref);
383   }
384 
add_call_opCFF::parsed_cs_str_t385   void add_call_op (op_code_t op, const byte_str_ref_t& str_ref, unsigned int subr_num)
386   {
387     if (!is_parsed ())
388     {
389       has_calls_ = true;
390 
391       /* Pop the subroutine number. */
392       values.pop ();
393 
394       SUPER::add_op (op, str_ref, {subr_num});
395     }
396   }
397 
set_prefixCFF::parsed_cs_str_t398   void set_prefix (const number_t &num, op_code_t op = OpCode_Invalid)
399   {
400     has_prefix_ = true;
401     prefix_op_ = op;
402     prefix_num_ = num;
403   }
404 
at_endCFF::parsed_cs_str_t405   bool at_end (unsigned int pos) const
406   {
407     return ((pos + 1 >= values.length) /* CFF2 */
408 	|| (values[pos + 1].op == OpCode_return));
409   }
410 
is_parsedCFF::parsed_cs_str_t411   bool is_parsed () const { return parsed; }
set_parsedCFF::parsed_cs_str_t412   void set_parsed ()      { parsed = true; }
413 
is_hint_droppedCFF::parsed_cs_str_t414   bool is_hint_dropped () const { return hint_dropped; }
set_hint_droppedCFF::parsed_cs_str_t415   void set_hint_dropped ()      { hint_dropped = true; }
416 
is_vsindex_droppedCFF::parsed_cs_str_t417   bool is_vsindex_dropped () const { return vsindex_dropped; }
set_vsindex_droppedCFF::parsed_cs_str_t418   void set_vsindex_dropped ()      { vsindex_dropped = true; }
419 
has_prefixCFF::parsed_cs_str_t420   bool has_prefix () const          { return has_prefix_; }
prefix_opCFF::parsed_cs_str_t421   op_code_t prefix_op () const         { return prefix_op_; }
prefix_numCFF::parsed_cs_str_t422   const number_t &prefix_num () const { return prefix_num_; }
423 
has_callsCFF::parsed_cs_str_t424   bool has_calls () const          { return has_calls_; }
425 
compactCFF::parsed_cs_str_t426   void compact ()
427   {
428     unsigned count = values.length;
429     if (!count) return;
430     auto &opstr = values.arrayZ;
431     unsigned j = 0;
432     for (unsigned i = 1; i < count; i++)
433     {
434       /* See if we can combine op j and op i. */
435       bool combine =
436         (opstr[j].op != OpCode_callsubr && opstr[j].op != OpCode_callgsubr) &&
437         (opstr[i].op != OpCode_callsubr && opstr[i].op != OpCode_callgsubr) &&
438         (opstr[j].is_hinting () == opstr[i].is_hinting ()) &&
439         (opstr[j].ptr + opstr[j].length == opstr[i].ptr) &&
440         (opstr[j].length + opstr[i].length <= 255);
441 
442       if (combine)
443       {
444 	opstr[j].length += opstr[i].length;
445 	opstr[j].op = OpCode_Invalid;
446       }
447       else
448       {
449 	opstr[++j] = opstr[i];
450       }
451     }
452     values.shrink (j + 1);
453   }
454 
455   protected:
456   bool    parsed : 1;
457   bool    hint_dropped : 1;
458   bool    vsindex_dropped : 1;
459   bool    has_prefix_ : 1;
460   bool    has_calls_ : 1;
461   op_code_t	prefix_op_;
462   number_t	prefix_num_;
463 
464   private:
465   typedef parsed_values_t<parsed_cs_op_t> SUPER;
466 };
467 
468 struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t>
469 {
470   private:
471   typedef hb_vector_t<parsed_cs_str_t> SUPER;
472 };
473 
474 struct cff_subset_accelerator_t
475 {
createCFF::cff_subset_accelerator_t476   static cff_subset_accelerator_t* create (
477       hb_blob_t* original_blob,
478       const parsed_cs_str_vec_t& parsed_charstrings,
479       const parsed_cs_str_vec_t& parsed_global_subrs,
480       const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs) {
481     cff_subset_accelerator_t* accel =
482         (cff_subset_accelerator_t*) hb_malloc (sizeof(cff_subset_accelerator_t));
483     if (unlikely (!accel)) return nullptr;
484     new (accel) cff_subset_accelerator_t (original_blob,
485                                           parsed_charstrings,
486                                           parsed_global_subrs,
487                                           parsed_local_subrs);
488     return accel;
489   }
490 
destroyCFF::cff_subset_accelerator_t491   static void destroy (void* value) {
492     if (!value) return;
493 
494     cff_subset_accelerator_t* accel = (cff_subset_accelerator_t*) value;
495     accel->~cff_subset_accelerator_t ();
496     hb_free (accel);
497   }
498 
cff_subset_accelerator_tCFF::cff_subset_accelerator_t499   cff_subset_accelerator_t(
500       hb_blob_t* original_blob_,
501       const parsed_cs_str_vec_t& parsed_charstrings_,
502       const parsed_cs_str_vec_t& parsed_global_subrs_,
503       const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs_)
504   {
505     parsed_charstrings = parsed_charstrings_;
506     parsed_global_subrs = parsed_global_subrs_;
507     parsed_local_subrs = parsed_local_subrs_;
508 
509     // the parsed charstrings point to memory in the original CFF table so we must hold a reference
510     // to it to keep the memory valid.
511     original_blob = hb_blob_reference (original_blob_);
512   }
513 
~cff_subset_accelerator_tCFF::cff_subset_accelerator_t514   ~cff_subset_accelerator_t()
515   {
516     hb_blob_destroy (original_blob);
517     auto *mapping = glyph_to_sid_map.get_relaxed ();
518     if (mapping)
519     {
520       mapping->~glyph_to_sid_map_t ();
521       hb_free (mapping);
522     }
523   }
524 
525   parsed_cs_str_vec_t parsed_charstrings;
526   parsed_cs_str_vec_t parsed_global_subrs;
527   hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs;
528   mutable hb_atomic_ptr_t<glyph_to_sid_map_t> glyph_to_sid_map;
529 
530  private:
531   hb_blob_t* original_blob;
532 };
533 
534 struct subr_subset_param_t
535 {
subr_subset_param_tCFF::subr_subset_param_t536   subr_subset_param_t (parsed_cs_str_t *parsed_charstring_,
537 		       parsed_cs_str_vec_t *parsed_global_subrs_,
538 		       parsed_cs_str_vec_t *parsed_local_subrs_,
539 		       hb_set_t *global_closure_,
540 		       hb_set_t *local_closure_,
541 		       bool drop_hints_) :
542       current_parsed_str (parsed_charstring_),
543       parsed_charstring (parsed_charstring_),
544       parsed_global_subrs (parsed_global_subrs_),
545       parsed_local_subrs (parsed_local_subrs_),
546       global_closure (global_closure_),
547       local_closure (local_closure_),
548       drop_hints (drop_hints_) {}
549 
get_parsed_str_for_contextCFF::subr_subset_param_t550   parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context)
551   {
552     switch (context.type)
553     {
554       case CSType_CharString:
555 	return parsed_charstring;
556 
557       case CSType_LocalSubr:
558 	if (likely (context.subr_num < parsed_local_subrs->length))
559 	  return &(*parsed_local_subrs)[context.subr_num];
560 	break;
561 
562       case CSType_GlobalSubr:
563 	if (likely (context.subr_num < parsed_global_subrs->length))
564 	  return &(*parsed_global_subrs)[context.subr_num];
565 	break;
566     }
567     return nullptr;
568   }
569 
570   template <typename ENV>
set_current_strCFF::subr_subset_param_t571   void set_current_str (ENV &env, bool calling)
572   {
573     parsed_cs_str_t *parsed_str = get_parsed_str_for_context (env.context);
574     if (unlikely (!parsed_str))
575     {
576       env.set_error ();
577       return;
578     }
579     /* If the called subroutine is parsed partially but not completely yet,
580      * it must be because we are calling it recursively.
581      * Handle it as an error. */
582     if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0)))
583       env.set_error ();
584     else
585     {
586       if (!parsed_str->is_parsed ())
587         parsed_str->alloc (env.str_ref.total_size ());
588       current_parsed_str = parsed_str;
589     }
590   }
591 
592   parsed_cs_str_t	*current_parsed_str;
593 
594   parsed_cs_str_t	*parsed_charstring;
595   parsed_cs_str_vec_t	*parsed_global_subrs;
596   parsed_cs_str_vec_t	*parsed_local_subrs;
597   hb_set_t      *global_closure;
598   hb_set_t      *local_closure;
599   bool	  drop_hints;
600 };
601 
602 struct subr_remap_t : hb_inc_bimap_t
603 {
createCFF::subr_remap_t604   void create (const hb_set_t *closure)
605   {
606     /* create a remapping of subroutine numbers from old to new.
607      * no optimization based on usage counts. fonttools doesn't appear doing that either.
608      */
609 
610     alloc (closure->get_population ());
611     for (auto old_num : *closure)
612       add (old_num);
613 
614     if (get_population () < 1240)
615       bias = 107;
616     else if (get_population () < 33900)
617       bias = 1131;
618     else
619       bias = 32768;
620   }
621 
biased_numCFF::subr_remap_t622   int biased_num (unsigned int old_num) const
623   {
624     hb_codepoint_t new_num = get (old_num);
625     return (int)new_num - bias;
626   }
627 
628   protected:
629   int bias;
630 };
631 
632 struct subr_remaps_t
633 {
subr_remaps_tCFF::subr_remaps_t634   subr_remaps_t (unsigned int fdCount)
635   {
636     local_remaps.resize (fdCount);
637   }
638 
in_errorCFF::subr_remaps_t639   bool in_error()
640   {
641     return local_remaps.in_error ();
642   }
643 
createCFF::subr_remaps_t644   void create (subr_closures_t& closures)
645   {
646     global_remap.create (&closures.global_closure);
647     for (unsigned int i = 0; i < local_remaps.length; i++)
648       local_remaps.arrayZ[i].create (&closures.local_closures[i]);
649   }
650 
651   subr_remap_t	       global_remap;
652   hb_vector_t<subr_remap_t>  local_remaps;
653 };
654 
655 template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
656 struct subr_subsetter_t
657 {
subr_subsetter_tCFF::subr_subsetter_t658   subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_)
659       : acc (acc_), plan (plan_), closures(acc_.fdCount),
660         remaps(acc_.fdCount)
661   {}
662 
663   /* Subroutine subsetting with --no-desubroutinize runs in phases:
664    *
665    * 1. execute charstrings/subroutines to determine subroutine closures
666    * 2. parse out all operators and numbers
667    * 3. mark hint operators and operands for removal if --no-hinting
668    * 4. re-encode all charstrings and subroutines with new subroutine numbers
669    *
670    * Phases #1 and #2 are done at the same time in collect_subrs ().
671    * Phase #3 walks charstrings/subroutines forward then backward (hence parsing required),
672    * because we can't tell if a number belongs to a hint op until we see the first moveto.
673    *
674    * Assumption: a callsubr/callgsubr operator must immediately follow a (biased) subroutine number
675    * within the same charstring/subroutine, e.g., not split across a charstring and a subroutine.
676    */
subsetCFF::subr_subsetter_t677   bool subset (void)
678   {
679     unsigned fd_count = acc.fdCount;
680     const cff_subset_accelerator_t* cff_accelerator = nullptr;
681     if (acc.cff_accelerator) {
682       cff_accelerator = acc.cff_accelerator;
683       fd_count = cff_accelerator->parsed_local_subrs.length;
684     }
685 
686     if (cff_accelerator) {
687       // If we are not dropping hinting then charstrings are not modified so we can
688       // just use a reference to the cached copies.
689       cached_charstrings.resize_exact (plan->num_output_glyphs ());
690       parsed_global_subrs = &cff_accelerator->parsed_global_subrs;
691       parsed_local_subrs = &cff_accelerator->parsed_local_subrs;
692     } else {
693       parsed_charstrings.resize_exact (plan->num_output_glyphs ());
694       parsed_global_subrs_storage.resize_exact (acc.globalSubrs->count);
695 
696       if (unlikely (!parsed_local_subrs_storage.resize (fd_count))) return false;
697 
698       for (unsigned int i = 0; i < acc.fdCount; i++)
699       {
700         unsigned count = acc.privateDicts[i].localSubrs->count;
701         parsed_local_subrs_storage[i].resize (count);
702         if (unlikely (parsed_local_subrs_storage[i].in_error ())) return false;
703       }
704 
705       parsed_global_subrs = &parsed_global_subrs_storage;
706       parsed_local_subrs = &parsed_local_subrs_storage;
707     }
708 
709     if (unlikely (remaps.in_error()
710                   || cached_charstrings.in_error ()
711                   || parsed_charstrings.in_error ()
712                   || parsed_global_subrs->in_error ()
713                   || closures.in_error ())) {
714       return false;
715     }
716 
717     /* phase 1 & 2 */
718     for (auto _ : plan->new_to_old_gid_list)
719     {
720       hb_codepoint_t new_glyph = _.first;
721       hb_codepoint_t old_glyph = _.second;
722 
723       const hb_ubytes_t str = (*acc.charStrings)[old_glyph];
724       unsigned int fd = acc.fdSelect->get_fd (old_glyph);
725       if (unlikely (fd >= acc.fdCount))
726         return false;
727 
728       if (cff_accelerator)
729       {
730         // parsed string already exists in accelerator, copy it and move
731         // on.
732         if (cached_charstrings)
733           cached_charstrings[new_glyph] = &cff_accelerator->parsed_charstrings[old_glyph];
734         else
735           parsed_charstrings[new_glyph] = cff_accelerator->parsed_charstrings[old_glyph];
736 
737         continue;
738       }
739 
740       ENV env (str, acc, fd);
741       cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env);
742 
743       parsed_charstrings[new_glyph].alloc (str.length);
744       subr_subset_param_t  param (&parsed_charstrings[new_glyph],
745                                   &parsed_global_subrs_storage,
746                                   &parsed_local_subrs_storage[fd],
747                                   &closures.global_closure,
748                                   &closures.local_closures[fd],
749                                   plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
750 
751       if (unlikely (!interp.interpret (param)))
752         return false;
753 
754       /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */
755       SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[new_glyph]);
756 
757       /* mark hint ops and arguments for drop */
758       if ((plan->flags & HB_SUBSET_FLAGS_NO_HINTING) || plan->inprogress_accelerator)
759       {
760 	subr_subset_param_t  param (&parsed_charstrings[new_glyph],
761 				    &parsed_global_subrs_storage,
762 				    &parsed_local_subrs_storage[fd],
763 				    &closures.global_closure,
764 				    &closures.local_closures[fd],
765 				    plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
766 
767 	drop_hints_param_t  drop;
768 	if (drop_hints_in_str (parsed_charstrings[new_glyph], param, drop))
769 	{
770 	  parsed_charstrings[new_glyph].set_hint_dropped ();
771 	  if (drop.vsindex_dropped)
772 	    parsed_charstrings[new_glyph].set_vsindex_dropped ();
773 	}
774       }
775 
776       /* Doing this here one by one instead of compacting all at the end
777        * has massive peak-memory saving.
778        *
779        * The compacting both saves memory and makes further operations
780        * faster.
781        */
782       parsed_charstrings[new_glyph].compact ();
783     }
784 
785     /* Since parsed strings were loaded from accelerator, we still need
786      * to compute the subroutine closures which would have normally happened during
787      * parsing.
788      *
789      * Or if we are dropping hinting, redo closure to get actually used subrs.
790      */
791     if ((cff_accelerator ||
792 	(!cff_accelerator && plan->flags & HB_SUBSET_FLAGS_NO_HINTING)) &&
793         !closure_subroutines(*parsed_global_subrs,
794                              *parsed_local_subrs))
795       return false;
796 
797     remaps.create (closures);
798 
799     populate_subset_accelerator ();
800     return true;
801   }
802 
encode_charstringsCFF::subr_subsetter_t803   bool encode_charstrings (str_buff_vec_t &buffArray, bool encode_prefix = true) const
804   {
805     unsigned num_glyphs = plan->num_output_glyphs ();
806     if (unlikely (!buffArray.resize_exact (num_glyphs)))
807       return false;
808     hb_codepoint_t last = 0;
809     for (auto _ : plan->new_to_old_gid_list)
810     {
811       hb_codepoint_t gid = _.first;
812       hb_codepoint_t old_glyph = _.second;
813 
814       if (endchar_op != OpCode_Invalid)
815         for (; last < gid; last++)
816 	{
817 	  // Hack to point vector to static string.
818 	  auto &b = buffArray.arrayZ[last];
819 	  b.length = 1;
820 	  b.arrayZ = const_cast<unsigned char *>(endchar_str);
821 	}
822 
823       last++; // Skip over gid
824       unsigned int  fd = acc.fdSelect->get_fd (old_glyph);
825       if (unlikely (fd >= acc.fdCount))
826 	return false;
827       if (unlikely (!encode_str (get_parsed_charstring (gid), fd, buffArray.arrayZ[gid], encode_prefix)))
828 	return false;
829     }
830     if (endchar_op != OpCode_Invalid)
831       for (; last < num_glyphs; last++)
832       {
833 	// Hack to point vector to static string.
834 	auto &b = buffArray.arrayZ[last];
835 	b.length = 1;
836 	b.arrayZ = const_cast<unsigned char *>(endchar_str);
837       }
838 
839     return true;
840   }
841 
encode_subrsCFF::subr_subsetter_t842   bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const
843   {
844     unsigned int  count = remap.get_population ();
845 
846     if (unlikely (!buffArray.resize_exact (count)))
847       return false;
848     for (unsigned int new_num = 0; new_num < count; new_num++)
849     {
850       hb_codepoint_t old_num = remap.backward (new_num);
851       assert (old_num != CFF_UNDEF_CODE);
852 
853       if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num])))
854 	return false;
855     }
856     return true;
857   }
858 
encode_globalsubrsCFF::subr_subsetter_t859   bool encode_globalsubrs (str_buff_vec_t &buffArray)
860   {
861     return encode_subrs (*parsed_global_subrs, remaps.global_remap, 0, buffArray);
862   }
863 
encode_localsubrsCFF::subr_subsetter_t864   bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const
865   {
866     return encode_subrs ((*parsed_local_subrs)[fd], remaps.local_remaps[fd], fd, buffArray);
867   }
868 
869   protected:
870   struct drop_hints_param_t
871   {
drop_hints_param_tCFF::subr_subsetter_t::drop_hints_param_t872     drop_hints_param_t ()
873       : seen_moveto (false),
874 	ends_in_hint (false),
875 	all_dropped (false),
876 	vsindex_dropped (false) {}
877 
878     bool  seen_moveto;
879     bool  ends_in_hint;
880     bool  all_dropped;
881     bool  vsindex_dropped;
882   };
883 
drop_hints_in_subrCFF::subr_subsetter_t884   bool drop_hints_in_subr (parsed_cs_str_t &str, unsigned int pos,
885 			   parsed_cs_str_vec_t &subrs, unsigned int subr_num,
886 			   const subr_subset_param_t &param, drop_hints_param_t &drop)
887   {
888     drop.ends_in_hint = false;
889     bool has_hint = drop_hints_in_str (subrs[subr_num], param, drop);
890 
891     /* if this subr ends with a stem hint (i.e., not a number; potential argument for moveto),
892      * then this entire subroutine must be a hint. drop its call. */
893     if (drop.ends_in_hint)
894     {
895       str.values[pos].set_hinting ();
896       /* if this subr call is at the end of the parent subr, propagate the flag
897        * otherwise reset the flag */
898       if (!str.at_end (pos))
899 	drop.ends_in_hint = false;
900     }
901     else if (drop.all_dropped)
902     {
903       str.values[pos].set_hinting ();
904     }
905 
906     return has_hint;
907   }
908 
909   /* returns true if it sees a hint op before the first moveto */
drop_hints_in_strCFF::subr_subsetter_t910   bool drop_hints_in_str (parsed_cs_str_t &str, const subr_subset_param_t &param, drop_hints_param_t &drop)
911   {
912     bool  seen_hint = false;
913 
914     unsigned count = str.values.length;
915     auto *values = str.values.arrayZ;
916     for (unsigned int pos = 0; pos < count; pos++)
917     {
918       bool  has_hint = false;
919       switch (values[pos].op)
920       {
921 	case OpCode_callsubr:
922 	  has_hint = drop_hints_in_subr (str, pos,
923 					*param.parsed_local_subrs, values[pos].subr_num,
924 					param, drop);
925 	  break;
926 
927 	case OpCode_callgsubr:
928 	  has_hint = drop_hints_in_subr (str, pos,
929 					*param.parsed_global_subrs, values[pos].subr_num,
930 					param, drop);
931 	  break;
932 
933 	case OpCode_rmoveto:
934 	case OpCode_hmoveto:
935 	case OpCode_vmoveto:
936 	  drop.seen_moveto = true;
937 	  break;
938 
939 	case OpCode_hintmask:
940 	case OpCode_cntrmask:
941 	  if (drop.seen_moveto)
942 	  {
943 	    values[pos].set_hinting ();
944 	    break;
945 	  }
946 	  HB_FALLTHROUGH;
947 
948 	case OpCode_hstemhm:
949 	case OpCode_vstemhm:
950 	case OpCode_hstem:
951 	case OpCode_vstem:
952 	  has_hint = true;
953 	  values[pos].set_hinting ();
954 	  if (str.at_end (pos))
955 	    drop.ends_in_hint = true;
956 	  break;
957 
958 	case OpCode_dotsection:
959 	  values[pos].set_hinting ();
960 	  break;
961 
962 	default:
963 	  /* NONE */
964 	  break;
965       }
966       if (has_hint)
967       {
968 	for (int i = pos - 1; i >= 0; i--)
969 	{
970 	  parsed_cs_op_t  &csop = values[(unsigned)i];
971 	  if (csop.is_hinting ())
972 	    break;
973 	  csop.set_hinting ();
974 	  if (csop.op == OpCode_vsindexcs)
975 	    drop.vsindex_dropped = true;
976 	}
977 	seen_hint |= has_hint;
978       }
979     }
980 
981     /* Raise all_dropped flag if all operators except return are dropped from a subr.
982      * It may happen even after seeing the first moveto if a subr contains
983      * only (usually one) hintmask operator, then calls to this subr can be dropped.
984      */
985     drop.all_dropped = true;
986     for (unsigned int pos = 0; pos < count; pos++)
987     {
988       parsed_cs_op_t  &csop = values[pos];
989       if (csop.op == OpCode_return)
990 	break;
991       if (!csop.is_hinting ())
992       {
993 	drop.all_dropped = false;
994 	break;
995       }
996     }
997 
998     return seen_hint;
999   }
1000 
closure_subroutinesCFF::subr_subsetter_t1001   bool closure_subroutines (const parsed_cs_str_vec_t& global_subrs,
1002                             const hb_vector_t<parsed_cs_str_vec_t>& local_subrs)
1003   {
1004     closures.reset ();
1005     for (auto _ : plan->new_to_old_gid_list)
1006     {
1007       hb_codepoint_t new_glyph = _.first;
1008       hb_codepoint_t old_glyph = _.second;
1009       unsigned int fd = acc.fdSelect->get_fd (old_glyph);
1010       if (unlikely (fd >= acc.fdCount))
1011         return false;
1012 
1013       // Note: const cast is safe here because the collect_subr_refs_in_str only performs a
1014       //       closure and does not modify any of the charstrings.
1015       subr_subset_param_t  param (const_cast<parsed_cs_str_t*> (&get_parsed_charstring (new_glyph)),
1016                                   const_cast<parsed_cs_str_vec_t*> (&global_subrs),
1017                                   const_cast<parsed_cs_str_vec_t*> (&local_subrs[fd]),
1018                                   &closures.global_closure,
1019                                   &closures.local_closures[fd],
1020                                   plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
1021       collect_subr_refs_in_str (get_parsed_charstring (new_glyph), param);
1022     }
1023 
1024     return true;
1025   }
1026 
collect_subr_refs_in_subrCFF::subr_subsetter_t1027   void collect_subr_refs_in_subr (unsigned int subr_num, parsed_cs_str_vec_t &subrs,
1028 				  hb_set_t *closure,
1029 				  const subr_subset_param_t &param)
1030   {
1031     if (closure->has (subr_num))
1032       return;
1033     closure->add (subr_num);
1034     collect_subr_refs_in_str (subrs[subr_num], param);
1035   }
1036 
collect_subr_refs_in_strCFF::subr_subsetter_t1037   void collect_subr_refs_in_str (const parsed_cs_str_t &str,
1038                                  const subr_subset_param_t &param)
1039   {
1040     if (!str.has_calls ())
1041       return;
1042 
1043     for (auto &opstr : str.values)
1044     {
1045       if (!param.drop_hints || !opstr.is_hinting ())
1046       {
1047 	switch (opstr.op)
1048 	{
1049 	  case OpCode_callsubr:
1050 	    collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_local_subrs,
1051 				       param.local_closure, param);
1052 	    break;
1053 
1054 	  case OpCode_callgsubr:
1055 	    collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_global_subrs,
1056 				       param.global_closure, param);
1057 	    break;
1058 
1059 	  default: break;
1060 	}
1061       }
1062     }
1063   }
1064 
encode_strCFF::subr_subsetter_t1065   bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff, bool encode_prefix = true) const
1066   {
1067     str_encoder_t  encoder (buff);
1068     encoder.reset ();
1069     bool hinting = !(plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
1070     /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints,
1071      * re-insert it at the beginning of charstreing */
1072     if (encode_prefix && str.has_prefix () && !hinting && str.is_hint_dropped ())
1073     {
1074       encoder.encode_num_cs (str.prefix_num ());
1075       if (str.prefix_op () != OpCode_Invalid)
1076 	encoder.encode_op (str.prefix_op ());
1077     }
1078 
1079     unsigned size = 0;
1080     for (auto &opstr : str.values)
1081     {
1082       size += opstr.length;
1083       if (opstr.op == OpCode_callsubr || opstr.op == OpCode_callgsubr)
1084         size += 3;
1085     }
1086     if (!buff.alloc (buff.length + size, true))
1087       return false;
1088 
1089     for (auto &opstr : str.values)
1090     {
1091       if (hinting || !opstr.is_hinting ())
1092       {
1093 	switch (opstr.op)
1094 	{
1095 	  case OpCode_callsubr:
1096 	    encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num));
1097 	    encoder.copy_str (opstr.ptr, opstr.length);
1098 	    break;
1099 
1100 	  case OpCode_callgsubr:
1101 	    encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num));
1102 	    encoder.copy_str (opstr.ptr, opstr.length);
1103 	    break;
1104 
1105 	  default:
1106 	    encoder.copy_str (opstr.ptr, opstr.length);
1107 	    break;
1108 	}
1109       }
1110     }
1111     return !encoder.in_error ();
1112   }
1113 
compact_parsed_subrsCFF::subr_subsetter_t1114   void compact_parsed_subrs () const
1115   {
1116     for (auto &cs : parsed_global_subrs_storage)
1117       cs.compact ();
1118     for (auto &vec : parsed_local_subrs_storage)
1119       for (auto &cs : vec)
1120 	cs.compact ();
1121   }
1122 
populate_subset_acceleratorCFF::subr_subsetter_t1123   void populate_subset_accelerator () const
1124   {
1125     if (!plan->inprogress_accelerator) return;
1126 
1127     compact_parsed_subrs ();
1128 
1129     acc.cff_accelerator =
1130         cff_subset_accelerator_t::create(acc.blob,
1131                                          parsed_charstrings,
1132                                          parsed_global_subrs_storage,
1133                                          parsed_local_subrs_storage);
1134   }
1135 
get_parsed_charstringCFF::subr_subsetter_t1136   const parsed_cs_str_t& get_parsed_charstring (unsigned i) const
1137   {
1138     if (cached_charstrings) return *(cached_charstrings[i]);
1139     return parsed_charstrings[i];
1140   }
1141 
1142   protected:
1143   const ACC			&acc;
1144   const hb_subset_plan_t	*plan;
1145 
1146   subr_closures_t		closures;
1147 
1148   hb_vector_t<const parsed_cs_str_t*>     cached_charstrings;
1149   const parsed_cs_str_vec_t*              parsed_global_subrs;
1150   const hb_vector_t<parsed_cs_str_vec_t>* parsed_local_subrs;
1151 
1152   subr_remaps_t			remaps;
1153 
1154   private:
1155 
1156   parsed_cs_str_vec_t		parsed_charstrings;
1157   parsed_cs_str_vec_t		parsed_global_subrs_storage;
1158   hb_vector_t<parsed_cs_str_vec_t>  parsed_local_subrs_storage;
1159   typedef typename SUBRS::count_type subr_count_type;
1160 };
1161 
1162 } /* namespace CFF */
1163 
1164 HB_INTERNAL bool
1165 hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan,
1166 			    unsigned int fdCount,
1167 			    const CFF::FDSelect &src, /* IN */
1168 			    unsigned int &subset_fd_count /* OUT */,
1169 			    unsigned int &subset_fdselect_size /* OUT */,
1170 			    unsigned int &subset_fdselect_format /* OUT */,
1171 			    hb_vector_t<CFF::code_pair_t> &fdselect_ranges /* OUT */,
1172 			    hb_inc_bimap_t &fdmap /* OUT */);
1173 
1174 HB_INTERNAL bool
1175 hb_serialize_cff_fdselect (hb_serialize_context_t *c,
1176 			  unsigned int num_glyphs,
1177 			  const CFF::FDSelect &src,
1178 			  unsigned int fd_count,
1179 			  unsigned int fdselect_format,
1180 			  unsigned int size,
1181 			  const hb_vector_t<CFF::code_pair_t> &fdselect_ranges);
1182 
1183 #endif /* HB_SUBSET_CFF_COMMON_HH */
1184