• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Garret Rieger
25  */
26 
27 #ifndef HB_REPACKER_HH
28 #define HB_REPACKER_HH
29 
30 #include "hb-open-type.hh"
31 #include "hb-map.hh"
32 #include "hb-priority-queue.hh"
33 #include "hb-serialize.hh"
34 #include "hb-vector.hh"
35 
36 /*
37  * For a detailed writeup on the overflow resolution algorithm see:
38  * docs/repacker.md
39  */
40 
41 struct graph_t
42 {
43   struct vertex_t
44   {
vertex_tgraph_t::vertex_t45     vertex_t () :
46         distance (0),
47         space (0),
48         parents (),
49         start (0),
50         end (0),
51         priority(0) {}
52 
finigraph_t::vertex_t53     void fini () {
54       obj.fini ();
55       parents.fini ();
56     }
57 
58     hb_serialize_context_t::object_t obj;
59     int64_t distance;
60     int64_t space;
61     hb_vector_t<unsigned> parents;
62     unsigned start;
63     unsigned end;
64     unsigned priority;
65 
is_sharedgraph_t::vertex_t66     bool is_shared () const
67     {
68       return parents.length > 1;
69     }
70 
incoming_edgesgraph_t::vertex_t71     unsigned incoming_edges () const
72     {
73       return parents.length;
74     }
75 
remove_parentgraph_t::vertex_t76     void remove_parent (unsigned parent_index)
77     {
78       for (unsigned i = 0; i < parents.length; i++)
79       {
80         if (parents[i] != parent_index) continue;
81         parents.remove (i);
82         break;
83       }
84     }
85 
remap_parentsgraph_t::vertex_t86     void remap_parents (const hb_vector_t<unsigned>& id_map)
87     {
88       for (unsigned i = 0; i < parents.length; i++)
89         parents[i] = id_map[parents[i]];
90     }
91 
remap_parentgraph_t::vertex_t92     void remap_parent (unsigned old_index, unsigned new_index)
93     {
94       for (unsigned i = 0; i < parents.length; i++)
95       {
96         if (parents[i] == old_index)
97           parents[i] = new_index;
98       }
99     }
100 
is_leafgraph_t::vertex_t101     bool is_leaf () const
102     {
103       return !obj.links.length;
104     }
105 
raise_prioritygraph_t::vertex_t106     void raise_priority ()
107     {
108       priority++;
109     }
110 
modified_distancegraph_t::vertex_t111     int64_t modified_distance (unsigned order) const
112     {
113       // TODO(garretrieger): once priority is high enough, should try
114       // setting distance = 0 which will force to sort immediately after
115       // it's parent where possible.
116 
117       int64_t modified_distance =
118           hb_min (hb_max(distance + distance_modifier (), 0), 0x7FFFFFFFFF);
119       return (modified_distance << 22) | (0x003FFFFF & order);
120     }
121 
distance_modifiergraph_t::vertex_t122     int64_t distance_modifier () const
123     {
124       if (!priority) return 0;
125       int64_t table_size = obj.tail - obj.head;
126       return -(table_size - table_size / (1 << hb_min(priority, 16u)));
127     }
128   };
129 
130   struct overflow_record_t
131   {
132     unsigned parent;
133     unsigned child;
134   };
135 
136   /*
137    * A topological sorting of an object graph. Ordered
138    * in reverse serialization order (first object in the
139    * serialization is at the end of the list). This matches
140    * the 'packed' object stack used internally in the
141    * serializer
142    */
graph_tgraph_t143   graph_t (const hb_vector_t<hb_serialize_context_t::object_t *>& objects)
144       : parents_invalid (true),
145         distance_invalid (true),
146         positions_invalid (true),
147         successful (true)
148   {
149     num_roots_for_space_.push (1);
150     bool removed_nil = false;
151     for (unsigned i = 0; i < objects.length; i++)
152     {
153       // TODO(grieger): check all links point to valid objects.
154 
155       // If this graph came from a serialization buffer object 0 is the
156       // nil object. We don't need it for our purposes here so drop it.
157       if (i == 0 && !objects[i])
158       {
159         removed_nil = true;
160         continue;
161       }
162 
163       vertex_t* v = vertices_.push ();
164       if (check_success (!vertices_.in_error ()))
165         v->obj = *objects[i];
166       if (!removed_nil) continue;
167       for (unsigned i = 0; i < v->obj.links.length; i++)
168         // Fix indices to account for removed nil object.
169         v->obj.links[i].objidx--;
170     }
171   }
172 
~graph_tgraph_t173   ~graph_t ()
174   {
175     vertices_.fini_deep ();
176   }
177 
in_errorgraph_t178   bool in_error () const
179   {
180     return !successful ||
181         vertices_.in_error () ||
182         num_roots_for_space_.in_error ();
183   }
184 
rootgraph_t185   const vertex_t& root () const
186   {
187     return vertices_[root_idx ()];
188   }
189 
root_idxgraph_t190   unsigned root_idx () const
191   {
192     // Object graphs are in reverse order, the first object is at the end
193     // of the vector. Since the graph is topologically sorted it's safe to
194     // assume the first object has no incoming edges.
195     return vertices_.length - 1;
196   }
197 
objectgraph_t198   const hb_serialize_context_t::object_t& object(unsigned i) const
199   {
200     return vertices_[i].obj;
201   }
202 
203   /*
204    * serialize graph into the provided serialization buffer.
205    */
serializegraph_t206   void serialize (hb_serialize_context_t* c) const
207   {
208     c->start_serialize<void> ();
209     for (unsigned i = 0; i < vertices_.length; i++) {
210       c->push ();
211 
212       size_t size = vertices_[i].obj.tail - vertices_[i].obj.head;
213       char* start = c->allocate_size <char> (size);
214       if (!start) return;
215 
216       memcpy (start, vertices_[i].obj.head, size);
217 
218       for (const auto& link : vertices_[i].obj.links)
219         serialize_link (link, start, c);
220 
221       // All duplications are already encoded in the graph, so don't
222       // enable sharing during packing.
223       c->pop_pack (false);
224     }
225     c->end_serialize ();
226   }
227 
228   /*
229    * Generates a new topological sorting of graph using Kahn's
230    * algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms
231    */
sort_kahngraph_t232   void sort_kahn ()
233   {
234     positions_invalid = true;
235 
236     if (vertices_.length <= 1) {
237       // Graph of 1 or less doesn't need sorting.
238       return;
239     }
240 
241     hb_vector_t<unsigned> queue;
242     hb_vector_t<vertex_t> sorted_graph;
243     if (unlikely (!check_success (sorted_graph.resize (vertices_.length)))) return;
244     hb_vector_t<unsigned> id_map;
245     if (unlikely (!check_success (id_map.resize (vertices_.length)))) return;
246 
247     hb_vector_t<unsigned> removed_edges;
248     if (unlikely (!check_success (removed_edges.resize (vertices_.length)))) return;
249     update_parents ();
250 
251     queue.push (root_idx ());
252     int new_id = vertices_.length - 1;
253 
254     while (!queue.in_error () && queue.length)
255     {
256       unsigned next_id = queue[0];
257       queue.remove (0);
258 
259       vertex_t& next = vertices_[next_id];
260       sorted_graph[new_id] = next;
261       id_map[next_id] = new_id--;
262 
263       for (const auto& link : next.obj.links) {
264         removed_edges[link.objidx]++;
265         if (!(vertices_[link.objidx].incoming_edges () - removed_edges[link.objidx]))
266           queue.push (link.objidx);
267       }
268     }
269 
270     check_success (!queue.in_error ());
271     check_success (!sorted_graph.in_error ());
272     if (!check_success (new_id == -1))
273       print_orphaned_nodes ();
274 
275     remap_all_obj_indices (id_map, &sorted_graph);
276 
277     hb_swap (vertices_, sorted_graph);
278     sorted_graph.fini_deep ();
279   }
280 
281   /*
282    * Generates a new topological sorting of graph ordered by the shortest
283    * distance to each node.
284    */
sort_shortest_distancegraph_t285   void sort_shortest_distance ()
286   {
287     positions_invalid = true;
288 
289     if (vertices_.length <= 1) {
290       // Graph of 1 or less doesn't need sorting.
291       return;
292     }
293 
294     update_distances ();
295 
296     hb_priority_queue_t queue;
297     hb_vector_t<vertex_t> sorted_graph;
298     if (unlikely (!check_success (sorted_graph.resize (vertices_.length)))) return;
299     hb_vector_t<unsigned> id_map;
300     if (unlikely (!check_success (id_map.resize (vertices_.length)))) return;
301 
302     hb_vector_t<unsigned> removed_edges;
303     if (unlikely (!check_success (removed_edges.resize (vertices_.length)))) return;
304     update_parents ();
305 
306     queue.insert (root ().modified_distance (0), root_idx ());
307     int new_id = root_idx ();
308     unsigned order = 1;
309     while (!queue.in_error () && !queue.is_empty ())
310     {
311       unsigned next_id = queue.pop_minimum().second;
312 
313       vertex_t& next = vertices_[next_id];
314       sorted_graph[new_id] = next;
315       id_map[next_id] = new_id--;
316 
317       for (const auto& link : next.obj.links) {
318         removed_edges[link.objidx]++;
319         if (!(vertices_[link.objidx].incoming_edges () - removed_edges[link.objidx]))
320           // Add the order that the links were encountered to the priority.
321           // This ensures that ties between priorities objects are broken in a consistent
322           // way. More specifically this is set up so that if a set of objects have the same
323           // distance they'll be added to the topological order in the order that they are
324           // referenced from the parent object.
325           queue.insert (vertices_[link.objidx].modified_distance (order++),
326                         link.objidx);
327       }
328     }
329 
330     check_success (!queue.in_error ());
331     check_success (!sorted_graph.in_error ());
332     if (!check_success (new_id == -1))
333       print_orphaned_nodes ();
334 
335     remap_all_obj_indices (id_map, &sorted_graph);
336 
337     hb_swap (vertices_, sorted_graph);
338     sorted_graph.fini_deep ();
339   }
340 
341   /*
342    * Assign unique space numbers to each connected subgraph of 32 bit offset(s).
343    */
assign_32bit_spacesgraph_t344   bool assign_32bit_spaces ()
345   {
346     unsigned root_index = root_idx ();
347     hb_set_t visited;
348     hb_set_t roots;
349     for (unsigned i = 0; i <= root_index; i++)
350     {
351       for (auto& l : vertices_[i].obj.links)
352       {
353         if (l.width == 4 && !l.is_signed)
354         {
355           roots.add (l.objidx);
356           find_subgraph (l.objidx, visited);
357         }
358       }
359     }
360 
361     // Mark everything not in the subgraphs of 32 bit roots as visited.
362     // This prevents 32 bit subgraphs from being connected via nodes not in the 32 bit subgraphs.
363     visited.invert ();
364 
365     if (!roots) return false;
366 
367     while (roots)
368     {
369       unsigned next = HB_SET_VALUE_INVALID;
370       if (!roots.next (&next)) break;
371 
372       hb_set_t connected_roots;
373       find_connected_nodes (next, roots, visited, connected_roots);
374       isolate_subgraph (connected_roots);
375 
376       unsigned next_space = this->next_space ();
377       num_roots_for_space_.push (0);
378       for (unsigned root : connected_roots)
379       {
380         DEBUG_MSG (SUBSET_REPACK, nullptr, "Subgraph %u gets space %u", root, next_space);
381         vertices_[root].space = next_space;
382         num_roots_for_space_[next_space] = num_roots_for_space_[next_space] + 1;
383         distance_invalid = true;
384         positions_invalid = true;
385       }
386 
387       // TODO(grieger): special case for GSUB/GPOS use extension promotions to move 16 bit space
388       //                into the 32 bit space as needed, instead of using isolation.
389     }
390 
391     return true;
392   }
393 
394   /*
395    * Isolates the subgraph of nodes reachable from root. Any links to nodes in the subgraph
396    * that originate from outside of the subgraph will be removed by duplicating the linked to
397    * object.
398    *
399    * Indices stored in roots will be updated if any of the roots are duplicated to new indices.
400    */
isolate_subgraphgraph_t401   bool isolate_subgraph (hb_set_t& roots)
402   {
403     update_parents ();
404     hb_hashmap_t<unsigned, unsigned> subgraph;
405 
406     // incoming edges to root_idx should be all 32 bit in length so we don't need to de-dup these
407     // set the subgraph incoming edge count to match all of root_idx's incoming edges
408     hb_set_t parents;
409     for (unsigned root_idx : roots)
410     {
411       subgraph.set (root_idx, wide_parents (root_idx, parents));
412       find_subgraph (root_idx, subgraph);
413     }
414 
415     unsigned original_root_idx = root_idx ();
416     hb_hashmap_t<unsigned, unsigned> index_map;
417     bool made_changes = false;
418     for (auto entry : subgraph.iter ())
419     {
420       const auto& node = vertices_[entry.first];
421       unsigned subgraph_incoming_edges = entry.second;
422 
423       if (subgraph_incoming_edges < node.incoming_edges ())
424       {
425         // Only  de-dup objects with incoming links from outside the subgraph.
426         made_changes = true;
427         duplicate_subgraph (entry.first, index_map);
428       }
429     }
430 
431     if (!made_changes)
432       return false;
433 
434     if (original_root_idx != root_idx ()
435         && parents.has (original_root_idx))
436     {
437       // If the root idx has changed since parents was determined, update root idx in parents
438       parents.add (root_idx ());
439       parents.del (original_root_idx);
440     }
441 
442     auto new_subgraph =
443         + subgraph.keys ()
444         | hb_map([&] (unsigned node_idx) {
445           if (index_map.has (node_idx)) return index_map[node_idx];
446           return node_idx;
447         })
448         ;
449 
450     remap_obj_indices (index_map, new_subgraph);
451     remap_obj_indices (index_map, parents.iter (), true);
452 
453     // Update roots set with new indices as needed.
454     unsigned next = HB_SET_VALUE_INVALID;
455     while (roots.next (&next))
456     {
457       if (index_map.has (next))
458       {
459         roots.del (next);
460         roots.add (index_map[next]);
461       }
462     }
463 
464     return true;
465   }
466 
find_subgraphgraph_t467   void find_subgraph (unsigned node_idx, hb_hashmap_t<unsigned, unsigned>& subgraph)
468   {
469     for (const auto& link : vertices_[node_idx].obj.links)
470     {
471       if (subgraph.has (link.objidx))
472       {
473         subgraph.set (link.objidx, subgraph[link.objidx] + 1);
474         continue;
475       }
476       subgraph.set (link.objidx, 1);
477       find_subgraph (link.objidx, subgraph);
478     }
479   }
480 
find_subgraphgraph_t481   void find_subgraph (unsigned node_idx, hb_set_t& subgraph)
482   {
483     if (subgraph.has (node_idx)) return;
484     subgraph.add (node_idx);
485     for (const auto& link : vertices_[node_idx].obj.links)
486       find_subgraph (link.objidx, subgraph);
487   }
488 
489   /*
490    * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign
491    * links. index_map is updated with mappings from old id to new id. If a duplication has already
492    * been performed for a given index, then it will be skipped.
493    */
duplicate_subgraphgraph_t494   void duplicate_subgraph (unsigned node_idx, hb_hashmap_t<unsigned, unsigned>& index_map)
495   {
496     if (index_map.has (node_idx))
497       return;
498 
499     index_map.set (node_idx, duplicate (node_idx));
500     for (const auto& l : object (node_idx).links) {
501       duplicate_subgraph (l.objidx, index_map);
502     }
503   }
504 
505   /*
506    * Creates a copy of node_idx and returns it's new index.
507    */
duplicategraph_t508   unsigned duplicate (unsigned node_idx)
509   {
510     positions_invalid = true;
511     distance_invalid = true;
512 
513     auto* clone = vertices_.push ();
514     auto& child = vertices_[node_idx];
515     if (vertices_.in_error ()) {
516       return -1;
517     }
518 
519     clone->obj.head = child.obj.head;
520     clone->obj.tail = child.obj.tail;
521     clone->distance = child.distance;
522     clone->space = child.space;
523     clone->parents.reset ();
524 
525     unsigned clone_idx = vertices_.length - 2;
526     for (const auto& l : child.obj.links)
527     {
528       clone->obj.links.push (l);
529       vertices_[l.objidx].parents.push (clone_idx);
530     }
531 
532     check_success (!clone->obj.links.in_error ());
533 
534     // The last object is the root of the graph, so swap back the root to the end.
535     // The root's obj idx does change, however since it's root nothing else refers to it.
536     // all other obj idx's will be unaffected.
537     vertex_t root = vertices_[vertices_.length - 2];
538     vertices_[clone_idx] = *clone;
539     vertices_[vertices_.length - 1] = root;
540 
541     // Since the root moved, update the parents arrays of all children on the root.
542     for (const auto& l : root.obj.links)
543       vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
544 
545     return clone_idx;
546   }
547 
548   /*
549    * Creates a copy of child and re-assigns the link from
550    * parent to the clone. The copy is a shallow copy, objects
551    * linked from child are not duplicated.
552    */
duplicategraph_t553   bool duplicate (unsigned parent_idx, unsigned child_idx)
554   {
555     update_parents ();
556 
557     unsigned links_to_child = 0;
558     for (const auto& l : vertices_[parent_idx].obj.links)
559     {
560       if (l.objidx == child_idx) links_to_child++;
561     }
562 
563     if (vertices_[child_idx].incoming_edges () <= links_to_child)
564     {
565       // Can't duplicate this node, doing so would orphan the original one as all remaining links
566       // to child are from parent.
567       DEBUG_MSG (SUBSET_REPACK, nullptr, "  Not duplicating %d => %d",
568                  parent_idx, child_idx);
569       return false;
570     }
571 
572     DEBUG_MSG (SUBSET_REPACK, nullptr, "  Duplicating %d => %d",
573                parent_idx, child_idx);
574 
575     unsigned clone_idx = duplicate (child_idx);
576     if (clone_idx == (unsigned) -1) return false;
577     // duplicate shifts the root node idx, so if parent_idx was root update it.
578     if (parent_idx == clone_idx) parent_idx++;
579 
580     auto& parent = vertices_[parent_idx];
581     for (unsigned i = 0; i < parent.obj.links.length; i++)
582     {
583       auto& l = parent.obj.links[i];
584       if (l.objidx != child_idx)
585         continue;
586 
587       reassign_link (l, parent_idx, clone_idx);
588     }
589 
590     return true;
591   }
592 
593   /*
594    * Raises the sorting priority of all children.
595    */
raise_childrens_prioritygraph_t596   void raise_childrens_priority (unsigned parent_idx)
597   {
598     DEBUG_MSG (SUBSET_REPACK, nullptr, "  Raising priority of all children of %d",
599                parent_idx);
600     // This operation doesn't change ordering until a sort is run, so no need
601     // to invalidate positions. It does not change graph structure so no need
602     // to update distances or edge counts.
603     auto& parent = vertices_[parent_idx].obj;
604     for (unsigned i = 0; i < parent.links.length; i++)
605       vertices_[parent.links[i].objidx].raise_priority ();
606   }
607 
608   /*
609    * Will any offsets overflow on graph when it's serialized?
610    */
will_overflowgraph_t611   bool will_overflow (hb_vector_t<overflow_record_t>* overflows = nullptr)
612   {
613     if (overflows) overflows->resize (0);
614     update_positions ();
615 
616     for (int parent_idx = vertices_.length - 1; parent_idx >= 0; parent_idx--)
617     {
618       for (const auto& link : vertices_[parent_idx].obj.links)
619       {
620         int64_t offset = compute_offset (parent_idx, link);
621         if (is_valid_offset (offset, link))
622           continue;
623 
624         if (!overflows) return true;
625 
626         overflow_record_t r;
627         r.parent = parent_idx;
628         r.child = link.objidx;
629         overflows->push (r);
630       }
631     }
632 
633     if (!overflows) return false;
634     return overflows->length;
635   }
636 
print_orphaned_nodesgraph_t637   void print_orphaned_nodes ()
638   {
639     if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
640 
641     DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
642     parents_invalid = true;
643     update_parents();
644 
645     for (unsigned i = 0; i < root_idx (); i++)
646     {
647       const auto& v = vertices_[i];
648       if (!v.parents)
649         DEBUG_MSG (SUBSET_REPACK, nullptr, "Node %u is orphaned.", i);
650     }
651   }
652 
print_overflowsgraph_t653   void print_overflows (const hb_vector_t<overflow_record_t>& overflows)
654   {
655     if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
656 
657     update_parents ();
658     for (const auto& o : overflows)
659     {
660       const auto& parent = vertices_[o.parent];
661       const auto& child = vertices_[o.child];
662       DEBUG_MSG (SUBSET_REPACK, nullptr,
663                  "  overflow from "
664                  "%4d (%4d in, %4d out, space %2d) => "
665                  "%4d (%4d in, %4d out, space %2d)",
666                  o.parent,
667                  parent.incoming_edges (),
668                  parent.obj.links.length,
669                  space_for (o.parent),
670                  o.child,
671                  child.incoming_edges (),
672                  child.obj.links.length,
673                  space_for (o.child));
674     }
675   }
676 
num_roots_for_spacegraph_t677   unsigned num_roots_for_space (unsigned space) const
678   {
679     return num_roots_for_space_[space];
680   }
681 
next_spacegraph_t682   unsigned next_space () const
683   {
684     return num_roots_for_space_.length;
685   }
686 
move_to_new_spacegraph_t687   void move_to_new_space (unsigned index)
688   {
689     auto& node = vertices_[index];
690     num_roots_for_space_.push (1);
691     num_roots_for_space_[node.space] = num_roots_for_space_[node.space] - 1;
692     node.space = num_roots_for_space_.length - 1;
693   }
694 
space_forgraph_t695   unsigned space_for (unsigned index, unsigned* root = nullptr) const
696   {
697     const auto& node = vertices_[index];
698     if (node.space)
699     {
700       if (root != nullptr)
701         *root = index;
702       return node.space;
703     }
704 
705     if (!node.parents)
706     {
707       if (root)
708         *root = index;
709       return 0;
710     }
711 
712     return space_for (node.parents[0], root);
713   }
714 
err_other_errorgraph_t715   void err_other_error () { this->successful = false; }
716 
717  private:
718 
719   /*
720    * Returns the numbers of incoming edges that are 32bits wide.
721    */
wide_parentsgraph_t722   unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const
723   {
724     unsigned count = 0;
725     hb_set_t visited;
726     for (unsigned p : vertices_[node_idx].parents)
727     {
728       if (visited.has (p)) continue;
729       visited.add (p);
730 
731       for (const auto& l : vertices_[p].obj.links)
732       {
733         if (l.objidx == node_idx && l.width == 4 && !l.is_signed)
734         {
735           count++;
736           parents.add (p);
737         }
738       }
739     }
740     return count;
741   }
742 
check_successgraph_t743   bool check_success (bool success)
744   { return this->successful && (success || (err_other_error (), false)); }
745 
746   /*
747    * Creates a map from objid to # of incoming edges.
748    */
update_parentsgraph_t749   void update_parents ()
750   {
751     if (!parents_invalid) return;
752 
753     for (unsigned i = 0; i < vertices_.length; i++)
754       vertices_[i].parents.reset ();
755 
756     for (unsigned p = 0; p < vertices_.length; p++)
757     {
758       for (auto& l : vertices_[p].obj.links)
759       {
760         vertices_[l.objidx].parents.push (p);
761       }
762     }
763 
764     parents_invalid = false;
765   }
766 
767   /*
768    * compute the serialized start and end positions for each vertex.
769    */
update_positionsgraph_t770   void update_positions ()
771   {
772     if (!positions_invalid) return;
773 
774     unsigned current_pos = 0;
775     for (int i = root_idx (); i >= 0; i--)
776     {
777       auto& v = vertices_[i];
778       v.start = current_pos;
779       current_pos += v.obj.tail - v.obj.head;
780       v.end = current_pos;
781     }
782 
783     positions_invalid = false;
784   }
785 
786   /*
787    * Finds the distance to each object in the graph
788    * from the initial node.
789    */
update_distancesgraph_t790   void update_distances ()
791   {
792     if (!distance_invalid) return;
793 
794     // Uses Dijkstra's algorithm to find all of the shortest distances.
795     // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
796     //
797     // Implementation Note:
798     // Since our priority queue doesn't support fast priority decreases
799     // we instead just add new entries into the queue when a priority changes.
800     // Redundant ones are filtered out later on by the visited set.
801     // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf
802     // for practical performance this is faster then using a more advanced queue
803     // (such as a fibonaacci queue) with a fast decrease priority.
804     for (unsigned i = 0; i < vertices_.length; i++)
805     {
806       if (i == vertices_.length - 1)
807         vertices_[i].distance = 0;
808       else
809         vertices_[i].distance = hb_int_max (int64_t);
810     }
811 
812     hb_priority_queue_t queue;
813     queue.insert (0, vertices_.length - 1);
814 
815     hb_vector_t<bool> visited;
816     visited.resize (vertices_.length);
817 
818     while (!queue.in_error () && !queue.is_empty ())
819     {
820       unsigned next_idx = queue.pop_minimum ().second;
821       if (visited[next_idx]) continue;
822       const auto& next = vertices_[next_idx];
823       int64_t next_distance = vertices_[next_idx].distance;
824       visited[next_idx] = true;
825 
826       for (const auto& link : next.obj.links)
827       {
828         if (visited[link.objidx]) continue;
829 
830         const auto& child = vertices_[link.objidx].obj;
831         unsigned link_width = link.width ? link.width : 4; // treat virtual offsets as 32 bits wide
832         int64_t child_weight = (child.tail - child.head) +
833                                ((int64_t) 1 << (link_width * 8)) * (vertices_[link.objidx].space + 1);
834         int64_t child_distance = next_distance + child_weight;
835 
836         if (child_distance < vertices_[link.objidx].distance)
837         {
838           vertices_[link.objidx].distance = child_distance;
839           queue.insert (child_distance, link.objidx);
840         }
841       }
842     }
843 
844     check_success (!queue.in_error ());
845     if (!check_success (queue.is_empty ()))
846     {
847       print_orphaned_nodes ();
848       return;
849     }
850 
851     distance_invalid = false;
852   }
853 
compute_offsetgraph_t854   int64_t compute_offset (
855       unsigned parent_idx,
856       const hb_serialize_context_t::object_t::link_t& link) const
857   {
858     const auto& parent = vertices_[parent_idx];
859     const auto& child = vertices_[link.objidx];
860     int64_t offset = 0;
861     switch ((hb_serialize_context_t::whence_t) link.whence) {
862       case hb_serialize_context_t::whence_t::Head:
863         offset = child.start - parent.start; break;
864       case hb_serialize_context_t::whence_t::Tail:
865         offset = child.start - parent.end; break;
866       case hb_serialize_context_t::whence_t::Absolute:
867         offset = child.start; break;
868     }
869 
870     assert (offset >= link.bias);
871     offset -= link.bias;
872     return offset;
873   }
874 
is_valid_offsetgraph_t875   bool is_valid_offset (int64_t offset,
876                         const hb_serialize_context_t::object_t::link_t& link) const
877   {
878     if (unlikely (!link.width))
879       // Virtual links can't overflow.
880       return link.is_signed || offset >= 0;
881 
882     if (link.is_signed)
883     {
884       if (link.width == 4)
885         return offset >= -((int64_t) 1 << 31) && offset < ((int64_t) 1 << 31);
886       else
887         return offset >= -(1 << 15) && offset < (1 << 15);
888     }
889     else
890     {
891       if (link.width == 4)
892         return offset >= 0 && offset < ((int64_t) 1 << 32);
893       else if (link.width == 3)
894         return offset >= 0 && offset < ((int32_t) 1 << 24);
895       else
896         return offset >= 0 && offset < (1 << 16);
897     }
898   }
899 
900   /*
901    * Updates a link in the graph to point to a different object. Corrects the
902    * parents vector on the previous and new child nodes.
903    */
reassign_linkgraph_t904   void reassign_link (hb_serialize_context_t::object_t::link_t& link,
905                       unsigned parent_idx,
906                       unsigned new_idx)
907   {
908     unsigned old_idx = link.objidx;
909     link.objidx = new_idx;
910     vertices_[old_idx].remove_parent (parent_idx);
911     vertices_[new_idx].parents.push (parent_idx);
912   }
913 
914   /*
915    * Updates all objidx's in all links using the provided mapping. Corrects incoming edge counts.
916    */
917   template<typename Iterator, hb_requires (hb_is_iterator (Iterator))>
remap_obj_indicesgraph_t918   void remap_obj_indices (const hb_hashmap_t<unsigned, unsigned>& id_map,
919                           Iterator subgraph,
920                           bool only_wide = false)
921   {
922     if (!id_map) return;
923     for (unsigned i : subgraph)
924     {
925       for (unsigned j = 0; j < vertices_[i].obj.links.length; j++)
926       {
927         auto& link = vertices_[i].obj.links[j];
928         if (!id_map.has (link.objidx)) continue;
929         if (only_wide && !(link.width == 4 && !link.is_signed)) continue;
930 
931         reassign_link (link, i, id_map[link.objidx]);
932       }
933     }
934   }
935 
936   /*
937    * Updates all objidx's in all links using the provided mapping.
938    */
remap_all_obj_indicesgraph_t939   void remap_all_obj_indices (const hb_vector_t<unsigned>& id_map,
940                               hb_vector_t<vertex_t>* sorted_graph) const
941   {
942     for (unsigned i = 0; i < sorted_graph->length; i++)
943     {
944       (*sorted_graph)[i].remap_parents (id_map);
945       for (unsigned j = 0; j < (*sorted_graph)[i].obj.links.length; j++)
946       {
947         auto& link = (*sorted_graph)[i].obj.links[j];
948         link.objidx = id_map[link.objidx];
949       }
950     }
951   }
952 
953   template <typename O> void
serialize_link_of_typegraph_t954   serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
955                           char* head,
956                           hb_serialize_context_t* c) const
957   {
958     OT::Offset<O>* offset = reinterpret_cast<OT::Offset<O>*> (head + link.position);
959     *offset = 0;
960     c->add_link (*offset,
961                  // serializer has an extra nil object at the start of the
962                  // object array. So all id's are +1 of what our id's are.
963                  link.objidx + 1,
964                  (hb_serialize_context_t::whence_t) link.whence,
965                  link.bias);
966   }
967 
serialize_linkgraph_t968   void serialize_link (const hb_serialize_context_t::object_t::link_t& link,
969                  char* head,
970                  hb_serialize_context_t* c) const
971   {
972     switch (link.width)
973     {
974     case 0:
975       // Virtual links aren't serialized.
976       return;
977     case 4:
978       if (link.is_signed)
979       {
980         serialize_link_of_type<OT::HBINT32> (link, head, c);
981       } else {
982         serialize_link_of_type<OT::HBUINT32> (link, head, c);
983       }
984       return;
985     case 2:
986       if (link.is_signed)
987       {
988         serialize_link_of_type<OT::HBINT16> (link, head, c);
989       } else {
990         serialize_link_of_type<OT::HBUINT16> (link, head, c);
991       }
992       return;
993     case 3:
994       serialize_link_of_type<OT::HBUINT24> (link, head, c);
995       return;
996     default:
997       // Unexpected link width.
998       assert (0);
999     }
1000   }
1001 
1002   /*
1003    * Finds all nodes in targets that are reachable from start_idx, nodes in visited will be skipped.
1004    * For this search the graph is treated as being undirected.
1005    *
1006    * Connected targets will be added to connected and removed from targets. All visited nodes
1007    * will be added to visited.
1008    */
find_connected_nodesgraph_t1009   void find_connected_nodes (unsigned start_idx,
1010                              hb_set_t& targets,
1011                              hb_set_t& visited,
1012                              hb_set_t& connected)
1013   {
1014     if (visited.has (start_idx)) return;
1015     visited.add (start_idx);
1016 
1017     if (targets.has (start_idx))
1018     {
1019       targets.del (start_idx);
1020       connected.add (start_idx);
1021     }
1022 
1023     const auto& v = vertices_[start_idx];
1024 
1025     // Graph is treated as undirected so search children and parents of start_idx
1026     for (const auto& l : v.obj.links)
1027       find_connected_nodes (l.objidx, targets, visited, connected);
1028 
1029     for (unsigned p : v.parents)
1030       find_connected_nodes (p, targets, visited, connected);
1031   }
1032 
1033  public:
1034   // TODO(garretrieger): make private, will need to move most of offset overflow code into graph.
1035   hb_vector_t<vertex_t> vertices_;
1036  private:
1037   bool parents_invalid;
1038   bool distance_invalid;
1039   bool positions_invalid;
1040   bool successful;
1041   hb_vector_t<unsigned> num_roots_for_space_;
1042 };
1043 
_try_isolating_subgraphs(const hb_vector_t<graph_t::overflow_record_t> & overflows,graph_t & sorted_graph)1044 static bool _try_isolating_subgraphs (const hb_vector_t<graph_t::overflow_record_t>& overflows,
1045                                       graph_t& sorted_graph)
1046 {
1047   for (int i = overflows.length - 1; i >= 0; i--)
1048   {
1049     const graph_t::overflow_record_t& r = overflows[i];
1050     unsigned root = 0;
1051     unsigned space = sorted_graph.space_for (r.parent, &root);
1052     if (!space) continue;
1053     if (sorted_graph.num_roots_for_space (space) <= 1) continue;
1054 
1055     DEBUG_MSG (SUBSET_REPACK, nullptr, "Overflow in space %d moving subgraph %d to space %d.",
1056                space,
1057                root,
1058                sorted_graph.next_space ());
1059 
1060     hb_set_t roots;
1061     roots.add (root);
1062     sorted_graph.isolate_subgraph (roots);
1063     for (unsigned new_root : roots)
1064       sorted_graph.move_to_new_space (new_root);
1065     return true;
1066   }
1067   return false;
1068 }
1069 
_process_overflows(const hb_vector_t<graph_t::overflow_record_t> & overflows,hb_set_t & priority_bumped_parents,graph_t & sorted_graph)1070 static bool _process_overflows (const hb_vector_t<graph_t::overflow_record_t>& overflows,
1071                                 hb_set_t& priority_bumped_parents,
1072                                 graph_t& sorted_graph)
1073 {
1074   bool resolution_attempted = false;
1075 
1076   // Try resolving the furthest overflows first.
1077   for (int i = overflows.length - 1; i >= 0; i--)
1078   {
1079     const graph_t::overflow_record_t& r = overflows[i];
1080     const auto& child = sorted_graph.vertices_[r.child];
1081     if (child.is_shared ())
1082     {
1083       // The child object is shared, we may be able to eliminate the overflow
1084       // by duplicating it.
1085       if (!sorted_graph.duplicate (r.parent, r.child)) continue;
1086       return true;
1087     }
1088 
1089     if (child.is_leaf () && !priority_bumped_parents.has (r.parent))
1090     {
1091       // This object is too far from it's parent, attempt to move it closer.
1092       //
1093       // TODO(garretrieger): initially limiting this to leaf's since they can be
1094       //                     moved closer with fewer consequences. However, this can
1095       //                     likely can be used for non-leafs as well.
1096       // TODO(garretrieger): add a maximum priority, don't try to raise past this.
1097       // TODO(garretrieger): also try lowering priority of the parent. Make it
1098       //                     get placed further up in the ordering, closer to it's children.
1099       //                     this is probably preferable if the total size of the parent object
1100       //                     is < then the total size of the children (and the parent can be moved).
1101       //                     Since in that case moving the parent will cause a smaller increase in
1102       //                     the length of other offsets.
1103       sorted_graph.raise_childrens_priority (r.parent);
1104       priority_bumped_parents.add (r.parent);
1105       resolution_attempted = true;
1106       continue;
1107     }
1108 
1109     // TODO(garretrieger): add additional offset resolution strategies
1110     // - Promotion to extension lookups.
1111     // - Table splitting.
1112   }
1113 
1114   return resolution_attempted;
1115 }
1116 
1117 /*
1118  * Attempts to modify the topological sorting of the provided object graph to
1119  * eliminate offset overflows in the links between objects of the graph. If a
1120  * non-overflowing ordering is found the updated graph is serialized it into the
1121  * provided serialization context.
1122  *
1123  * If necessary the structure of the graph may be modified in ways that do not
1124  * affect the functionality of the graph. For example shared objects may be
1125  * duplicated.
1126  *
1127  * For a detailed writeup describing how the algorithm operates see:
1128  * docs/repacker.md
1129  */
1130 inline void
hb_resolve_overflows(const hb_vector_t<hb_serialize_context_t::object_t * > & packed,hb_tag_t table_tag,hb_serialize_context_t * c,unsigned max_rounds=10)1131 hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed,
1132                       hb_tag_t table_tag,
1133                       hb_serialize_context_t* c,
1134                       unsigned max_rounds = 10) {
1135   // Kahn sort is ~twice as fast as shortest distance sort and works for many fonts
1136   // so try it first to save time.
1137   graph_t sorted_graph (packed);
1138   sorted_graph.sort_kahn ();
1139   if (!sorted_graph.will_overflow ())
1140   {
1141     sorted_graph.serialize (c);
1142     return;
1143   }
1144 
1145   sorted_graph.sort_shortest_distance ();
1146 
1147   if ((table_tag == HB_OT_TAG_GPOS
1148        ||  table_tag == HB_OT_TAG_GSUB)
1149       && sorted_graph.will_overflow ())
1150   {
1151     DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs.");
1152     if (sorted_graph.assign_32bit_spaces ())
1153       sorted_graph.sort_shortest_distance ();
1154   }
1155 
1156   unsigned round = 0;
1157   hb_vector_t<graph_t::overflow_record_t> overflows;
1158   // TODO(garretrieger): select a good limit for max rounds.
1159   while (!sorted_graph.in_error ()
1160          && sorted_graph.will_overflow (&overflows)
1161          && round++ < max_rounds) {
1162     DEBUG_MSG (SUBSET_REPACK, nullptr, "=== Overflow resolution round %d ===", round);
1163     sorted_graph.print_overflows (overflows);
1164 
1165     hb_set_t priority_bumped_parents;
1166 
1167     if (!_try_isolating_subgraphs (overflows, sorted_graph))
1168     {
1169       if (!_process_overflows (overflows, priority_bumped_parents, sorted_graph))
1170       {
1171         DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :(");
1172         break;
1173       }
1174     }
1175 
1176     sorted_graph.sort_shortest_distance ();
1177   }
1178 
1179   if (sorted_graph.in_error ())
1180   {
1181     c->err (HB_SERIALIZE_ERROR_OTHER);
1182     return;
1183   }
1184 
1185   if (sorted_graph.will_overflow ())
1186   {
1187     c->err (HB_SERIALIZE_ERROR_OFFSET_OVERFLOW);
1188     DEBUG_MSG (SUBSET_REPACK, nullptr, "Offset overflow resolution failed.");
1189     return;
1190   }
1191   sorted_graph.serialize (c);
1192 }
1193 
1194 #endif /* HB_REPACKER_HH */
1195