• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import logging
2import os
3from collections import defaultdict, namedtuple
4from functools import reduce
5from itertools import chain
6from math import log2
7from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
8
9from fontTools.config import OPTIONS
10from fontTools.misc.intTools import bit_count, bit_indices
11from fontTools.ttLib import TTFont
12from fontTools.ttLib.tables import otBase, otTables
13
14log = logging.getLogger(__name__)
15
16COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]
17
18# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead
19# https://github.com/fonttools/fonttools/issues/2592
20GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
21GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)
22
23
24def _compression_level_from_env() -> int:
25    env_level = GPOS_COMPACT_MODE_DEFAULT
26    if GPOS_COMPACT_MODE_ENV_KEY in os.environ:
27        import warnings
28
29        warnings.warn(
30            f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
31            "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
32            "in TTFont.cfg.",
33            DeprecationWarning,
34        )
35
36        env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]
37    if len(env_level) == 1 and env_level in "0123456789":
38        return int(env_level)
39    raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}")
40
41
42def compact(font: TTFont, level: int) -> TTFont:
43    # Ideal plan:
44    #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
45    #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
46    #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
47    #  3. Regroup into different subtable arrangements
48    #  4. Put back into the lookup
49    #
50    # Actual implementation:
51    #  2. Only class kerning is optimized currently
52    #  3. If the input kerning is already in several subtables, the subtables
53    #     are not grouped together first; instead each subtable is treated
54    #     independently, so currently this step is:
55    #     Split existing subtables into more smaller subtables
56    gpos = font["GPOS"]
57    for lookup in gpos.table.LookupList.Lookup:
58        if lookup.LookupType == 2:
59            compact_lookup(font, level, lookup)
60        elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
61            compact_ext_lookup(font, level, lookup)
62    return font
63
64
65def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
66    new_subtables = compact_pair_pos(font, level, lookup.SubTable)
67    lookup.SubTable = new_subtables
68    lookup.SubTableCount = len(new_subtables)
69
70
71def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
72    new_subtables = compact_pair_pos(
73        font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
74    )
75    new_ext_subtables = []
76    for subtable in new_subtables:
77        ext_subtable = otTables.ExtensionPos()
78        ext_subtable.Format = 1
79        ext_subtable.ExtSubTable = subtable
80        new_ext_subtables.append(ext_subtable)
81    lookup.SubTable = new_ext_subtables
82    lookup.SubTableCount = len(new_ext_subtables)
83
84
85def compact_pair_pos(
86    font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
87) -> Sequence[otTables.PairPos]:
88    new_subtables = []
89    for subtable in subtables:
90        if subtable.Format == 1:
91            # Not doing anything to Format 1 (yet?)
92            new_subtables.append(subtable)
93        elif subtable.Format == 2:
94            new_subtables.extend(compact_class_pairs(font, level, subtable))
95    return new_subtables
96
97
98def compact_class_pairs(
99    font: TTFont, level: int, subtable: otTables.PairPos
100) -> List[otTables.PairPos]:
101    from fontTools.otlLib.builder import buildPairPosClassesSubtable
102
103    subtables = []
104    classes1: DefaultDict[int, List[str]] = defaultdict(list)
105    for g in subtable.Coverage.glyphs:
106        classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
107    classes2: DefaultDict[int, List[str]] = defaultdict(list)
108    for g, i in subtable.ClassDef2.classDefs.items():
109        classes2[i].append(g)
110    all_pairs = {}
111    for i, class1 in enumerate(subtable.Class1Record):
112        for j, class2 in enumerate(class1.Class2Record):
113            if is_really_zero(class2):
114                continue
115            all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
116                getattr(class2, "Value1", None),
117                getattr(class2, "Value2", None),
118            )
119    grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
120    for pairs in grouped_pairs:
121        subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()))
122    return subtables
123
124
125def is_really_zero(class2: otTables.Class2Record) -> bool:
126    v1 = getattr(class2, "Value1", None)
127    v2 = getattr(class2, "Value2", None)
128    return (v1 is None or v1.getEffectiveFormat() == 0) and (
129        v2 is None or v2.getEffectiveFormat() == 0
130    )
131
132
133Pairs = Dict[
134    Tuple[Tuple[str, ...], Tuple[str, ...]],
135    Tuple[otBase.ValueRecord, otBase.ValueRecord],
136]
137
138# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
139def _getClassRanges(glyphIDs: Iterable[int]):
140    glyphIDs = sorted(glyphIDs)
141    last = glyphIDs[0]
142    ranges = [[last]]
143    for glyphID in glyphIDs[1:]:
144        if glyphID != last + 1:
145            ranges[-1].append(last)
146            ranges.append([glyphID])
147        last = glyphID
148    ranges[-1].append(last)
149    return ranges, glyphIDs[0], glyphIDs[-1]
150
151
152# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
153def _classDef_bytes(
154    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
155    class_ids: List[int],
156    coverage=False,
157):
158    if not class_ids:
159        return 0
160    first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
161    range_count = len(first_ranges)
162    for i in class_ids[1:]:
163        data = class_data[i]
164        range_count += len(data[0])
165        min_glyph_id = min(min_glyph_id, data[1])
166        max_glyph_id = max(max_glyph_id, data[2])
167    glyphCount = max_glyph_id - min_glyph_id + 1
168    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
169    format1_bytes = 6 + glyphCount * 2
170    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
171    format2_bytes = 4 + range_count * 6
172    return min(format1_bytes, format2_bytes)
173
174
175ClusteringContext = namedtuple(
176    "ClusteringContext",
177    [
178        "lines",
179        "all_class1",
180        "all_class1_data",
181        "all_class2_data",
182        "valueFormat1_bytes",
183        "valueFormat2_bytes",
184    ],
185)
186
187
188class Cluster:
189    # TODO(Python 3.7): Turn this into a dataclass
190    # ctx: ClusteringContext
191    # indices: int
192    # Caches
193    # TODO(Python 3.8): use functools.cached_property instead of the
194    # manually cached properties, and remove the cache fields listed below.
195    # _indices: Optional[List[int]] = None
196    # _column_indices: Optional[List[int]] = None
197    # _cost: Optional[int] = None
198
199    __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
200
201    def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
202        self.ctx = ctx
203        self.indices_bitmask = indices_bitmask
204        self._indices = None
205        self._column_indices = None
206        self._cost = None
207
208    @property
209    def indices(self):
210        if self._indices is None:
211            self._indices = bit_indices(self.indices_bitmask)
212        return self._indices
213
214    @property
215    def column_indices(self):
216        if self._column_indices is None:
217            # Indices of columns that have a 1 in at least 1 line
218            #   => binary OR all the lines
219            bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
220            self._column_indices = bit_indices(bitmask)
221        return self._column_indices
222
223    @property
224    def width(self):
225        # Add 1 because Class2=0 cannot be used but needs to be encoded.
226        return len(self.column_indices) + 1
227
228    @property
229    def cost(self):
230        if self._cost is None:
231            self._cost = (
232                # 2 bytes to store the offset to this subtable in the Lookup table above
233                2
234                # Contents of the subtable
235                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
236                # uint16	posFormat	Format identifier: format = 2
237                + 2
238                # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
239                + 2
240                + self.coverage_bytes
241                # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
242                + 2
243                # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
244                + 2
245                # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
246                + 2
247                + self.classDef1_bytes
248                # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
249                + 2
250                + self.classDef2_bytes
251                # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
252                + 2
253                # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
254                + 2
255                # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
256                + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
257                * len(self.indices)
258                * self.width
259            )
260        return self._cost
261
262    @property
263    def coverage_bytes(self):
264        format1_bytes = (
265            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
266            # uint16	coverageFormat	Format identifier — format = 1
267            # uint16	glyphCount	Number of glyphs in the glyph array
268            4
269            # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
270            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
271        )
272        ranges = sorted(
273            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
274        )
275        merged_range_count = 0
276        last = None
277        for (start, end) in ranges:
278            if last is not None and start != last + 1:
279                merged_range_count += 1
280            last = end
281        format2_bytes = (
282            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
283            # uint16	coverageFormat	Format identifier — format = 2
284            # uint16	rangeCount	Number of RangeRecords
285            4
286            # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
287            # uint16	startGlyphID	First glyph ID in the range
288            # uint16	endGlyphID	Last glyph ID in the range
289            # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
290            + merged_range_count * 6
291        )
292        return min(format1_bytes, format2_bytes)
293
294    @property
295    def classDef1_bytes(self):
296        # We can skip encoding one of the Class1 definitions, and use
297        # Class1=0 to represent it instead, because Class1 is gated by the
298        # Coverage definition. Use Class1=0 for the highest byte savings.
299        # Going through all options takes too long, pick the biggest class
300        # = what happens in otlLib.builder.ClassDefBuilder.classes()
301        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
302        return _classDef_bytes(
303            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
304        )
305
306    @property
307    def classDef2_bytes(self):
308        # All Class2 need to be encoded because we can't use Class2=0
309        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
310
311
312def cluster_pairs_by_class2_coverage_custom_cost(
313    font: TTFont,
314    pairs: Pairs,
315    compression: int = 5,
316) -> List[Pairs]:
317    if not pairs:
318        # The subtable was actually empty?
319        return [pairs]
320
321    # Sorted for reproducibility/determinism
322    all_class1 = sorted(set(pair[0] for pair in pairs))
323    all_class2 = sorted(set(pair[1] for pair in pairs))
324
325    # Use Python's big ints for binary vectors representing each line
326    lines = [
327        sum(
328            1 << i if (class1, class2) in pairs else 0
329            for i, class2 in enumerate(all_class2)
330        )
331        for class1 in all_class1
332    ]
333
334    # Map glyph names to ids and work with ints throughout for ClassDef formats
335    name_to_id = font.getReverseGlyphMap()
336    # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
337    all_class1_data = [
338        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
339    ]
340    all_class2_data = [
341        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
342    ]
343
344    format1 = 0
345    format2 = 0
346    for pair, value in pairs.items():
347        format1 |= value[0].getEffectiveFormat() if value[0] else 0
348        format2 |= value[1].getEffectiveFormat() if value[1] else 0
349    valueFormat1_bytes = bit_count(format1) * 2
350    valueFormat2_bytes = bit_count(format2) * 2
351
352    ctx = ClusteringContext(
353        lines,
354        all_class1,
355        all_class1_data,
356        all_class2_data,
357        valueFormat1_bytes,
358        valueFormat2_bytes,
359    )
360
361    cluster_cache: Dict[int, Cluster] = {}
362
363    def make_cluster(indices: int) -> Cluster:
364        cluster = cluster_cache.get(indices, None)
365        if cluster is not None:
366            return cluster
367        cluster = Cluster(ctx, indices)
368        cluster_cache[indices] = cluster
369        return cluster
370
371    def merge(cluster: Cluster, other: Cluster) -> Cluster:
372        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
373
374    # Agglomerative clustering by hand, checking the cost gain of the new
375    # cluster against the previously separate clusters
376    # Start with 1 cluster per line
377    # cluster = set of lines = new subtable
378    clusters = [make_cluster(1 << i) for i in range(len(lines))]
379
380    # Cost of 1 cluster with everything
381    # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
382    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
383    log.debug(f"        len(clusters) = {len(clusters)}")
384
385    while len(clusters) > 1:
386        lowest_cost_change = None
387        best_cluster_index = None
388        best_other_index = None
389        best_merged = None
390        for i, cluster in enumerate(clusters):
391            for j, other in enumerate(clusters[i + 1 :]):
392                merged = merge(cluster, other)
393                cost_change = merged.cost - cluster.cost - other.cost
394                if lowest_cost_change is None or cost_change < lowest_cost_change:
395                    lowest_cost_change = cost_change
396                    best_cluster_index = i
397                    best_other_index = i + 1 + j
398                    best_merged = merged
399        assert lowest_cost_change is not None
400        assert best_cluster_index is not None
401        assert best_other_index is not None
402        assert best_merged is not None
403
404        # If the best merge we found is still taking down the file size, then
405        # there's no question: we must do it, because it's beneficial in both
406        # ways (lower file size and lower number of subtables).  However, if the
407        # best merge we found is not reducing file size anymore, then we need to
408        # look at the other stop criteria = the compression factor.
409        if lowest_cost_change > 0:
410            # Stop critera: check whether we should keep merging.
411            # Compute size reduction brought by splitting
412            cost_after_splitting = sum(c.cost for c in clusters)
413            # size_reduction so that after = before * (1 - size_reduction)
414            # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
415            size_reduction = 1 - cost_after_splitting / cost_before_splitting
416
417            # Force more merging by taking into account the compression number.
418            # Target behaviour: compression number = 1 to 9, default 5 like gzip
419            #   - 1 = accept to add 1 subtable to reduce size by 50%
420            #   - 5 = accept to add 5 subtables to reduce size by 50%
421            # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
422            # Given the size reduction we have achieved so far, compute how many
423            # new subtables are acceptable.
424            max_new_subtables = -log2(1 - size_reduction) * compression
425            log.debug(
426                f"            len(clusters) = {len(clusters):3d}    size_reduction={size_reduction:5.2f}    max_new_subtables={max_new_subtables}",
427            )
428            if compression == 9:
429                # Override level 9 to mean: create any number of subtables
430                max_new_subtables = len(clusters)
431
432            # If we have managed to take the number of new subtables below the
433            # threshold, then we can stop.
434            if len(clusters) <= max_new_subtables + 1:
435                break
436
437        # No reason to stop yet, do the merge and move on to the next.
438        del clusters[best_other_index]
439        clusters[best_cluster_index] = best_merged
440
441    # All clusters are final; turn bitmasks back into the "Pairs" format
442    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
443    for pair, values in pairs.items():
444        pairs_by_class1[pair[0]][pair] = values
445    pairs_groups: List[Pairs] = []
446    for cluster in clusters:
447        pairs_group: Pairs = dict()
448        for i in cluster.indices:
449            class1 = all_class1[i]
450            pairs_group.update(pairs_by_class1[class1])
451        pairs_groups.append(pairs_group)
452    return pairs_groups
453