1import logging 2import os 3from collections import defaultdict, namedtuple 4from functools import reduce 5from itertools import chain 6from math import log2 7from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple 8 9from fontTools.config import OPTIONS 10from fontTools.misc.intTools import bit_count, bit_indices 11from fontTools.ttLib import TTFont 12from fontTools.ttLib.tables import otBase, otTables 13 14log = logging.getLogger(__name__) 15 16COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"] 17 18# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead 19# https://github.com/fonttools/fonttools/issues/2592 20GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE" 21GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default) 22 23 24def _compression_level_from_env() -> int: 25 env_level = GPOS_COMPACT_MODE_DEFAULT 26 if GPOS_COMPACT_MODE_ENV_KEY in os.environ: 27 import warnings 28 29 warnings.warn( 30 f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. " 31 "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option " 32 "in TTFont.cfg.", 33 DeprecationWarning, 34 ) 35 36 env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY] 37 if len(env_level) == 1 and env_level in "0123456789": 38 return int(env_level) 39 raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}") 40 41 42def compact(font: TTFont, level: int) -> TTFont: 43 # Ideal plan: 44 # 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable 45 # https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable 46 # 2. Extract glyph-glyph kerning and class-kerning from all present subtables 47 # 3. Regroup into different subtable arrangements 48 # 4. Put back into the lookup 49 # 50 # Actual implementation: 51 # 2. Only class kerning is optimized currently 52 # 3. If the input kerning is already in several subtables, the subtables 53 # are not grouped together first; instead each subtable is treated 54 # independently, so currently this step is: 55 # Split existing subtables into more smaller subtables 56 gpos = font["GPOS"] 57 for lookup in gpos.table.LookupList.Lookup: 58 if lookup.LookupType == 2: 59 compact_lookup(font, level, lookup) 60 elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2: 61 compact_ext_lookup(font, level, lookup) 62 return font 63 64 65def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None: 66 new_subtables = compact_pair_pos(font, level, lookup.SubTable) 67 lookup.SubTable = new_subtables 68 lookup.SubTableCount = len(new_subtables) 69 70 71def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None: 72 new_subtables = compact_pair_pos( 73 font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable] 74 ) 75 new_ext_subtables = [] 76 for subtable in new_subtables: 77 ext_subtable = otTables.ExtensionPos() 78 ext_subtable.Format = 1 79 ext_subtable.ExtSubTable = subtable 80 new_ext_subtables.append(ext_subtable) 81 lookup.SubTable = new_ext_subtables 82 lookup.SubTableCount = len(new_ext_subtables) 83 84 85def compact_pair_pos( 86 font: TTFont, level: int, subtables: Sequence[otTables.PairPos] 87) -> Sequence[otTables.PairPos]: 88 new_subtables = [] 89 for subtable in subtables: 90 if subtable.Format == 1: 91 # Not doing anything to Format 1 (yet?) 92 new_subtables.append(subtable) 93 elif subtable.Format == 2: 94 new_subtables.extend(compact_class_pairs(font, level, subtable)) 95 return new_subtables 96 97 98def compact_class_pairs( 99 font: TTFont, level: int, subtable: otTables.PairPos 100) -> List[otTables.PairPos]: 101 from fontTools.otlLib.builder import buildPairPosClassesSubtable 102 103 subtables = [] 104 classes1: DefaultDict[int, List[str]] = defaultdict(list) 105 for g in subtable.Coverage.glyphs: 106 classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g) 107 classes2: DefaultDict[int, List[str]] = defaultdict(list) 108 for g, i in subtable.ClassDef2.classDefs.items(): 109 classes2[i].append(g) 110 all_pairs = {} 111 for i, class1 in enumerate(subtable.Class1Record): 112 for j, class2 in enumerate(class1.Class2Record): 113 if is_really_zero(class2): 114 continue 115 all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = ( 116 getattr(class2, "Value1", None), 117 getattr(class2, "Value2", None), 118 ) 119 grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level) 120 for pairs in grouped_pairs: 121 subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap())) 122 return subtables 123 124 125def is_really_zero(class2: otTables.Class2Record) -> bool: 126 v1 = getattr(class2, "Value1", None) 127 v2 = getattr(class2, "Value2", None) 128 return (v1 is None or v1.getEffectiveFormat() == 0) and ( 129 v2 is None or v2.getEffectiveFormat() == 0 130 ) 131 132 133Pairs = Dict[ 134 Tuple[Tuple[str, ...], Tuple[str, ...]], 135 Tuple[otBase.ValueRecord, otBase.ValueRecord], 136] 137 138# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958 139def _getClassRanges(glyphIDs: Iterable[int]): 140 glyphIDs = sorted(glyphIDs) 141 last = glyphIDs[0] 142 ranges = [[last]] 143 for glyphID in glyphIDs[1:]: 144 if glyphID != last + 1: 145 ranges[-1].append(last) 146 ranges.append([glyphID]) 147 last = glyphID 148 ranges[-1].append(last) 149 return ranges, glyphIDs[0], glyphIDs[-1] 150 151 152# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989 153def _classDef_bytes( 154 class_data: List[Tuple[List[Tuple[int, int]], int, int]], 155 class_ids: List[int], 156 coverage=False, 157): 158 if not class_ids: 159 return 0 160 first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]] 161 range_count = len(first_ranges) 162 for i in class_ids[1:]: 163 data = class_data[i] 164 range_count += len(data[0]) 165 min_glyph_id = min(min_glyph_id, data[1]) 166 max_glyph_id = max(max_glyph_id, data[2]) 167 glyphCount = max_glyph_id - min_glyph_id + 1 168 # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1 169 format1_bytes = 6 + glyphCount * 2 170 # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2 171 format2_bytes = 4 + range_count * 6 172 return min(format1_bytes, format2_bytes) 173 174 175ClusteringContext = namedtuple( 176 "ClusteringContext", 177 [ 178 "lines", 179 "all_class1", 180 "all_class1_data", 181 "all_class2_data", 182 "valueFormat1_bytes", 183 "valueFormat2_bytes", 184 ], 185) 186 187 188class Cluster: 189 # TODO(Python 3.7): Turn this into a dataclass 190 # ctx: ClusteringContext 191 # indices: int 192 # Caches 193 # TODO(Python 3.8): use functools.cached_property instead of the 194 # manually cached properties, and remove the cache fields listed below. 195 # _indices: Optional[List[int]] = None 196 # _column_indices: Optional[List[int]] = None 197 # _cost: Optional[int] = None 198 199 __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost" 200 201 def __init__(self, ctx: ClusteringContext, indices_bitmask: int): 202 self.ctx = ctx 203 self.indices_bitmask = indices_bitmask 204 self._indices = None 205 self._column_indices = None 206 self._cost = None 207 208 @property 209 def indices(self): 210 if self._indices is None: 211 self._indices = bit_indices(self.indices_bitmask) 212 return self._indices 213 214 @property 215 def column_indices(self): 216 if self._column_indices is None: 217 # Indices of columns that have a 1 in at least 1 line 218 # => binary OR all the lines 219 bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices)) 220 self._column_indices = bit_indices(bitmask) 221 return self._column_indices 222 223 @property 224 def width(self): 225 # Add 1 because Class2=0 cannot be used but needs to be encoded. 226 return len(self.column_indices) + 1 227 228 @property 229 def cost(self): 230 if self._cost is None: 231 self._cost = ( 232 # 2 bytes to store the offset to this subtable in the Lookup table above 233 2 234 # Contents of the subtable 235 # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment 236 # uint16 posFormat Format identifier: format = 2 237 + 2 238 # Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable. 239 + 2 240 + self.coverage_bytes 241 # uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero). 242 + 2 243 # uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero). 244 + 2 245 # Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair. 246 + 2 247 + self.classDef1_bytes 248 # Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair. 249 + 2 250 + self.classDef2_bytes 251 # uint16 class1Count Number of classes in classDef1 table — includes Class 0. 252 + 2 253 # uint16 class2Count Number of classes in classDef2 table — includes Class 0. 254 + 2 255 # Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1. 256 + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes) 257 * len(self.indices) 258 * self.width 259 ) 260 return self._cost 261 262 @property 263 def coverage_bytes(self): 264 format1_bytes = ( 265 # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1 266 # uint16 coverageFormat Format identifier — format = 1 267 # uint16 glyphCount Number of glyphs in the glyph array 268 4 269 # uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order 270 + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2 271 ) 272 ranges = sorted( 273 chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices) 274 ) 275 merged_range_count = 0 276 last = None 277 for (start, end) in ranges: 278 if last is not None and start != last + 1: 279 merged_range_count += 1 280 last = end 281 format2_bytes = ( 282 # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2 283 # uint16 coverageFormat Format identifier — format = 2 284 # uint16 rangeCount Number of RangeRecords 285 4 286 # RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID. 287 # uint16 startGlyphID First glyph ID in the range 288 # uint16 endGlyphID Last glyph ID in the range 289 # uint16 startCoverageIndex Coverage Index of first glyph ID in range 290 + merged_range_count * 6 291 ) 292 return min(format1_bytes, format2_bytes) 293 294 @property 295 def classDef1_bytes(self): 296 # We can skip encoding one of the Class1 definitions, and use 297 # Class1=0 to represent it instead, because Class1 is gated by the 298 # Coverage definition. Use Class1=0 for the highest byte savings. 299 # Going through all options takes too long, pick the biggest class 300 # = what happens in otlLib.builder.ClassDefBuilder.classes() 301 biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i])) 302 return _classDef_bytes( 303 self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index] 304 ) 305 306 @property 307 def classDef2_bytes(self): 308 # All Class2 need to be encoded because we can't use Class2=0 309 return _classDef_bytes(self.ctx.all_class2_data, self.column_indices) 310 311 312def cluster_pairs_by_class2_coverage_custom_cost( 313 font: TTFont, 314 pairs: Pairs, 315 compression: int = 5, 316) -> List[Pairs]: 317 if not pairs: 318 # The subtable was actually empty? 319 return [pairs] 320 321 # Sorted for reproducibility/determinism 322 all_class1 = sorted(set(pair[0] for pair in pairs)) 323 all_class2 = sorted(set(pair[1] for pair in pairs)) 324 325 # Use Python's big ints for binary vectors representing each line 326 lines = [ 327 sum( 328 1 << i if (class1, class2) in pairs else 0 329 for i, class2 in enumerate(all_class2) 330 ) 331 for class1 in all_class1 332 ] 333 334 # Map glyph names to ids and work with ints throughout for ClassDef formats 335 name_to_id = font.getReverseGlyphMap() 336 # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id) 337 all_class1_data = [ 338 _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1 339 ] 340 all_class2_data = [ 341 _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2 342 ] 343 344 format1 = 0 345 format2 = 0 346 for pair, value in pairs.items(): 347 format1 |= value[0].getEffectiveFormat() if value[0] else 0 348 format2 |= value[1].getEffectiveFormat() if value[1] else 0 349 valueFormat1_bytes = bit_count(format1) * 2 350 valueFormat2_bytes = bit_count(format2) * 2 351 352 ctx = ClusteringContext( 353 lines, 354 all_class1, 355 all_class1_data, 356 all_class2_data, 357 valueFormat1_bytes, 358 valueFormat2_bytes, 359 ) 360 361 cluster_cache: Dict[int, Cluster] = {} 362 363 def make_cluster(indices: int) -> Cluster: 364 cluster = cluster_cache.get(indices, None) 365 if cluster is not None: 366 return cluster 367 cluster = Cluster(ctx, indices) 368 cluster_cache[indices] = cluster 369 return cluster 370 371 def merge(cluster: Cluster, other: Cluster) -> Cluster: 372 return make_cluster(cluster.indices_bitmask | other.indices_bitmask) 373 374 # Agglomerative clustering by hand, checking the cost gain of the new 375 # cluster against the previously separate clusters 376 # Start with 1 cluster per line 377 # cluster = set of lines = new subtable 378 clusters = [make_cluster(1 << i) for i in range(len(lines))] 379 380 # Cost of 1 cluster with everything 381 # `(1 << len) - 1` gives a bitmask full of 1's of length `len` 382 cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost 383 log.debug(f" len(clusters) = {len(clusters)}") 384 385 while len(clusters) > 1: 386 lowest_cost_change = None 387 best_cluster_index = None 388 best_other_index = None 389 best_merged = None 390 for i, cluster in enumerate(clusters): 391 for j, other in enumerate(clusters[i + 1 :]): 392 merged = merge(cluster, other) 393 cost_change = merged.cost - cluster.cost - other.cost 394 if lowest_cost_change is None or cost_change < lowest_cost_change: 395 lowest_cost_change = cost_change 396 best_cluster_index = i 397 best_other_index = i + 1 + j 398 best_merged = merged 399 assert lowest_cost_change is not None 400 assert best_cluster_index is not None 401 assert best_other_index is not None 402 assert best_merged is not None 403 404 # If the best merge we found is still taking down the file size, then 405 # there's no question: we must do it, because it's beneficial in both 406 # ways (lower file size and lower number of subtables). However, if the 407 # best merge we found is not reducing file size anymore, then we need to 408 # look at the other stop criteria = the compression factor. 409 if lowest_cost_change > 0: 410 # Stop critera: check whether we should keep merging. 411 # Compute size reduction brought by splitting 412 cost_after_splitting = sum(c.cost for c in clusters) 413 # size_reduction so that after = before * (1 - size_reduction) 414 # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2 415 size_reduction = 1 - cost_after_splitting / cost_before_splitting 416 417 # Force more merging by taking into account the compression number. 418 # Target behaviour: compression number = 1 to 9, default 5 like gzip 419 # - 1 = accept to add 1 subtable to reduce size by 50% 420 # - 5 = accept to add 5 subtables to reduce size by 50% 421 # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691 422 # Given the size reduction we have achieved so far, compute how many 423 # new subtables are acceptable. 424 max_new_subtables = -log2(1 - size_reduction) * compression 425 log.debug( 426 f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}", 427 ) 428 if compression == 9: 429 # Override level 9 to mean: create any number of subtables 430 max_new_subtables = len(clusters) 431 432 # If we have managed to take the number of new subtables below the 433 # threshold, then we can stop. 434 if len(clusters) <= max_new_subtables + 1: 435 break 436 437 # No reason to stop yet, do the merge and move on to the next. 438 del clusters[best_other_index] 439 clusters[best_cluster_index] = best_merged 440 441 # All clusters are final; turn bitmasks back into the "Pairs" format 442 pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict) 443 for pair, values in pairs.items(): 444 pairs_by_class1[pair[0]][pair] = values 445 pairs_groups: List[Pairs] = [] 446 for cluster in clusters: 447 pairs_group: Pairs = dict() 448 for i in cluster.indices: 449 class1 = all_class1[i] 450 pairs_group.update(pairs_by_class1[class1]) 451 pairs_groups.append(pairs_group) 452 return pairs_groups 453