1# -*- coding: utf-8 -*- 2 3"""T2CharString operator specializer and generalizer.""" 4 5from __future__ import print_function, division, absolute_import 6from fontTools.misc.py23 import * 7 8 9def stringToProgram(string): 10 if isinstance(string, basestring): 11 string = string.split() 12 program = [] 13 for token in string: 14 try: 15 token = int(token) 16 except ValueError: 17 try: 18 token = float(token) 19 except ValueError: 20 pass 21 program.append(token) 22 return program 23 24 25def programToString(program): 26 return ' '.join(str(x) for x in program) 27 28 29def programToCommands(program): 30 """Takes a T2CharString program list and returns list of commands. 31 Each command is a two-tuple of commandname,arg-list. The commandname might 32 be empty string if no commandname shall be emitted (used for glyph width, 33 hintmask/cntrmask argument, as well as stray arguments at the end of the 34 program (¯\_(ツ)_/¯).""" 35 36 width = None 37 commands = [] 38 stack = [] 39 it = iter(program) 40 for token in it: 41 if not isinstance(token, basestring): 42 stack.append(token) 43 continue 44 45 if width is None and token in {'hstem', 'hstemhm', 'vstem', 'vstemhm', 46 'cntrmask', 'hintmask', 47 'hmoveto', 'vmoveto', 'rmoveto', 48 'endchar'}: 49 parity = token in {'hmoveto', 'vmoveto'} 50 if stack and (len(stack) % 2) ^ parity: 51 width = stack.pop(0) 52 commands.append(('', [width])) 53 54 if token in {'hintmask', 'cntrmask'}: 55 if stack: 56 commands.append(('', stack)) 57 commands.append((token, [])) 58 commands.append(('', [next(it)])) 59 else: 60 commands.append((token,stack)) 61 stack = [] 62 if stack: 63 commands.append(('', stack)) 64 return commands 65 66 67def commandsToProgram(commands): 68 """Takes a commands list as returned by programToCommands() and converts 69 it back to a T2CharString program list.""" 70 program = [] 71 for op,args in commands: 72 program.extend(args) 73 if op: 74 program.append(op) 75 return program 76 77 78def _everyN(el, n): 79 """Group the list el into groups of size n""" 80 if len(el) % n != 0: raise ValueError(el) 81 for i in range(0, len(el), n): 82 yield el[i:i+n] 83 84 85class _GeneralizerDecombinerCommandsMap(object): 86 87 @staticmethod 88 def rmoveto(args): 89 if len(args) != 2: raise ValueError(args) 90 yield ('rmoveto', args) 91 @staticmethod 92 def hmoveto(args): 93 if len(args) != 1: raise ValueError(args) 94 yield ('rmoveto', [args[0], 0]) 95 @staticmethod 96 def vmoveto(args): 97 if len(args) != 1: raise ValueError(args) 98 yield ('rmoveto', [0, args[0]]) 99 100 @staticmethod 101 def rlineto(args): 102 if not args: raise ValueError(args) 103 for args in _everyN(args, 2): 104 yield ('rlineto', args) 105 @staticmethod 106 def hlineto(args): 107 if not args: raise ValueError(args) 108 it = iter(args) 109 try: 110 while True: 111 yield ('rlineto', [next(it), 0]) 112 yield ('rlineto', [0, next(it)]) 113 except StopIteration: 114 pass 115 @staticmethod 116 def vlineto(args): 117 if not args: raise ValueError(args) 118 it = iter(args) 119 try: 120 while True: 121 yield ('rlineto', [0, next(it)]) 122 yield ('rlineto', [next(it), 0]) 123 except StopIteration: 124 pass 125 @staticmethod 126 def rrcurveto(args): 127 if not args: raise ValueError(args) 128 for args in _everyN(args, 6): 129 yield ('rrcurveto', args) 130 @staticmethod 131 def hhcurveto(args): 132 if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args) 133 if len(args) % 2 == 1: 134 yield ('rrcurveto', [args[1], args[0], args[2], args[3], args[4], 0]) 135 args = args[5:] 136 for args in _everyN(args, 4): 137 yield ('rrcurveto', [args[0], 0, args[1], args[2], args[3], 0]) 138 @staticmethod 139 def vvcurveto(args): 140 if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args) 141 if len(args) % 2 == 1: 142 yield ('rrcurveto', [args[0], args[1], args[2], args[3], 0, args[4]]) 143 args = args[5:] 144 for args in _everyN(args, 4): 145 yield ('rrcurveto', [0, args[0], args[1], args[2], 0, args[3]]) 146 @staticmethod 147 def hvcurveto(args): 148 if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args) 149 last_args = None 150 if len(args) % 2 == 1: 151 lastStraight = len(args) % 8 == 5 152 args, last_args = args[:-5], args[-5:] 153 it = _everyN(args, 4) 154 try: 155 while True: 156 args = next(it) 157 yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]]) 158 args = next(it) 159 yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0]) 160 except StopIteration: 161 pass 162 if last_args: 163 args = last_args 164 if lastStraight: 165 yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]]) 166 else: 167 yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]]) 168 @staticmethod 169 def vhcurveto(args): 170 if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args) 171 last_args = None 172 if len(args) % 2 == 1: 173 lastStraight = len(args) % 8 == 5 174 args, last_args = args[:-5], args[-5:] 175 it = _everyN(args, 4) 176 try: 177 while True: 178 args = next(it) 179 yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0]) 180 args = next(it) 181 yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]]) 182 except StopIteration: 183 pass 184 if last_args: 185 args = last_args 186 if lastStraight: 187 yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]]) 188 else: 189 yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]]) 190 191 @staticmethod 192 def rcurveline(args): 193 if len(args) < 8 or len(args) % 6 != 2: raise ValueError(args) 194 args, last_args = args[:-2], args[-2:] 195 for args in _everyN(args, 6): 196 yield ('rrcurveto', args) 197 yield ('rlineto', last_args) 198 @staticmethod 199 def rlinecurve(args): 200 if len(args) < 8 or len(args) % 2 != 0: raise ValueError(args) 201 args, last_args = args[:-6], args[-6:] 202 for args in _everyN(args, 2): 203 yield ('rlineto', args) 204 yield ('rrcurveto', last_args) 205 206 207def generalizeCommands(commands, ignoreErrors=False): 208 result = [] 209 mapping = _GeneralizerDecombinerCommandsMap 210 for op,args in commands: 211 func = getattr(mapping, op, None) 212 if not func: 213 result.append((op,args)) 214 continue 215 try: 216 for command in func(args): 217 result.append(command) 218 except ValueError: 219 if ignoreErrors: 220 # Store op as data, such that consumers of commands do not have to 221 # deal with incorrect number of arguments. 222 result.append(('', args)) 223 result.append(('', [op])) 224 else: 225 raise 226 return result 227 228def generalizeProgram(program, **kwargs): 229 return commandsToProgram(generalizeCommands(programToCommands(program), **kwargs)) 230 231 232def _categorizeVector(v): 233 """ 234 Takes X,Y vector v and returns one of r, h, v, or 0 depending on which 235 of X and/or Y are zero, plus tuple of nonzero ones. If both are zero, 236 it returns a single zero still. 237 238 >>> _categorizeVector((0,0)) 239 ('0', (0,)) 240 >>> _categorizeVector((1,0)) 241 ('h', (1,)) 242 >>> _categorizeVector((0,2)) 243 ('v', (2,)) 244 >>> _categorizeVector((1,2)) 245 ('r', (1, 2)) 246 """ 247 if not v[0]: 248 if not v[1]: 249 return '0', v[:1] 250 else: 251 return 'v', v[1:] 252 else: 253 if not v[1]: 254 return 'h', v[:1] 255 else: 256 return 'r', v 257 258def _mergeCategories(a, b): 259 if a == '0': return b 260 if b == '0': return a 261 if a == b: return a 262 return None 263 264def _negateCategory(a): 265 if a == 'h': return 'v' 266 if a == 'v': return 'h' 267 assert a in '0r' 268 return a 269 270def specializeCommands(commands, 271 ignoreErrors=False, 272 generalizeFirst=True, 273 preserveTopology=False, 274 maxstack=48): 275 276 # We perform several rounds of optimizations. They are carefully ordered and are: 277 # 278 # 0. Generalize commands. 279 # This ensures that they are in our expected simple form, with each line/curve only 280 # having arguments for one segment, and using the generic form (rlineto/rrcurveto). 281 # If caller is sure the input is in this form, they can turn off generalization to 282 # save time. 283 # 284 # 1. Combine successive rmoveto operations. 285 # 286 # 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants. 287 # We specialize into some, made-up, variants as well, which simplifies following 288 # passes. 289 # 290 # 3. Merge or delete redundant operations, to the extent requested. 291 # OpenType spec declares point numbers in CFF undefined. As such, we happily 292 # change topology. If client relies on point numbers (in GPOS anchors, or for 293 # hinting purposes(what?)) they can turn this off. 294 # 295 # 4. Peephole optimization to revert back some of the h/v variants back into their 296 # original "relative" operator (rline/rrcurveto) if that saves a byte. 297 # 298 # 5. Combine adjacent operators when possible, minding not to go over max stack size. 299 # 300 # 6. Resolve any remaining made-up operators into real operators. 301 # 302 # I have convinced myself that this produces optimal bytecode (except for, possibly 303 # one byte each time maxstack size prohibits combining.) YMMV, but you'd be wrong. :-) 304 # A dynamic-programming approach can do the same but would be significantly slower. 305 306 307 # 0. Generalize commands. 308 if generalizeFirst: 309 commands = generalizeCommands(commands, ignoreErrors=ignoreErrors) 310 else: 311 commands = list(commands) # Make copy since we modify in-place later. 312 313 # 1. Combine successive rmoveto operations. 314 for i in range(len(commands)-1, 0, -1): 315 if 'rmoveto' == commands[i][0] == commands[i-1][0]: 316 v1, v2 = commands[i-1][1], commands[i][1] 317 commands[i-1] = ('rmoveto', [v1[0]+v2[0], v1[1]+v2[1]]) 318 del commands[i] 319 320 # 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants. 321 # 322 # We, in fact, specialize into more, made-up, variants that special-case when both 323 # X and Y components are zero. This simplifies the following optimization passes. 324 # This case is rare, but OCD does not let me skip it. 325 # 326 # After this round, we will have four variants that use the following mnemonics: 327 # 328 # - 'r' for relative, ie. non-zero X and non-zero Y, 329 # - 'h' for horizontal, ie. zero X and non-zero Y, 330 # - 'v' for vertical, ie. non-zero X and zero Y, 331 # - '0' for zeros, ie. zero X and zero Y. 332 # 333 # The '0' pseudo-operators are not part of the spec, but help simplify the following 334 # optimization rounds. We resolve them at the end. So, after this, we will have four 335 # moveto and four lineto variants: 336 # 337 # - 0moveto, 0lineto 338 # - hmoveto, hlineto 339 # - vmoveto, vlineto 340 # - rmoveto, rlineto 341 # 342 # and sixteen curveto variants. For example, a '0hcurveto' operator means a curve 343 # dx0,dy0,dx1,dy1,dx2,dy2,dx3,dy3 where dx0, dx1, and dy3 are zero but not dx3. 344 # An 'rvcurveto' means dx3 is zero but not dx0,dy0,dy3. 345 # 346 # There are nine different variants of curves without the '0'. Those nine map exactly 347 # to the existing curve variants in the spec: rrcurveto, and the four variants hhcurveto, 348 # vvcurveto, hvcurveto, and vhcurveto each cover two cases, one with an odd number of 349 # arguments and one without. Eg. an hhcurveto with an extra argument (odd number of 350 # arguments) is in fact an rhcurveto. The operators in the spec are designed such that 351 # all four of rhcurveto, rvcurveto, hrcurveto, and vrcurveto are encodable for one curve. 352 # 353 # Of the curve types with '0', the 00curveto is equivalent to a lineto variant. The rest 354 # of the curve types with a 0 need to be encoded as a h or v variant. Ie. a '0' can be 355 # thought of a "don't care" and can be used as either an 'h' or a 'v'. As such, we always 356 # encode a number 0 as argument when we use a '0' variant. Later on, we can just substitute 357 # the '0' with either 'h' or 'v' and it works. 358 # 359 # When we get to curve splines however, things become more complicated... XXX finish this. 360 # There's one more complexity with splines. If one side of the spline is not horizontal or 361 # vertical (or zero), ie. if it's 'r', then it limits which spline types we can encode. 362 # Only hhcurveto and vvcurveto operators can encode a spline starting with 'r', and 363 # only hvcurveto and vhcurveto operators can encode a spline ending with 'r'. 364 # This limits our merge opportunities later. 365 # 366 for i in range(len(commands)): 367 op,args = commands[i] 368 369 if op in {'rmoveto', 'rlineto'}: 370 c, args = _categorizeVector(args) 371 commands[i] = c+op[1:], args 372 continue 373 374 if op == 'rrcurveto': 375 c1, args1 = _categorizeVector(args[:2]) 376 c2, args2 = _categorizeVector(args[-2:]) 377 commands[i] = c1+c2+'curveto', args1+args[2:4]+args2 378 continue 379 380 # 3. Merge or delete redundant operations, to the extent requested. 381 # 382 # TODO 383 # A 0moveto that comes before all other path operations can be removed. 384 # though I find conflicting evidence for this. 385 # 386 # TODO 387 # "If hstem and vstem hints are both declared at the beginning of a 388 # CharString, and this sequence is followed directly by the hintmask or 389 # cntrmask operators, then the vstem hint operator (or, if applicable, 390 # the vstemhm operator) need not be included." 391 # 392 # "The sequence and form of a CFF2 CharString program may be represented as: 393 # {hs* vs* cm* hm* mt subpath}? {mt subpath}*" 394 # 395 # https://www.microsoft.com/typography/otspec/cff2charstr.htm#section3.1 396 # 397 # For Type2 CharStrings the sequence is: 398 # w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar" 399 400 401 # Some other redundancies change topology (point numbers). 402 if not preserveTopology: 403 for i in range(len(commands)-1, -1, -1): 404 op, args = commands[i] 405 406 # A 00curveto is demoted to a (specialized) lineto. 407 if op == '00curveto': 408 assert len(args) == 4 409 c, args = _categorizeVector(args[1:3]) 410 op = c+'lineto' 411 commands[i] = op, args 412 # and then... 413 414 # A 0lineto can be deleted. 415 if op == '0lineto': 416 del commands[i] 417 continue 418 419 # Merge adjacent hlineto's and vlineto's. 420 if (i and op in {'hlineto', 'vlineto'} and 421 (op == commands[i-1][0]) and 422 (not isinstance(args[0], list))): 423 _, other_args = commands[i-1] 424 assert len(args) == 1 and len(other_args) == 1 425 commands[i-1] = (op, [other_args[0]+args[0]]) 426 del commands[i] 427 continue 428 429 # 4. Peephole optimization to revert back some of the h/v variants back into their 430 # original "relative" operator (rline/rrcurveto) if that saves a byte. 431 for i in range(1, len(commands)-1): 432 op,args = commands[i] 433 prv,nxt = commands[i-1][0], commands[i+1][0] 434 435 if op in {'0lineto', 'hlineto', 'vlineto'} and prv == nxt == 'rlineto': 436 assert len(args) == 1 437 args = [0, args[0]] if op[0] == 'v' else [args[0], 0] 438 commands[i] = ('rlineto', args) 439 continue 440 441 if op[2:] == 'curveto' and len(args) == 5 and prv == nxt == 'rrcurveto': 442 assert (op[0] == 'r') ^ (op[1] == 'r') 443 if op[0] == 'v': 444 pos = 0 445 elif op[0] != 'r': 446 pos = 1 447 elif op[1] == 'v': 448 pos = 4 449 else: 450 pos = 5 451 # Insert, while maintaining the type of args (can be tuple or list). 452 args = args[:pos] + type(args)((0,)) + args[pos:] 453 commands[i] = ('rrcurveto', args) 454 continue 455 456 # 5. Combine adjacent operators when possible, minding not to go over max stack size. 457 for i in range(len(commands)-1, 0, -1): 458 op1,args1 = commands[i-1] 459 op2,args2 = commands[i] 460 new_op = None 461 462 # Merge logic... 463 if {op1, op2} <= {'rlineto', 'rrcurveto'}: 464 if op1 == op2: 465 new_op = op1 466 else: 467 if op2 == 'rrcurveto' and len(args2) == 6: 468 new_op = 'rlinecurve' 469 elif len(args2) == 2: 470 new_op = 'rcurveline' 471 472 elif (op1, op2) in {('rlineto', 'rlinecurve'), ('rrcurveto', 'rcurveline')}: 473 new_op = op2 474 475 elif {op1, op2} == {'vlineto', 'hlineto'}: 476 new_op = op1 477 478 elif 'curveto' == op1[2:] == op2[2:]: 479 d0, d1 = op1[:2] 480 d2, d3 = op2[:2] 481 482 if d1 == 'r' or d2 == 'r' or d0 == d3 == 'r': 483 continue 484 485 d = _mergeCategories(d1, d2) 486 if d is None: continue 487 if d0 == 'r': 488 d = _mergeCategories(d, d3) 489 if d is None: continue 490 new_op = 'r'+d+'curveto' 491 elif d3 == 'r': 492 d0 = _mergeCategories(d0, _negateCategory(d)) 493 if d0 is None: continue 494 new_op = d0+'r'+'curveto' 495 else: 496 d0 = _mergeCategories(d0, d3) 497 if d0 is None: continue 498 new_op = d0+d+'curveto' 499 500 # Make sure the stack depth does not exceed (maxstack - 1), so 501 # that subroutinizer can insert subroutine calls at any point. 502 if new_op and len(args1) + len(args2) < maxstack: 503 commands[i-1] = (new_op, args1+args2) 504 del commands[i] 505 506 # 6. Resolve any remaining made-up operators into real operators. 507 for i in range(len(commands)): 508 op,args = commands[i] 509 510 if op in {'0moveto', '0lineto'}: 511 commands[i] = 'h'+op[1:], args 512 continue 513 514 if op[2:] == 'curveto' and op[:2] not in {'rr', 'hh', 'vv', 'vh', 'hv'}: 515 op0, op1 = op[:2] 516 if (op0 == 'r') ^ (op1 == 'r'): 517 assert len(args) % 2 == 1 518 if op0 == '0': op0 = 'h' 519 if op1 == '0': op1 = 'h' 520 if op0 == 'r': op0 = op1 521 if op1 == 'r': op1 = _negateCategory(op0) 522 assert {op0,op1} <= {'h','v'}, (op0, op1) 523 524 if len(args) % 2: 525 if op0 != op1: # vhcurveto / hvcurveto 526 if (op0 == 'h') ^ (len(args) % 8 == 1): 527 # Swap last two args order 528 args = args[:-2]+args[-1:]+args[-2:-1] 529 else: # hhcurveto / vvcurveto 530 if op0 == 'h': # hhcurveto 531 # Swap first two args order 532 args = args[1:2]+args[:1]+args[2:] 533 534 commands[i] = op0+op1+'curveto', args 535 continue 536 537 return commands 538 539def specializeProgram(program, **kwargs): 540 return commandsToProgram(specializeCommands(programToCommands(program), **kwargs)) 541 542 543if __name__ == '__main__': 544 import sys 545 if len(sys.argv) == 1: 546 import doctest 547 sys.exit(doctest.testmod().failed) 548 program = stringToProgram(sys.argv[1:]) 549 print("Program:"); print(programToString(program)) 550 commands = programToCommands(program) 551 print("Commands:"); print(commands) 552 program2 = commandsToProgram(commands) 553 print("Program from commands:"); print(programToString(program2)) 554 assert program == program2 555 print("Generalized program:"); print(programToString(generalizeProgram(program))) 556 print("Specialized program:"); print(programToString(specializeProgram(program))) 557 558