from fontTools.misc import sstruct
from fontTools.misc.roundTools import otRound
from fontTools.misc.textTools import safeEval, num2binary, binary2num
from fontTools.ttLib.tables import DefaultTable
import bisect
import logging


log = logging.getLogger(__name__)

# panose classification

panoseFormat = """
    bFamilyType:        B
    bSerifStyle:        B
    bWeight:            B
    bProportion:        B
    bContrast:          B
    bStrokeVariation:   B
    bArmStyle:          B
    bLetterForm:        B
    bMidline:           B
    bXHeight:           B
"""


class Panose(object):
    """Container for the ten single-byte fields listed in 'panoseFormat'."""

    def toXML(self, writer, ttFont):
        """Write one simple tag per panose field, each followed by a newline."""
        formatstring, names, fixes = sstruct.getformat(panoseFormat)
        for name in names:
            writer.simpletag(name, value=getattr(self, name))
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set attribute 'name' from the evaluated 'value' XML attribute."""
        setattr(self, name, safeEval(attrs["value"]))


# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

OS2_format_0 = """
    >   # big endian
    version:                H       # version
    xAvgCharWidth:          h       # average character width
    usWeightClass:          H       # degree of thickness of strokes
    usWidthClass:           H       # aspect ratio
    fsType:                 H       # type flags
    ySubscriptXSize:        h       # subscript horizontal font size
    ySubscriptYSize:        h       # subscript vertical font size
    ySubscriptXOffset:      h       # subscript x offset
    ySubscriptYOffset:      h       # subscript y offset
    ySuperscriptXSize:      h       # superscript horizontal font size
    ySuperscriptYSize:      h       # superscript vertical font size
    ySuperscriptXOffset:    h       # superscript x offset
    ySuperscriptYOffset:    h       # superscript y offset
    yStrikeoutSize:         h       # strikeout size
    yStrikeoutPosition:     h       # strikeout position
    sFamilyClass:           h       # font family class and subclass
    panose:                 10s     # panose classification number
    ulUnicodeRange1:        L       # character range
    ulUnicodeRange2:        L       # character range
    ulUnicodeRange3:        L       # character range
    ulUnicodeRange4:        L       # character range
    achVendID:              4s      # font vendor identification
    fsSelection:            H       # font selection flags
    usFirstCharIndex:       H       # first unicode character index
    usLastCharIndex:        H       # last unicode character index
    sTypoAscender:          h       # typographic ascender
    sTypoDescender:         h       # typographic descender
    sTypoLineGap:           h       # typographic line gap
    usWinAscent:            H       # Windows ascender
    usWinDescent:           H       # Windows descender
"""

OS2_format_1_addition = """
    ulCodePageRange1:   L
    ulCodePageRange2:   L
"""

OS2_format_2_addition = OS2_format_1_addition + """
    sxHeight:           h
    sCapHeight:         h
    usDefaultChar:      H
    usBreakChar:        H
    usMaxContext:       H
"""

OS2_format_5_addition = OS2_format_2_addition + """
    usLowerOpticalPointSize:    H
    usUpperOpticalPointSize:    H
"""

bigendian = "  >  # big endian\n"

# Full formats (used for packing and XML field order) are built from the
# version 0 header, which already carries the byte-order marker.  The
# stand-alone '*_addition' formats (used to unpack the trailing data) get
# their own byte-order marker afterwards.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition


class table_O_S_2f_2(DefaultTable.DefaultTable):

    """the OS/2 table"""

    dependencies = ["head"]

    def decompile(self, data, ttFont):
        """Unpack binary 'data' into attributes of this table.

        The version 0 fields are read first, then the version-specific
        additions. Raises ttLib.TTLibError for a version other than 0-5.
        Logs a warning if bytes remain after unpacking. On return,
        'self.panose' is a Panose instance.
        """
        dummy, data = sstruct.unpack2(OS2_format_0, data, self)

        if self.version == 1:
            dummy, data = sstruct.unpack2(OS2_format_1_addition, data, self)
        elif self.version in (2, 3, 4):
            dummy, data = sstruct.unpack2(OS2_format_2_addition, data, self)
        elif self.version == 5:
            dummy, data = sstruct.unpack2(OS2_format_5_addition, data, self)
            # the binary values are 20 times the in-memory values
            # (compile() multiplies by 20 again)
            self.usLowerOpticalPointSize /= 20
            self.usUpperOpticalPointSize /= 20
        elif self.version != 0:
            from fontTools import ttLib
            raise ttLib.TTLibError("unknown format for OS/2 table: version %s" % self.version)
        if len(data):
            log.warning("too much 'OS/2' table data")

        self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())

    def compile(self, ttFont):
        """Return the binary representation of this table.

        Recalculates usFirstCharIndex/usLastCharIndex from the font's cmap
        and logs warnings for inconsistent fsSelection flags. Raises
        ttLib.TTLibError for a version other than 0-5. 'self.panose' is
        restored to its original object even if packing fails.
        """
        self.updateFirstAndLastCharIndex(ttFont)
        panose = self.panose
        head = ttFont["head"]
        if (self.fsSelection & 1) and not (head.macStyle & 1 << 1):
            log.warning("fsSelection bit 0 (italic) and "
                        "head table macStyle bit 1 (italic) should match")
        if (self.fsSelection & 1 << 5) and not (head.macStyle & 1):
            log.warning("fsSelection bit 5 (bold) and "
                        "head table macStyle bit 0 (bold) should match")
        if (self.fsSelection & 1 << 6) and (self.fsSelection & (1 | (1 << 5))):
            log.warning("fsSelection bit 6 (regular) is set, "
                        "bits 0 (italic) and 5 (bold) must be clear")
        if self.version < 4 and self.fsSelection & 0b1110000000:
            log.warning("fsSelection bits 7, 8 and 9 are only defined in "
                        "OS/2 table version 4 and up: version %s", self.version)
        # sstruct packs 'panose' as a 10-byte string, so temporarily swap the
        # Panose object for its packed bytes; always swap it back afterwards.
        self.panose = sstruct.pack(panoseFormat, self.panose)
        try:
            if self.version == 0:
                data = sstruct.pack(OS2_format_0, self)
            elif self.version == 1:
                data = sstruct.pack(OS2_format_1, self)
            elif self.version in (2, 3, 4):
                data = sstruct.pack(OS2_format_2, self)
            elif self.version == 5:
                # pack from a copy so the in-memory point sizes stay unscaled
                d = self.__dict__.copy()
                d['usLowerOpticalPointSize'] = round(self.usLowerOpticalPointSize * 20)
                d['usUpperOpticalPointSize'] = round(self.usUpperOpticalPointSize * 20)
                data = sstruct.pack(OS2_format_5, d)
            else:
                from fontTools import ttLib
                raise ttLib.TTLibError("unknown format for OS/2 table: version %s" % self.version)
        finally:
            self.panose = panose
        return data

    def toXML(self, writer, ttFont):
        """Write every field of the table's version format to 'writer'.

        Range and flag fields are written as binary strings, 'panose' as a
        nested element, 'achVendID' via its repr without the quotes.
        """
        writer.comment(
            "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
            "will be recalculated by the compiler")
        writer.newline()
        if self.version == 1:
            fmt = OS2_format_1
        elif self.version in (2, 3, 4):
            fmt = OS2_format_2
        elif self.version == 5:
            fmt = OS2_format_5
        else:
            fmt = OS2_format_0
        formatstring, names, fixes = sstruct.getformat(fmt)
        for name in names:
            value = getattr(self, name)
            if name == "panose":
                writer.begintag("panose")
                writer.newline()
                value.toXML(writer, ttFont)
                writer.endtag("panose")
            elif name in ("ulUnicodeRange1", "ulUnicodeRange2",
                          "ulUnicodeRange3", "ulUnicodeRange4",
                          "ulCodePageRange1", "ulCodePageRange2"):
                writer.simpletag(name, value=num2binary(value))
            elif name in ("fsType", "fsSelection"):
                writer.simpletag(name, value=num2binary(value, 16))
            elif name == "achVendID":
                writer.simpletag(name, value=repr(value)[1:-1])
            else:
                writer.simpletag(name, value=value)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set the attribute 'name' from one parsed XML element.

        Inverse of toXML(): binary-string fields go through binary2num,
        'panose' children are delegated to a new Panose object, everything
        else is evaluated with safeEval.
        """
        if name == "panose":
            self.panose = panose = Panose()
            for element in content:
                # only tuples are child elements; other items are skipped
                if isinstance(element, tuple):
                    childName, childAttrs, childContent = element
                    panose.fromXML(childName, childAttrs, childContent, ttFont)
        elif name in ("ulUnicodeRange1", "ulUnicodeRange2",
                      "ulUnicodeRange3", "ulUnicodeRange4",
                      "ulCodePageRange1", "ulCodePageRange2",
                      "fsType", "fsSelection"):
            setattr(self, name, binary2num(attrs["value"]))
        elif name == "achVendID":
            setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
        else:
            setattr(self, name, safeEval(attrs["value"]))

    def updateFirstAndLastCharIndex(self, ttFont):
        """Set usFirstCharIndex/usLastCharIndex from the Unicode cmap subtables.

        Does nothing if the font has no 'cmap' table or no Unicode codepoints
        are mapped.
        """
        if 'cmap' not in ttFont:
            return
        codes = set()
        for table in getattr(ttFont['cmap'], 'tables', []):
            if table.isUnicode():
                codes.update(table.cmap.keys())
        if codes:
            minCode = min(codes)
            maxCode = max(codes)
            # USHORT cannot hold codepoints greater than 0xFFFF
            self.usFirstCharIndex = min(0xFFFF, minCode)
            self.usLastCharIndex = min(0xFFFF, maxCode)

    # misspelled attributes kept for legacy reasons

    @property
    def usMaxContex(self):
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value

    def getUnicodeRanges(self):
        """ Return the set of 'ulUnicodeRange*' bits currently enabled. """
        bits = set()
        ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
        ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
            if ul3 & (1 << i):
                bits.add(i + 64)
            if ul4 & (1 << i):
                bits.add(i + 96)
        return bits

    def setUnicodeRanges(self, bits):
        """ Set the 'ulUnicodeRange*' fields to the specified 'bits'.

        Raises ValueError if a bit is outside the range 0..122.
        """
        ul1, ul2, ul3, ul4 = 0, 0, 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= (1 << bit)
            elif 32 <= bit < 64:
                ul2 |= (1 << (bit - 32))
            elif 64 <= bit < 96:
                ul3 |= (1 << (bit - 64))
            elif 96 <= bit < 123:
                ul4 |= (1 << (bit - 96))
            else:
                raise ValueError('expected 0 <= int <= 122, found: %r' % bit)
        self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
        self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4

    def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
        """ Intersect the codepoints in the font's Unicode cmap subtables with
        the Unicode block ranges defined in the OpenType specification (v1.7),
        and set the respective 'ulUnicodeRange*' bits if there is at least ONE
        intersection.
        If 'pruneOnly' is True, only clear unused bits with NO intersection.
        Return the resulting set of bits.
        """
        unicodes = set()
        for table in ttFont['cmap'].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        if pruneOnly:
            empty = intersectUnicodeRanges(unicodes, inverse=True)
            bits = self.getUnicodeRanges() - empty
        else:
            bits = intersectUnicodeRanges(unicodes)
        self.setUnicodeRanges(bits)
        return bits

    def recalcAvgCharWidth(self, ttFont):
        """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.

        Only glyphs with a positive advance width are averaged. Set it to 0
        in the unlikely event that the 'hmtx' table is not found, or that no
        glyph has a positive advance width. Return the new value.
        """
        avg_width = 0
        hmtx = ttFont.get("hmtx")
        if hmtx is not None:
            widths = [m[0] for m in hmtx.metrics.values() if m[0] > 0]
            # guard against division by zero when every advance is <= 0
            if widths:
                avg_width = otRound(sum(widths) / len(widths))
        self.xAvgCharWidth = avg_width
        return avg_width


# Unicode ranges data from the OpenType OS/2 table specification v1.7
# The position of each entry in the tuple is its 'ulUnicodeRange' bit number.

OS2_UNICODE_RANGES = (
    (('Basic Latin',                              (0x0000, 0x007F)),),
    (('Latin-1 Supplement',                       (0x0080, 0x00FF)),),
    (('Latin Extended-A',                         (0x0100, 0x017F)),),
    (('Latin Extended-B',                         (0x0180, 0x024F)),),
    (('IPA Extensions',                           (0x0250, 0x02AF)),
     ('Phonetic Extensions',                      (0x1D00, 0x1D7F)),
     ('Phonetic Extensions Supplement',           (0x1D80, 0x1DBF))),
    (('Spacing Modifier Letters',                 (0x02B0, 0x02FF)),
     ('Modifier Tone Letters',                    (0xA700, 0xA71F))),
    (('Combining Diacritical Marks',              (0x0300, 0x036F)),
     ('Combining Diacritical Marks Supplement',   (0x1DC0, 0x1DFF))),
    (('Greek and Coptic',                         (0x0370, 0x03FF)),),
    (('Coptic',                                   (0x2C80, 0x2CFF)),),
    (('Cyrillic',                                 (0x0400, 0x04FF)),
     ('Cyrillic Supplement',                      (0x0500, 0x052F)),
     ('Cyrillic Extended-A',                      (0x2DE0, 0x2DFF)),
     ('Cyrillic Extended-B',                      (0xA640, 0xA69F))),
    (('Armenian',                                 (0x0530, 0x058F)),),
    (('Hebrew',                                   (0x0590, 0x05FF)),),
    (('Vai',                                      (0xA500, 0xA63F)),),
    (('Arabic',                                   (0x0600, 0x06FF)),
     ('Arabic Supplement',                        (0x0750, 0x077F))),
    (('NKo',                                      (0x07C0, 0x07FF)),),
    (('Devanagari',                               (0x0900, 0x097F)),),
    (('Bengali',                                  (0x0980, 0x09FF)),),
    (('Gurmukhi',                                 (0x0A00, 0x0A7F)),),
    (('Gujarati',                                 (0x0A80, 0x0AFF)),),
    (('Oriya',                                    (0x0B00, 0x0B7F)),),
    (('Tamil',                                    (0x0B80, 0x0BFF)),),
    (('Telugu',                                   (0x0C00, 0x0C7F)),),
    (('Kannada',                                  (0x0C80, 0x0CFF)),),
    (('Malayalam',                                (0x0D00, 0x0D7F)),),
    (('Thai',                                     (0x0E00, 0x0E7F)),),
    (('Lao',                                      (0x0E80, 0x0EFF)),),
    (('Georgian',                                 (0x10A0, 0x10FF)),
     ('Georgian Supplement',                      (0x2D00, 0x2D2F))),
    (('Balinese',                                 (0x1B00, 0x1B7F)),),
    (('Hangul Jamo',                              (0x1100, 0x11FF)),),
    (('Latin Extended Additional',                (0x1E00, 0x1EFF)),
     ('Latin Extended-C',                         (0x2C60, 0x2C7F)),
     ('Latin Extended-D',                         (0xA720, 0xA7FF))),
    (('Greek Extended',                           (0x1F00, 0x1FFF)),),
    (('General Punctuation',                      (0x2000, 0x206F)),
     ('Supplemental Punctuation',                 (0x2E00, 0x2E7F))),
    (('Superscripts And Subscripts',              (0x2070, 0x209F)),),
    (('Currency Symbols',                         (0x20A0, 0x20CF)),),
    (('Combining Diacritical Marks For Symbols',  (0x20D0, 0x20FF)),),
    (('Letterlike Symbols',                       (0x2100, 0x214F)),),
    (('Number Forms',                             (0x2150, 0x218F)),),
    (('Arrows',                                   (0x2190, 0x21FF)),
     ('Supplemental Arrows-A',                    (0x27F0, 0x27FF)),
     ('Supplemental Arrows-B',                    (0x2900, 0x297F)),
     ('Miscellaneous Symbols and Arrows',         (0x2B00, 0x2BFF))),
    (('Mathematical Operators',                   (0x2200, 0x22FF)),
     ('Supplemental Mathematical Operators',      (0x2A00, 0x2AFF)),
     ('Miscellaneous Mathematical Symbols-A',     (0x27C0, 0x27EF)),
     ('Miscellaneous Mathematical Symbols-B',     (0x2980, 0x29FF))),
    (('Miscellaneous Technical',                  (0x2300, 0x23FF)),),
    (('Control Pictures',                         (0x2400, 0x243F)),),
    (('Optical Character Recognition',            (0x2440, 0x245F)),),
    (('Enclosed Alphanumerics',                   (0x2460, 0x24FF)),),
    (('Box Drawing',                              (0x2500, 0x257F)),),
    (('Block Elements',                           (0x2580, 0x259F)),),
    (('Geometric Shapes',                         (0x25A0, 0x25FF)),),
    (('Miscellaneous Symbols',                    (0x2600, 0x26FF)),),
    (('Dingbats',                                 (0x2700, 0x27BF)),),
    (('CJK Symbols And Punctuation',              (0x3000, 0x303F)),),
    (('Hiragana',                                 (0x3040, 0x309F)),),
    (('Katakana',                                 (0x30A0, 0x30FF)),
     ('Katakana Phonetic Extensions',             (0x31F0, 0x31FF))),
    (('Bopomofo',                                 (0x3100, 0x312F)),
     ('Bopomofo Extended',                        (0x31A0, 0x31BF))),
    (('Hangul Compatibility Jamo',                (0x3130, 0x318F)),),
    (('Phags-pa',                                 (0xA840, 0xA87F)),),
    (('Enclosed CJK Letters And Months',          (0x3200, 0x32FF)),),
    (('CJK Compatibility',                        (0x3300, 0x33FF)),),
    (('Hangul Syllables',                         (0xAC00, 0xD7AF)),),
    (('Non-Plane 0 *',                            (0xD800, 0xDFFF)),),
    (('Phoenician',                               (0x10900, 0x1091F)),),
    (('CJK Unified Ideographs',                   (0x4E00, 0x9FFF)),
     ('CJK Radicals Supplement',                  (0x2E80, 0x2EFF)),
     ('Kangxi Radicals',                          (0x2F00, 0x2FDF)),
     ('Ideographic Description Characters',       (0x2FF0, 0x2FFF)),
     ('CJK Unified Ideographs Extension A',       (0x3400, 0x4DBF)),
     ('CJK Unified Ideographs Extension B',       (0x20000, 0x2A6DF)),
     ('Kanbun',                                   (0x3190, 0x319F))),
    (('Private Use Area (plane 0)',               (0xE000, 0xF8FF)),),
    (('CJK Strokes',                              (0x31C0, 0x31EF)),
     ('CJK Compatibility Ideographs',             (0xF900, 0xFAFF)),
     ('CJK Compatibility Ideographs Supplement',  (0x2F800, 0x2FA1F))),
    (('Alphabetic Presentation Forms',            (0xFB00, 0xFB4F)),),
    (('Arabic Presentation Forms-A',              (0xFB50, 0xFDFF)),),
    (('Combining Half Marks',                     (0xFE20, 0xFE2F)),),
    (('Vertical Forms',                           (0xFE10, 0xFE1F)),
     ('CJK Compatibility Forms',                  (0xFE30, 0xFE4F))),
    (('Small Form Variants',                      (0xFE50, 0xFE6F)),),
    (('Arabic Presentation Forms-B',              (0xFE70, 0xFEFF)),),
    (('Halfwidth And Fullwidth Forms',            (0xFF00, 0xFFEF)),),
    (('Specials',                                 (0xFFF0, 0xFFFF)),),
    (('Tibetan',                                  (0x0F00, 0x0FFF)),),
    (('Syriac',                                   (0x0700, 0x074F)),),
    (('Thaana',                                   (0x0780, 0x07BF)),),
    (('Sinhala',                                  (0x0D80, 0x0DFF)),),
    (('Myanmar',                                  (0x1000, 0x109F)),),
    (('Ethiopic',                                 (0x1200, 0x137F)),
     ('Ethiopic Supplement',                      (0x1380, 0x139F)),
     ('Ethiopic Extended',                        (0x2D80, 0x2DDF))),
    (('Cherokee',                                 (0x13A0, 0x13FF)),),
    (('Unified Canadian Aboriginal Syllabics',    (0x1400, 0x167F)),),
    (('Ogham',                                    (0x1680, 0x169F)),),
    (('Runic',                                    (0x16A0, 0x16FF)),),
    (('Khmer',                                    (0x1780, 0x17FF)),
     ('Khmer Symbols',                            (0x19E0, 0x19FF))),
    (('Mongolian',                                (0x1800, 0x18AF)),),
    (('Braille Patterns',                         (0x2800, 0x28FF)),),
    (('Yi Syllables',                             (0xA000, 0xA48F)),
     ('Yi Radicals',                              (0xA490, 0xA4CF))),
    (('Tagalog',                                  (0x1700, 0x171F)),
     ('Hanunoo',                                  (0x1720, 0x173F)),
     ('Buhid',                                    (0x1740, 0x175F)),
     ('Tagbanwa',                                 (0x1760, 0x177F))),
    (('Old Italic',                               (0x10300, 0x1032F)),),
    (('Gothic',                                   (0x10330, 0x1034F)),),
    (('Deseret',                                  (0x10400, 0x1044F)),),
    (('Byzantine Musical Symbols',                (0x1D000, 0x1D0FF)),
     ('Musical Symbols',                          (0x1D100, 0x1D1FF)),
     ('Ancient Greek Musical Notation',           (0x1D200, 0x1D24F))),
    (('Mathematical Alphanumeric Symbols',        (0x1D400, 0x1D7FF)),),
    (('Private Use (plane 15)',                   (0xF0000, 0xFFFFD)),
     ('Private Use (plane 16)',                   (0x100000, 0x10FFFD))),
    (('Variation Selectors',                      (0xFE00, 0xFE0F)),
     ('Variation Selectors Supplement',           (0xE0100, 0xE01EF))),
    (('Tags',                                     (0xE0000, 0xE007F)),),
    (('Limbu',                                    (0x1900, 0x194F)),),
    (('Tai Le',                                   (0x1950, 0x197F)),),
    (('New Tai Lue',                              (0x1980, 0x19DF)),),
    (('Buginese',                                 (0x1A00, 0x1A1F)),),
    (('Glagolitic',                               (0x2C00, 0x2C5F)),),
    (('Tifinagh',                                 (0x2D30, 0x2D7F)),),
    (('Yijing Hexagram Symbols',                  (0x4DC0, 0x4DFF)),),
    (('Syloti Nagri',                             (0xA800, 0xA82F)),),
    (('Linear B Syllabary',                       (0x10000, 0x1007F)),
     ('Linear B Ideograms',                       (0x10080, 0x100FF)),
     ('Aegean Numbers',                           (0x10100, 0x1013F))),
    (('Ancient Greek Numbers',                    (0x10140, 0x1018F)),),
    (('Ugaritic',                                 (0x10380, 0x1039F)),),
    (('Old Persian',                              (0x103A0, 0x103DF)),),
    (('Shavian',                                  (0x10450, 0x1047F)),),
    (('Osmanya',                                  (0x10480, 0x104AF)),),
    (('Cypriot Syllabary',                        (0x10800, 0x1083F)),),
    (('Kharoshthi',                               (0x10A00, 0x10A5F)),),
    (('Tai Xuan Jing Symbols',                    (0x1D300, 0x1D35F)),),
    (('Cuneiform',                                (0x12000, 0x123FF)),
     ('Cuneiform Numbers and Punctuation',        (0x12400, 0x1247F))),
    (('Counting Rod Numerals',                    (0x1D360, 0x1D37F)),),
    (('Sundanese',                                (0x1B80, 0x1BBF)),),
    (('Lepcha',                                   (0x1C00, 0x1C4F)),),
    (('Ol Chiki',                                 (0x1C50, 0x1C7F)),),
    (('Saurashtra',                               (0xA880, 0xA8DF)),),
    (('Kayah Li',                                 (0xA900, 0xA92F)),),
    (('Rejang',                                   (0xA930, 0xA95F)),),
    (('Cham',                                     (0xAA00, 0xAA5F)),),
    (('Ancient Symbols',                          (0x10190, 0x101CF)),),
    (('Phaistos Disc',                            (0x101D0, 0x101FF)),),
    (('Carian',                                   (0x102A0, 0x102DF)),
     ('Lycian',                                   (0x10280, 0x1029F)),
     ('Lydian',                                   (0x10920, 0x1093F))),
    (('Domino Tiles',                             (0x1F030, 0x1F09F)),
     ('Mahjong Tiles',                            (0x1F000, 0x1F02F))),
)


# Lazily-filled lookup tables shared by all callers.  '_unicodeValues' has a
# leading placeholder so that bisect.bisect(_unicodeStarts, code) indexes
# straight to the (stop, bit) of the last block starting at or before 'code'.
_unicodeStarts = []
_unicodeValues = [None]


def _getUnicodeRanges():
    # build the ranges of codepoints for each unicode range bit, and cache result
    if not _unicodeStarts:
        unicodeRanges = [
            (start, (stop, bit)) for bit, blocks in enumerate(OS2_UNICODE_RANGES)
            for _, (start, stop) in blocks]
        for start, (stop, bit) in sorted(unicodeRanges):
            _unicodeStarts.append(start)
            _unicodeValues.append((stop, bit))
    return _unicodeStarts, _unicodeValues


def intersectUnicodeRanges(unicodes, inverse=False):
    """ Intersect a sequence of (int) Unicode codepoints with the Unicode block
    ranges defined in the OpenType specification v1.7, and return the set of
    'ulUnicodeRanges' bits for which there is at least ONE intersection.
    If 'inverse' is True, return the bits for which there is NO intersection.

    >>> intersectUnicodeRanges([0x0410]) == {9}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
    ...     set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
    True
    """
    unicodes = set(unicodes)
    unicodestarts, unicodevalues = _getUnicodeRanges()
    bits = set()
    for code in unicodes:
        # the candidate block is the last one starting at or before 'code';
        # 'code' belongs to it only if it does not lie past the block's end
        stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
        if code <= stop:
            bits.add(bit)
    # The spec says that bit 57 ("Non Plane 0") implies that there's
    # at least one codepoint beyond the BMP; so I also include all
    # the non-BMP codepoints here
    if any(0x10000 <= code < 0x110000 for code in unicodes):
        bits.add(57)
    return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits


if __name__ == "__main__":
    import doctest, sys
    sys.exit(doctest.testmod().failed)