from fontTools.misc import sstruct
from fontTools.misc.textTools import safeEval, num2binary, binary2num
from fontTools.ttLib.tables import DefaultTable
import bisect
import logging


log = logging.getLogger(__name__)

# panose classification

panoseFormat = """
    bFamilyType:        B
    bSerifStyle:        B
    bWeight:            B
    bProportion:        B
    bContrast:          B
    bStrokeVariation:   B
    bArmStyle:          B
    bLetterForm:        B
    bMidline:           B
    bXHeight:           B
"""


class Panose(object):
    """Container for the ten single-byte fields listed in ``panoseFormat``."""

    def toXML(self, writer, ttFont):
        """Write one simple tag per panose field, in ``panoseFormat`` order."""
        formatstring, names, fixes = sstruct.getformat(panoseFormat)
        for name in names:
            writer.simpletag(name, value=getattr(self, name))
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set attribute ``name`` from the evaluated ``value`` XML attribute."""
        setattr(self, name, safeEval(attrs["value"]))


# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

OS2_format_0 = """
    >   # big endian
    version:                H       # version
    xAvgCharWidth:          h       # average character width
    usWeightClass:          H       # degree of thickness of strokes
    usWidthClass:           H       # aspect ratio
    fsType:                 H       # type flags
    ySubscriptXSize:        h       # subscript horizontal font size
    ySubscriptYSize:        h       # subscript vertical font size
    ySubscriptXOffset:      h       # subscript x offset
    ySubscriptYOffset:      h       # subscript y offset
    ySuperscriptXSize:      h       # superscript horizontal font size
    ySuperscriptYSize:      h       # superscript vertical font size
    ySuperscriptXOffset:    h       # superscript x offset
    ySuperscriptYOffset:    h       # superscript y offset
    yStrikeoutSize:         h       # strikeout size
    yStrikeoutPosition:     h       # strikeout position
    sFamilyClass:           h       # font family class and subclass
    panose:                 10s     # panose classification number
    ulUnicodeRange1:        L       # character range
    ulUnicodeRange2:        L       # character range
    ulUnicodeRange3:        L       # character range
    ulUnicodeRange4:        L       # character range
    achVendID:              4s      # font vendor identification
    fsSelection:            H       # font selection flags
    usFirstCharIndex:       H       # first unicode character index
    usLastCharIndex:        H       # last unicode character index
    sTypoAscender:          h       # typographic ascender
    sTypoDescender:         h       # typographic descender
    sTypoLineGap:           h       # typographic line gap
    usWinAscent:            H       # Windows ascender
    usWinDescent:           H       # Windows descender
"""

OS2_format_1_addition = """
    ulCodePageRange1:   L
    ulCodePageRange2:   L
"""

OS2_format_2_addition = OS2_format_1_addition + """
    sxHeight:           h
    sCapHeight:         h
    usDefaultChar:      H
    usBreakChar:        H
    usMaxContext:       H
"""

OS2_format_5_addition = OS2_format_2_addition + """
    usLowerOpticalPointSize:    H
    usUpperOpticalPointSize:    H
"""

bigendian = "    >   # big endian\n"

# Full formats are used for packing; the "*_addition" formats (re-prefixed
# with the byte-order marker so they can be parsed on their own) are used to
# unpack the trailing, version-specific part of the table.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition


class table_O_S_2f_2(DefaultTable.DefaultTable):

    """the OS/2 table"""

    dependencies = ["head"]

    def decompile(self, data, ttFont):
        """Unpack binary ``data`` into attributes on this table.

        The version 0 fields are read first, then the version-specific
        addition (if any).  ``self.panose`` ends up as a ``Panose`` object.

        Raises ``ttLib.TTLibError`` for an unknown table version.
        """
        dummy, data = sstruct.unpack2(OS2_format_0, data, self)

        if self.version == 1:
            dummy, data = sstruct.unpack2(OS2_format_1_addition, data, self)
        elif self.version in (2, 3, 4):
            dummy, data = sstruct.unpack2(OS2_format_2_addition, data, self)
        elif self.version == 5:
            dummy, data = sstruct.unpack2(OS2_format_5_addition, data, self)
            # Stored values are scaled by 20 (TWIPs in the OpenType spec);
            # compile() multiplies by 20 again when writing.
            self.usLowerOpticalPointSize /= 20
            self.usUpperOpticalPointSize /= 20
        elif self.version != 0:
            from fontTools import ttLib
            raise ttLib.TTLibError("unknown format for OS/2 table: version %s" % self.version)
        if data:
            log.warning("too much 'OS/2' table data")

        self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())

    def compile(self, ttFont):
        """Return the binary representation of this table.

        ``usFirstCharIndex``/``usLastCharIndex`` are refreshed from the font's
        cmap first, and warnings are logged for inconsistent ``fsSelection``
        bits.  ``self.panose`` is left untouched on return, including when
        packing fails.

        Raises ``ttLib.TTLibError`` for an unknown table version.
        """
        self.updateFirstAndLastCharIndex(ttFont)
        head = ttFont["head"]
        if (self.fsSelection & 1) and not (head.macStyle & 1<<1):
            log.warning("fsSelection bit 0 (italic) and "
                "head table macStyle bit 1 (italic) should match")
        if (self.fsSelection & 1<<5) and not (head.macStyle & 1):
            log.warning("fsSelection bit 5 (bold) and "
                "head table macStyle bit 0 (bold) should match")
        if (self.fsSelection & 1<<6) and (self.fsSelection & 1 + (1<<5)):
            log.warning("fsSelection bit 6 (regular) is set, "
                "bits 0 (italic) and 5 (bold) must be clear")
        if self.version < 4 and self.fsSelection & 0b1110000000:
            log.warning("fsSelection bits 7, 8 and 9 are only defined in "
                "OS/2 table version 4 and up: version %s", self.version)

        panose = self.panose
        # The table formats pack 'panose' as a raw 10-byte string, so the
        # Panose object is temporarily swapped for its packed form.  The
        # try/finally guarantees the object is restored even if packing
        # fails or the version is unknown.
        self.panose = sstruct.pack(panoseFormat, panose)
        try:
            if self.version == 0:
                data = sstruct.pack(OS2_format_0, self)
            elif self.version == 1:
                data = sstruct.pack(OS2_format_1, self)
            elif self.version in (2, 3, 4):
                data = sstruct.pack(OS2_format_2, self)
            elif self.version == 5:
                # Pack from a copy so the instance keeps its unscaled values.
                d = self.__dict__.copy()
                d['usLowerOpticalPointSize'] = round(self.usLowerOpticalPointSize * 20)
                d['usUpperOpticalPointSize'] = round(self.usUpperOpticalPointSize * 20)
                data = sstruct.pack(OS2_format_5, d)
            else:
                from fontTools import ttLib
                raise ttLib.TTLibError("unknown format for OS/2 table: version %s" % self.version)
        finally:
            self.panose = panose
        return data

    def toXML(self, writer, ttFont):
        """Write every field of the version-appropriate format to ``writer``.

        Bit-field values are written in binary notation, ``panose`` as a
        nested element, and everything else as a plain ``value`` attribute.
        """
        writer.comment(
            "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
            "will be recalculated by the compiler")
        writer.newline()
        if self.version == 1:
            fmt = OS2_format_1
        elif self.version in (2, 3, 4):
            fmt = OS2_format_2
        elif self.version == 5:
            fmt = OS2_format_5
        else:
            fmt = OS2_format_0
        formatstring, names, fixes = sstruct.getformat(fmt)
        for name in names:
            value = getattr(self, name)
            if name == "panose":
                writer.begintag("panose")
                writer.newline()
                value.toXML(writer, ttFont)
                writer.endtag("panose")
            elif name in ("ulUnicodeRange1", "ulUnicodeRange2",
                    "ulUnicodeRange3", "ulUnicodeRange4",
                    "ulCodePageRange1", "ulCodePageRange2"):
                writer.simpletag(name, value=num2binary(value))
            elif name in ("fsType", "fsSelection"):
                writer.simpletag(name, value=num2binary(value, 16))
            elif name == "achVendID":
                # strip the quotes that repr() puts around the string
                writer.simpletag(name, value=repr(value)[1:-1])
            else:
                writer.simpletag(name, value=value)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set the attribute ``name`` from one parsed XML element.

        Mirrors ``toXML``: binary-notation fields go through ``binary2num``,
        ``panose`` children are delegated to a fresh ``Panose`` object, and
        remaining values are evaluated with ``safeEval``.
        """
        if name == "panose":
            self.panose = panose = Panose()
            for element in content:
                # non-tuple items in 'content' are skipped
                if isinstance(element, tuple):
                    name, attrs, content = element
                    panose.fromXML(name, attrs, content, ttFont)
        elif name in ("ulUnicodeRange1", "ulUnicodeRange2",
                "ulUnicodeRange3", "ulUnicodeRange4",
                "ulCodePageRange1", "ulCodePageRange2",
                "fsType", "fsSelection"):
            setattr(self, name, binary2num(attrs["value"]))
        elif name == "achVendID":
            setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
        else:
            setattr(self, name, safeEval(attrs["value"]))

    def updateFirstAndLastCharIndex(self, ttFont):
        """Recompute ``usFirstCharIndex``/``usLastCharIndex`` from the cmap.

        Does nothing when the font has no 'cmap' table or when no Unicode
        cmap subtable maps any codepoint.
        """
        if 'cmap' not in ttFont:
            return
        codes = set()
        for table in getattr(ttFont['cmap'], 'tables', []):
            if table.isUnicode():
                codes.update(table.cmap.keys())
        if codes:
            minCode = min(codes)
            maxCode = max(codes)
            # USHORT cannot hold codepoints greater than 0xFFFF
            self.usFirstCharIndex = min(0xFFFF, minCode)
            self.usLastCharIndex = min(0xFFFF, maxCode)

    # misspelled attributes kept for legacy reasons

    @property
    def usMaxContex(self):
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value

    def getUnicodeRanges(self):
        """ Return the set of 'ulUnicodeRange*' bits currently enabled. """
        bits = set()
        ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
        ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
            if ul3 & (1 << i):
                bits.add(i + 64)
            if ul4 & (1 << i):
                bits.add(i + 96)
        return bits

    def setUnicodeRanges(self, bits):
        """ Set the 'ulUnicodeRange*' fields to the specified 'bits'.

        Raises ValueError if any bit is outside the range 0..122.
        """
        ul1, ul2, ul3, ul4 = 0, 0, 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= (1 << bit)
            elif 32 <= bit < 64:
                ul2 |= (1 << (bit - 32))
            elif 64 <= bit < 96:
                ul3 |= (1 << (bit - 64))
            elif 96 <= bit < 123:
                ul4 |= (1 << (bit - 96))
            else:
                raise ValueError('expected 0 <= int <= 122, found: %r' % bit)
        self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
        self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4

    def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
        """ Intersect the codepoints in the font's Unicode cmap subtables with
        the Unicode block ranges defined in the OpenType specification (v1.7),
        and set the respective 'ulUnicodeRange*' bits if there is at least ONE
        intersection.
        If 'pruneOnly' is True, only clear unused bits with NO intersection.
        """
        unicodes = set()
        for table in ttFont['cmap'].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        if pruneOnly:
            empty = intersectUnicodeRanges(unicodes, inverse=True)
            bits = self.getUnicodeRanges() - empty
        else:
            bits = intersectUnicodeRanges(unicodes)
        self.setUnicodeRanges(bits)
        return bits


# Unicode ranges data from the OpenType OS/2 table specification v1.7
#
# The position of each entry in the tuple is its 'ulUnicodeRange' bit number;
# an entry lists every (block name, (first, last)) pair sharing that bit.

OS2_UNICODE_RANGES = (
    # bits 0-31 (ulUnicodeRange1)
    (('Basic Latin',                              (0x0000, 0x007F)),),
    (('Latin-1 Supplement',                       (0x0080, 0x00FF)),),
    (('Latin Extended-A',                         (0x0100, 0x017F)),),
    (('Latin Extended-B',                         (0x0180, 0x024F)),),
    (('IPA Extensions',                           (0x0250, 0x02AF)),
     ('Phonetic Extensions',                      (0x1D00, 0x1D7F)),
     ('Phonetic Extensions Supplement',           (0x1D80, 0x1DBF))),
    (('Spacing Modifier Letters',                 (0x02B0, 0x02FF)),
     ('Modifier Tone Letters',                    (0xA700, 0xA71F))),
    (('Combining Diacritical Marks',              (0x0300, 0x036F)),
     ('Combining Diacritical Marks Supplement',   (0x1DC0, 0x1DFF))),
    (('Greek and Coptic',                         (0x0370, 0x03FF)),),
    (('Coptic',                                   (0x2C80, 0x2CFF)),),
    (('Cyrillic',                                 (0x0400, 0x04FF)),
     ('Cyrillic Supplement',                      (0x0500, 0x052F)),
     ('Cyrillic Extended-A',                      (0x2DE0, 0x2DFF)),
     ('Cyrillic Extended-B',                      (0xA640, 0xA69F))),
    (('Armenian',                                 (0x0530, 0x058F)),),
    (('Hebrew',                                   (0x0590, 0x05FF)),),
    (('Vai',                                      (0xA500, 0xA63F)),),
    (('Arabic',                                   (0x0600, 0x06FF)),
     ('Arabic Supplement',                        (0x0750, 0x077F))),
    (('NKo',                                      (0x07C0, 0x07FF)),),
    (('Devanagari',                               (0x0900, 0x097F)),),
    (('Bengali',                                  (0x0980, 0x09FF)),),
    (('Gurmukhi',                                 (0x0A00, 0x0A7F)),),
    (('Gujarati',                                 (0x0A80, 0x0AFF)),),
    (('Oriya',                                    (0x0B00, 0x0B7F)),),
    (('Tamil',                                    (0x0B80, 0x0BFF)),),
    (('Telugu',                                   (0x0C00, 0x0C7F)),),
    (('Kannada',                                  (0x0C80, 0x0CFF)),),
    (('Malayalam',                                (0x0D00, 0x0D7F)),),
    (('Thai',                                     (0x0E00, 0x0E7F)),),
    (('Lao',                                      (0x0E80, 0x0EFF)),),
    (('Georgian',                                 (0x10A0, 0x10FF)),
     ('Georgian Supplement',                      (0x2D00, 0x2D2F))),
    (('Balinese',                                 (0x1B00, 0x1B7F)),),
    (('Hangul Jamo',                              (0x1100, 0x11FF)),),
    (('Latin Extended Additional',                (0x1E00, 0x1EFF)),
     ('Latin Extended-C',                         (0x2C60, 0x2C7F)),
     ('Latin Extended-D',                         (0xA720, 0xA7FF))),
    (('Greek Extended',                           (0x1F00, 0x1FFF)),),
    (('General Punctuation',                      (0x2000, 0x206F)),
     ('Supplemental Punctuation',                 (0x2E00, 0x2E7F))),
    # bits 32-63 (ulUnicodeRange2)
    (('Superscripts And Subscripts',              (0x2070, 0x209F)),),
    (('Currency Symbols',                         (0x20A0, 0x20CF)),),
    (('Combining Diacritical Marks For Symbols',  (0x20D0, 0x20FF)),),
    (('Letterlike Symbols',                       (0x2100, 0x214F)),),
    (('Number Forms',                             (0x2150, 0x218F)),),
    (('Arrows',                                   (0x2190, 0x21FF)),
     ('Supplemental Arrows-A',                    (0x27F0, 0x27FF)),
     ('Supplemental Arrows-B',                    (0x2900, 0x297F)),
     ('Miscellaneous Symbols and Arrows',         (0x2B00, 0x2BFF))),
    (('Mathematical Operators',                   (0x2200, 0x22FF)),
     ('Supplemental Mathematical Operators',      (0x2A00, 0x2AFF)),
     ('Miscellaneous Mathematical Symbols-A',     (0x27C0, 0x27EF)),
     ('Miscellaneous Mathematical Symbols-B',     (0x2980, 0x29FF))),
    (('Miscellaneous Technical',                  (0x2300, 0x23FF)),),
    (('Control Pictures',                         (0x2400, 0x243F)),),
    (('Optical Character Recognition',            (0x2440, 0x245F)),),
    (('Enclosed Alphanumerics',                   (0x2460, 0x24FF)),),
    (('Box Drawing',                              (0x2500, 0x257F)),),
    (('Block Elements',                           (0x2580, 0x259F)),),
    (('Geometric Shapes',                         (0x25A0, 0x25FF)),),
    (('Miscellaneous Symbols',                    (0x2600, 0x26FF)),),
    (('Dingbats',                                 (0x2700, 0x27BF)),),
    (('CJK Symbols And Punctuation',              (0x3000, 0x303F)),),
    (('Hiragana',                                 (0x3040, 0x309F)),),
    (('Katakana',                                 (0x30A0, 0x30FF)),
     ('Katakana Phonetic Extensions',             (0x31F0, 0x31FF))),
    (('Bopomofo',                                 (0x3100, 0x312F)),
     ('Bopomofo Extended',                        (0x31A0, 0x31BF))),
    (('Hangul Compatibility Jamo',                (0x3130, 0x318F)),),
    (('Phags-pa',                                 (0xA840, 0xA87F)),),
    (('Enclosed CJK Letters And Months',          (0x3200, 0x32FF)),),
    (('CJK Compatibility',                        (0x3300, 0x33FF)),),
    (('Hangul Syllables',                         (0xAC00, 0xD7AF)),),
    (('Non-Plane 0 *',                            (0xD800, 0xDFFF)),),
    (('Phoenician',                               (0x10900, 0x1091F)),),
    (('CJK Unified Ideographs',                   (0x4E00, 0x9FFF)),
     ('CJK Radicals Supplement',                  (0x2E80, 0x2EFF)),
     ('Kangxi Radicals',                          (0x2F00, 0x2FDF)),
     ('Ideographic Description Characters',       (0x2FF0, 0x2FFF)),
     ('CJK Unified Ideographs Extension A',       (0x3400, 0x4DBF)),
     ('CJK Unified Ideographs Extension B',       (0x20000, 0x2A6DF)),
     ('Kanbun',                                   (0x3190, 0x319F))),
    (('Private Use Area (plane 0)',               (0xE000, 0xF8FF)),),
    (('CJK Strokes',                              (0x31C0, 0x31EF)),
     ('CJK Compatibility Ideographs',             (0xF900, 0xFAFF)),
     ('CJK Compatibility Ideographs Supplement',  (0x2F800, 0x2FA1F))),
    (('Alphabetic Presentation Forms',            (0xFB00, 0xFB4F)),),
    (('Arabic Presentation Forms-A',              (0xFB50, 0xFDFF)),),
    # bits 64-95 (ulUnicodeRange3)
    (('Combining Half Marks',                     (0xFE20, 0xFE2F)),),
    (('Vertical Forms',                           (0xFE10, 0xFE1F)),
     ('CJK Compatibility Forms',                  (0xFE30, 0xFE4F))),
    (('Small Form Variants',                      (0xFE50, 0xFE6F)),),
    (('Arabic Presentation Forms-B',              (0xFE70, 0xFEFF)),),
    (('Halfwidth And Fullwidth Forms',            (0xFF00, 0xFFEF)),),
    (('Specials',                                 (0xFFF0, 0xFFFF)),),
    (('Tibetan',                                  (0x0F00, 0x0FFF)),),
    (('Syriac',                                   (0x0700, 0x074F)),),
    (('Thaana',                                   (0x0780, 0x07BF)),),
    (('Sinhala',                                  (0x0D80, 0x0DFF)),),
    (('Myanmar',                                  (0x1000, 0x109F)),),
    (('Ethiopic',                                 (0x1200, 0x137F)),
     ('Ethiopic Supplement',                      (0x1380, 0x139F)),
     ('Ethiopic Extended',                        (0x2D80, 0x2DDF))),
    (('Cherokee',                                 (0x13A0, 0x13FF)),),
    (('Unified Canadian Aboriginal Syllabics',    (0x1400, 0x167F)),),
    (('Ogham',                                    (0x1680, 0x169F)),),
    (('Runic',                                    (0x16A0, 0x16FF)),),
    (('Khmer',                                    (0x1780, 0x17FF)),
     ('Khmer Symbols',                            (0x19E0, 0x19FF))),
    (('Mongolian',                                (0x1800, 0x18AF)),),
    (('Braille Patterns',                         (0x2800, 0x28FF)),),
    (('Yi Syllables',                             (0xA000, 0xA48F)),
     ('Yi Radicals',                              (0xA490, 0xA4CF))),
    (('Tagalog',                                  (0x1700, 0x171F)),
     ('Hanunoo',                                  (0x1720, 0x173F)),
     ('Buhid',                                    (0x1740, 0x175F)),
     ('Tagbanwa',                                 (0x1760, 0x177F))),
    (('Old Italic',                               (0x10300, 0x1032F)),),
    (('Gothic',                                   (0x10330, 0x1034F)),),
    (('Deseret',                                  (0x10400, 0x1044F)),),
    (('Byzantine Musical Symbols',                (0x1D000, 0x1D0FF)),
     ('Musical Symbols',                          (0x1D100, 0x1D1FF)),
     ('Ancient Greek Musical Notation',           (0x1D200, 0x1D24F))),
    (('Mathematical Alphanumeric Symbols',        (0x1D400, 0x1D7FF)),),
    (('Private Use (plane 15)',                   (0xF0000, 0xFFFFD)),
     ('Private Use (plane 16)',                   (0x100000, 0x10FFFD))),
    (('Variation Selectors',                      (0xFE00, 0xFE0F)),
     ('Variation Selectors Supplement',           (0xE0100, 0xE01EF))),
    (('Tags',                                     (0xE0000, 0xE007F)),),
    (('Limbu',                                    (0x1900, 0x194F)),),
    (('Tai Le',                                   (0x1950, 0x197F)),),
    (('New Tai Lue',                              (0x1980, 0x19DF)),),
    # bits 96-122 (ulUnicodeRange4)
    (('Buginese',                                 (0x1A00, 0x1A1F)),),
    (('Glagolitic',                               (0x2C00, 0x2C5F)),),
    (('Tifinagh',                                 (0x2D30, 0x2D7F)),),
    (('Yijing Hexagram Symbols',                  (0x4DC0, 0x4DFF)),),
    (('Syloti Nagri',                             (0xA800, 0xA82F)),),
    (('Linear B Syllabary',                       (0x10000, 0x1007F)),
     ('Linear B Ideograms',                       (0x10080, 0x100FF)),
     ('Aegean Numbers',                           (0x10100, 0x1013F))),
    (('Ancient Greek Numbers',                    (0x10140, 0x1018F)),),
    (('Ugaritic',                                 (0x10380, 0x1039F)),),
    (('Old Persian',                              (0x103A0, 0x103DF)),),
    (('Shavian',                                  (0x10450, 0x1047F)),),
    (('Osmanya',                                  (0x10480, 0x104AF)),),
    (('Cypriot Syllabary',                        (0x10800, 0x1083F)),),
    (('Kharoshthi',                               (0x10A00, 0x10A5F)),),
    (('Tai Xuan Jing Symbols',                    (0x1D300, 0x1D35F)),),
    (('Cuneiform',                                (0x12000, 0x123FF)),
     ('Cuneiform Numbers and Punctuation',        (0x12400, 0x1247F))),
    (('Counting Rod Numerals',                    (0x1D360, 0x1D37F)),),
    (('Sundanese',                                (0x1B80, 0x1BBF)),),
    (('Lepcha',                                   (0x1C00, 0x1C4F)),),
    (('Ol Chiki',                                 (0x1C50, 0x1C7F)),),
    (('Saurashtra',                               (0xA880, 0xA8DF)),),
    (('Kayah Li',                                 (0xA900, 0xA92F)),),
    (('Rejang',                                   (0xA930, 0xA95F)),),
    (('Cham',                                     (0xAA00, 0xAA5F)),),
    (('Ancient Symbols',                          (0x10190, 0x101CF)),),
    (('Phaistos Disc',                            (0x101D0, 0x101FF)),),
    (('Carian',                                   (0x102A0, 0x102DF)),
     ('Lycian',                                   (0x10280, 0x1029F)),
     ('Lydian',                                   (0x10920, 0x1093F))),
    (('Domino Tiles',                             (0x1F030, 0x1F09F)),
     ('Mahjong Tiles',                            (0x1F000, 0x1F02F))),
)


# Lazily-filled lookup tables shared by all callers of _getUnicodeRanges().
# _unicodeValues carries a leading None so that the index returned by
# bisect.bisect(_unicodeStarts, code) can be used on it directly.
_unicodeStarts = []
_unicodeValues = [None]


def _getUnicodeRanges():
    """Return the (starts, values) lookup lists, building them on first use.

    'starts' is the sorted list of block start codepoints; 'values' holds the
    matching (stop, bit) tuples, offset by one position.
    """
    # build the ranges of codepoints for each unicode range bit, and cache result
    if not _unicodeStarts:
        unicodeRanges = [
            (start, (stop, bit)) for bit, blocks in enumerate(OS2_UNICODE_RANGES)
            for _, (start, stop) in blocks]
        for start, (stop, bit) in sorted(unicodeRanges):
            _unicodeStarts.append(start)
            _unicodeValues.append((stop, bit))
    return _unicodeStarts, _unicodeValues


def intersectUnicodeRanges(unicodes, inverse=False):
    """ Intersect a sequence of (int) Unicode codepoints with the Unicode block
    ranges defined in the OpenType specification v1.7, and return the set of
    'ulUnicodeRanges' bits for which there is at least ONE intersection.
    If 'inverse' is True, return the bits for which there is NO intersection.

    >>> intersectUnicodeRanges([0x0410]) == {9}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
    ...     set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
    True
    """
    unicodes = set(unicodes)
    unicodestarts, unicodevalues = _getUnicodeRanges()
    bits = set()
    for code in unicodes:
        # find the block with the greatest start <= code, then check that the
        # codepoint does not fall in the gap after that block's end
        stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
        if code <= stop:
            bits.add(bit)
    # The spec says that bit 57 ("Non Plane 0") implies that there's
    # at least one codepoint beyond the BMP; so I also include all
    # the non-BMP codepoints here
    if any(0x10000 <= code < 0x110000 for code in unicodes):
        bits.add(57)
    return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits


if __name__ == "__main__":
    import doctest, sys
    sys.exit(doctest.testmod().failed)