1import collections.abc 2import re 3from typing import ( 4 Any, 5 Callable, 6 Dict, 7 List, 8 Mapping, 9 MutableMapping, 10 Optional, 11 Sequence, 12 Type, 13 Union, 14 IO, 15) 16import warnings 17from io import BytesIO 18from datetime import datetime 19from base64 import b64encode, b64decode 20from numbers import Integral 21from types import SimpleNamespace 22from functools import singledispatch 23 24from fontTools.misc import etree 25 26from fontTools.misc.py23 import tostr 27 28 29# By default, we 30# - deserialize <data> elements as bytes and 31# - serialize bytes as <data> elements. 32# Before, on Python 2, we 33# - deserialized <data> elements as plistlib.Data objects, in order to 34# distinguish them from the built-in str type (which is bytes on python2) 35# - serialized bytes as <string> elements (they must have only contained 36# ASCII characters in this case) 37# You can pass use_builtin_types=[True|False] to the load/dump etc. functions 38# to enforce a specific treatment. 39# NOTE that unicode type always maps to <string> element, and plistlib.Data 40# always maps to <data> element, regardless of use_builtin_types. 
# Module-wide default for the ``use_builtin_types`` arguments below: when a
# caller passes ``None``, this value is used instead.
USE_BUILTIN_TYPES = True

XML_DECLARATION = b"""<?xml version='1.0' encoding='UTF-8'?>"""

PLIST_DOCTYPE = (
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
    b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
)


# Date should conform to a subset of ISO 8601:
# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'
# Every component after the year is optional, but a component can only be
# present when all the components before it are present as well.
_date_parser = re.compile(
    r"(?P<year>\d\d\d\d)"
    r"(?:-(?P<month>\d\d)"
    r"(?:-(?P<day>\d\d)"
    r"(?:T(?P<hour>\d\d)"
    r"(?::(?P<minute>\d\d)"
    r"(?::(?P<second>\d\d))"
    r"?)?)?)?)?Z",
    re.ASCII,
)


def _date_from_string(s: str) -> datetime:
    """Parse a plist ``<date>`` string into a naive ``datetime``.

    Args:
        s: Text matching the ISO 8601 subset accepted by ``_date_parser``.

    Returns:
        A ``datetime`` built from the leading components found in ``s``.

    Raises:
        ValueError: If ``s`` does not match the expected date pattern.
    """
    order = ("year", "month", "day", "hour", "minute", "second")
    m = _date_parser.match(s)
    if m is None:
        # ``!r`` is the repr *conversion*; the previous ``:r`` was parsed as a
        # format spec, which str rejects, so the intended message was lost.
        raise ValueError(f"Expected ISO 8601 date string, but got {s!r}.")
    gd = m.groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # Components are nested in the pattern, so nothing follows a gap.
            break
        lst.append(int(val))
    # NOTE: mypy cannot tell how many positional arguments lst expands to.
    return datetime(*lst)  # type:ignore


def _date_to_string(d: datetime) -> str:
    """Format ``d`` as ``YYYY-MM-DDTHH:MM:SSZ``, zero-padding each field."""
    return "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
        d.year,
        d.month,
        d.day,
        d.hour,
        d.minute,
        d.second,
    )


class Data:
    """Represents binary data when ``use_builtin_types=False.``

    This class wraps binary data loaded from a plist file when the
    ``use_builtin_types`` argument to the loading function (:py:func:`fromtree`,
    :py:func:`load`, :py:func:`loads`) is false.

    The actual binary data is retrieved using the ``data`` attribute.
    """

    def __init__(self, data: bytes) -> None:
        """Wrap ``data``; raise ``TypeError`` if it is not a ``bytes`` object."""
        if not isinstance(data, bytes):
            raise TypeError("Expected bytes, found %s" % type(data).__name__)
        self.data = data

    @classmethod
    def fromBase64(cls, data: Union[bytes, str]) -> "Data":
        """Build an instance from Base-64 encoded ``data``."""
        return cls(b64decode(data))

    def asBase64(self, maxlinelength: int = 76, indent_level: int = 1) -> bytes:
        """Return the wrapped bytes Base-64 encoded via ``_encode_base64``."""
        return _encode_base64(
            self.data, maxlinelength=maxlinelength, indent_level=indent_level
        )

    def __eq__(self, other: Any) -> bool:
        """Compare equal to another instance or to raw ``bytes`` with the same content."""
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            return self.data == other
        else:
            return NotImplemented

    def __repr__(self) -> str:
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))


def _encode_base64(
    data: bytes, maxlinelength: Optional[int] = 76, indent_level: int = 1
) -> bytes:
    """Base-64 encode ``data``, optionally wrapped onto indented lines.

    Args:
        data: The raw bytes to encode.
        maxlinelength: Target line width. A falsy value (``None`` or ``0``)
            disables wrapping and returns the bare Base-64 text.
        indent_level: How many indent units precede each wrapped line.

    Returns:
        The encoded bytes. When wrapping is active and the encoded text is
        non-empty, every chunk is preceded by a newline plus indentation and
        one more newline plus indentation follows the last chunk.
    """
    data = b64encode(data)
    if data and maxlinelength:
        # NOTE(review): the indent unit below is reproduced exactly as it
        # appears in the reviewed copy, whose whitespace looks collapsed;
        # confirm its width against the serializer's pretty-print indentation.
        indent = b"\n" + b" " * indent_level
        # Never emit chunks shorter than 16 characters, however deep the nesting.
        max_length = max(16, maxlinelength - len(indent))
        chunks = []
        for i in range(0, len(data), max_length):
            chunks.append(indent)
            chunks.append(data[i : i + max_length])
        chunks.append(indent)
        data = b"".join(chunks)
    return data


# Mypy does not support recursive type aliases as of 0.782, Pylance does.
# https://github.com/python/mypy/issues/731
# https://devblogs.microsoft.com/python/pylance-introduces-five-new-features-that-enable-type-magic-for-python-developers/#1-support-for-recursive-type-aliases
PlistEncodable = Union[
    bool,
    bytes,
    Data,
    datetime,
    float,
    int,
    Mapping[str, Any],
    Sequence[Any],
    str,
]


class PlistTarget:
    """Event handler using the ElementTree Target API that can be
    passed to a XMLParser to produce property list objects from XML.
    It is based on the CPython plistlib module's _PlistParser class,
    but does not use the expat parser.

    >>> from fontTools.misc import etree
    >>> parser = etree.XMLParser(target=PlistTarget())
    >>> result = etree.XML(
    ...     "<dict>"
    ...     " <key>something</key>"
    ...     " <string>blah</string>"
    ...     "</dict>",
    ...     parser=parser)
    >>> result == {"something": "blah"}
    True

    Links:
    https://github.com/python/cpython/blob/master/Lib/plistlib.py
    http://lxml.de/parsing.html#the-target-parser-interface
    """

    def __init__(
        self,
        use_builtin_types: Optional[bool] = None,
        dict_type: Type[MutableMapping[str, Any]] = dict,
    ) -> None:
        """Set up an empty parse state.

        Args:
            use_builtin_types: If true, ``<data>`` becomes ``bytes``; if false
                it becomes :py:class:`Data` (and a ``DeprecationWarning`` is
                issued). ``None`` selects ``USE_BUILTIN_TYPES``.
            dict_type: Callable producing the mapping used for each ``<dict>``.
        """
        # Containers that are currently open, innermost last.
        self.stack: List[PlistEncodable] = []
        # Key read inside a <dict> that is still waiting for its value.
        self.current_key: Optional[str] = None
        self.root: Optional[PlistEncodable] = None
        # Text fragments of the element being read; start() resets it, but it
        # is created here so data()/get_data() are safe on a fresh instance.
        self._data: List[str] = []
        if use_builtin_types is None:
            self._use_builtin_types = USE_BUILTIN_TYPES
        else:
            if use_builtin_types is False:
                warnings.warn(
                    "Setting use_builtin_types to False is deprecated and will be "
                    "removed soon.",
                    DeprecationWarning,
                )
            self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def start(self, tag: str, attrib: Mapping[str, str]) -> None:
        """Handle an opening tag: reset the text buffer and run its start handler, if any."""
        self._data = []
        handler = _TARGET_START_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def end(self, tag: str) -> None:
        """Handle a closing tag by running its end handler, if any."""
        handler = _TARGET_END_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def data(self, data: str) -> None:
        """Collect a fragment of character data for the current element."""
        self._data.append(data)

    def close(self) -> PlistEncodable:
        """Return the parsed root object; raise ``ValueError`` if none was set."""
        if self.root is None:
            raise ValueError("No root set.")
        return self.root

    # helpers

    def add_object(self, value: PlistEncodable) -> None:
        """Attach ``value`` to the innermost open container, or make it the root.

        Raises:
            ValueError: If a key is pending but the innermost container is not
                a mutable mapping, or if no key is pending and the innermost
                container is not a list.
        """
        if self.current_key is not None:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, collections.abc.MutableMapping):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top[self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, list):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top.append(value)

    def get_data(self) -> str:
        """Return the buffered text as one string and clear the buffer."""
        data = "".join(self._data)
        self._data = []
        return data


# event handlers


def start_dict(self: PlistTarget) -> None:
    """Open a new mapping, attach it to its parent and push it on the stack."""
    d = self._dict_type()
    self.add_object(d)
    self.stack.append(d)


def end_dict(self: PlistTarget) -> None:
    """Close the innermost mapping; a key still waiting for a value is an error."""
    # Compare against None so that a pending empty-string key is detected too.
    if self.current_key is not None:
        raise ValueError("missing value for key '%s'" % self.current_key)
    self.stack.pop()


def end_key(self: PlistTarget) -> None:
    """Record the buffered text as the pending key of the innermost mapping.

    Raises:
        ValueError: If another key is already pending, or if the key does not
            appear directly inside a mapping.
    """
    if (
        self.current_key is not None
        or not self.stack  # a <key> outside of any container
        or not isinstance(self.stack[-1], collections.abc.Mapping)
    ):
        raise ValueError("unexpected key")
    self.current_key = self.get_data()


def start_array(self: PlistTarget) -> None:
    """Open a new list, attach it to its parent and push it on the stack."""
    a: List[PlistEncodable] = []
    self.add_object(a)
    self.stack.append(a)


def end_array(self: PlistTarget) -> None:
    """Close the innermost list."""
    self.stack.pop()


def end_true(self: PlistTarget) -> None:
    self.add_object(True)


def end_false(self: PlistTarget) -> None:
    self.add_object(False)


def end_integer(self: PlistTarget) -> None:
    self.add_object(int(self.get_data()))


def end_real(self: PlistTarget) -> None:
    self.add_object(float(self.get_data()))


def end_string(self: PlistTarget) -> None:
    self.add_object(self.get_data())


def end_data(self: PlistTarget) -> None:
    """Decode Base-64 text to ``bytes`` or :py:class:`Data`, per ``_use_builtin_types``."""
    if self._use_builtin_types:
        self.add_object(b64decode(self.get_data()))
    else:
        self.add_object(Data.fromBase64(self.get_data()))


def end_date(self: PlistTarget) -> None:
    self.add_object(_date_from_string(self.get_data()))


# Only container tags need work when they open.
_TARGET_START_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": start_dict,
    "array": start_array,
}

# Tags missing from this table (for example the outer <plist>) are ignored.
_TARGET_END_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": end_dict,
    "array": end_array,
    "key": end_key,
    "true": end_true,
    "false": end_false,
    "integer": end_integer,
    "real": end_real,
    "string": end_string,
    "data": end_data,
    "date": end_date,
}


# functions to build element tree from plist data
#
# Each builder takes the value and a context namespace carrying the
# serialization options (sort_keys, skipkeys, use_builtin_types,
# pretty_print, indent_level).


def _string_element(value: str, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<string>`` element whose text is ``value``."""
    el = etree.Element("string")
    el.text = value
    return el


def _bool_element(value: bool, ctx: SimpleNamespace) -> etree.Element:
    """Return an empty ``<true>`` or ``<false>`` element."""
    if value:
        return etree.Element("true")
    return etree.Element("false")


def _integer_element(value: int, ctx: SimpleNamespace) -> etree.Element:
    """Return an ``<integer>`` element; raise ``OverflowError`` outside [-2**63, 2**64)."""
    if -1 << 63 <= value < 1 << 64:
        el = etree.Element("integer")
        el.text = "%d" % value
        return el
    raise OverflowError(value)


def _real_element(value: float, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<real>`` element whose text is ``repr(value)``."""
    el = etree.Element("real")
    el.text = repr(value)
    return el


def _dict_element(
    d: Mapping[str, PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Return a ``<dict>`` element with one ``<key>``/value pair per item.

    Raises:
        TypeError: If a key is not a string and ``ctx.skipkeys`` is false.
    """
    el = etree.Element("dict")
    items = d.items()
    if ctx.skipkeys:
        # Drop non-string keys before sorting: sorting keys of mixed types
        # raises TypeError, which would defeat the purpose of skipkeys.
        items = [(k, v) for k, v in items if isinstance(k, str)]  # type: ignore
    if ctx.sort_keys:
        items = sorted(items)  # type: ignore
    ctx.indent_level += 1
    try:
        for key, value in items:
            if not isinstance(key, str):
                raise TypeError("keys must be strings")
            k = etree.SubElement(el, "key")
            k.text = tostr(key, "utf-8")
            el.append(_make_element(value, ctx))
    finally:
        # Restore the level even if a child fails to serialize.
        ctx.indent_level -= 1
    return el


def _array_element(
    array: Sequence[PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Return an ``<array>`` element with one child per item, in order."""
    el = etree.Element("array")
    ctx.indent_level += 1
    try:
        for value in array:
            el.append(_make_element(value, ctx))
    finally:
        # Restore the level even if a child fails to serialize.
        ctx.indent_level -= 1
    return el


def _date_element(date: datetime, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<date>`` element formatted by ``_date_to_string``."""
    el = etree.Element("date")
    el.text = _date_to_string(date)
    return el


def _data_element(data: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<data>`` element holding ``data`` Base-64 encoded.

    The text is wrapped at 76 columns only when ``ctx.pretty_print`` is true.
    """
    el = etree.Element("data")
    # NOTE: mypy is confused about whether el.text should be str or bytes.
    el.text = _encode_base64(  # type: ignore
        data,
        maxlinelength=(76 if ctx.pretty_print else None),
        indent_level=ctx.indent_level,
    )
    return el


def _string_or_data_element(raw_bytes: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Serialize ``bytes`` as ``<data>`` or, in legacy mode, as an ASCII ``<string>``.

    Raises:
        ValueError: If ``ctx.use_builtin_types`` is false and ``raw_bytes``
            contains non-ASCII bytes.
    """
    if ctx.use_builtin_types:
        return _data_element(raw_bytes, ctx)
    else:
        try:
            string = raw_bytes.decode(encoding="ascii", errors="strict")
        except UnicodeDecodeError:
            raise ValueError(
                "invalid non-ASCII bytes; use unicode string instead: %r" % raw_bytes
            )
        return _string_element(string, ctx)


# The following is probably not entirely correct. The signature should take `Any`
# and return `NoReturn`. At the time of this writing, neither mypy nor Pyright
# can deal with singledispatch properly and will apply the signature of the base
# function to all others. Being slightly dishonest makes it type-check and return
# usable typing information for the optimistic case.
@singledispatch
def _make_element(value: PlistEncodable, ctx: SimpleNamespace) -> etree.Element:
    """Build the XML element for ``value``; this fallback rejects unknown types.

    Supported types are handled by the implementations registered below.

    Raises:
        TypeError: Always, because reaching this body means no registered
            implementation matched ``type(value)``.
    """
    raise TypeError("unsupported type: %s" % type(value))


_make_element.register(str)(_string_element)
_make_element.register(bool)(_bool_element)
_make_element.register(Integral)(_integer_element)
_make_element.register(float)(_real_element)
_make_element.register(collections.abc.Mapping)(_dict_element)
_make_element.register(list)(_array_element)
_make_element.register(tuple)(_array_element)
_make_element.register(datetime)(_date_element)
_make_element.register(bytes)(_string_or_data_element)
_make_element.register(bytearray)(_data_element)
_make_element.register(Data)(lambda v, ctx: _data_element(v.data, ctx))


# Public functions to create element tree from plist-compatible python
# data structures and viceversa, for use when (de)serializing GLIF xml.


def totree(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
    indent_level: int = 1,
) -> etree.Element:
    """Convert a value derived from a plist into an XML tree.

    Args:
        value: Any kind of value to be serialized to XML.
        sort_keys: Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be decoded as such. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.
        indent_level (int): Level of indentation when serializing.

    Returns: an ``etree`` ``Element`` object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-ASCII binary data is present
            and `use_builtin_types` is false.
    """
    if use_builtin_types is None:
        use_builtin_types = USE_BUILTIN_TYPES
    # The element builders read their options from this shared namespace and
    # adjust ``indent_level`` while descending into containers.
    context = SimpleNamespace(
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
        indent_level=indent_level,
    )
    return _make_element(value, context)


def fromtree(
    tree: etree.Element,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Convert an XML tree to a plist structure.

    Args:
        tree: An ``etree`` ``Element``.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns: An object (usually a dictionary).
    """
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    # Replay the tree as the start/data/end events a parser would deliver.
    for action, element in etree.iterwalk(tree, events=("start", "end")):
        if action == "start":
            target.start(element.tag, element.attrib)
        elif action == "end":
            # if there are no children, parse the leaf's data
            if not len(element):
                # always pass str, not None
                target.data(element.text or "")
            target.end(element.tag)
    return target.close()


# python3 plistlib API


def load(
    fp: IO[bytes],
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Load a plist file into an object.

    Args:
        fp: An opened file.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``read`` attribute.
    """

    if not hasattr(fp, "read"):
        raise AttributeError("'%s' object has no attribute 'read'" % type(fp).__name__)
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    parser = etree.XMLParser(target=target)
    result = etree.parse(fp, parser=parser)
    # lxml returns the target object directly, while ElementTree wraps
    # it as the root of an ElementTree object
    try:
        return result.getroot()
    except AttributeError:
        return result


def loads(
    value: bytes,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Load a plist file from a string into an object.

    Args:
        value: A bytes string containing a plist.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.
    """

    fp = BytesIO(value)
    return load(fp, use_builtin_types=use_builtin_types, dict_type=dict_type)


def dump(
    value: PlistEncodable,
    fp: IO[bytes],
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> None:
    """Write a Python object to a plist file.

    Args:
        value: An object to write.
        fp: A file opened for writing.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``write`` attribute.
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """

    if not hasattr(fp, "write"):
        raise AttributeError("'%s' object has no attribute 'write'" % type(fp).__name__)
    root = etree.Element("plist", version="1.0")
    el = totree(
        value,
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    root.append(el)
    tree = etree.ElementTree(root)
    # we write the doctype ourselves instead of using the 'doctype' argument
    # of 'write' method, because lxml will force adding a '\n' even when
    # pretty_print is False.
    if pretty_print:
        # The trailing empty item makes the header end with a newline.
        header = b"\n".join((XML_DECLARATION, PLIST_DOCTYPE, b""))
    else:
        header = XML_DECLARATION + PLIST_DOCTYPE
    fp.write(header)
    tree.write(  # type: ignore
        fp,
        encoding="utf-8",
        pretty_print=pretty_print,
        xml_declaration=False,
    )


def dumps(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> bytes:
    """Write a Python object to a string in plist format.

    Args:
        value: An object to write.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Returns:
        bytes: A plist representation of the Python object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """
    fp = BytesIO()
    dump(
        value,
        fp,
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    return fp.getvalue()