• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
2
3The property list (.plist) file format is a simple XML pickle supporting
4basic object types, like dictionaries, lists, numbers and strings.
5Usually the top level object is a dictionary.
6
7To write out a plist file, use the dump(value, file)
8function. 'value' is the top level object, 'file' is
9a (writable) file object.
10
11To parse a plist from a file, use the load(file) function,
12with a (readable) file object as the only argument. It
13returns the top level object (again, usually a dictionary).
14
15To work with plist data in bytes objects, you can use loads()
16and dumps().
17
18Values can be strings, integers, floats, booleans, tuples, lists,
19dictionaries (but only with string keys), Data, bytes, bytearray, or
20datetime.datetime objects.
21
22Generate Plist example:
23
24    pl = dict(
25        aString = "Doodah",
26        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
27        aFloat = 0.1,
28        anInt = 728,
29        aDict = dict(
30            anotherString = "<hello & hi there!>",
31            aUnicodeValue = "M\xe4ssig, Ma\xdf",
32            aTrueValue = True,
33            aFalseValue = False,
34        ),
35        someData = b"<binary gunk>",
36        someMoreData = b"<lots of binary gunk>" * 10,
37        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
38    )
39    with open(fileName, 'wb') as fp:
40        dump(pl, fp)
41
42Parse Plist example:
43
44    with open(fileName, 'rb') as fp:
45        pl = load(fp)
46    print(pl["aKey"])
47"""
# Names exported by ``from plistlib import *``.  FMT_XML and FMT_BINARY are
# not assigned directly; they are injected into the module namespace from
# the members of the PlistFormat enum.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps", "UID"
]
53
54import binascii
55import codecs
56import contextlib
57import datetime
58import enum
59from io import BytesIO
60import itertools
61import os
62import re
63import struct
64from warnings import warn
65from xml.parsers.expat import ParserCreate
66
67
# Enumeration of the serialization formats understood by this module.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
# Publish the enum members (FMT_XML, FMT_BINARY) as module-level names.
globals().update(PlistFormat.__members__)
70
71
72#
73#
74# Deprecated functionality
75#
76#
77
78
79@contextlib.contextmanager
80def _maybe_open(pathOrFile, mode):
81    if isinstance(pathOrFile, str):
82        with open(pathOrFile, mode) as fp:
83            yield fp
84
85    else:
86        yield pathOrFile
87
88
def readPlist(pathOrFile):
    """
    Parse a property list and return its root object.  pathOrFile is
    either a file name or a readable binary file object.

    The format is auto-detected and builtin types are not used for
    binary data (use_builtin_types=False).

    This function is deprecated, use load instead.
    """
    warn("The readPlist function is deprecated, use load() instead",
        DeprecationWarning, 2)

    with _maybe_open(pathOrFile, 'rb') as stream:
        root = load(stream, fmt=None, use_builtin_types=False)
    return root
101
def writePlist(value, pathOrFile):
    """
    Serialize 'value' as an XML plist with sorted keys.  'pathOrFile' is
    either a file name or a writable binary file object.

    This function is deprecated, use dump instead.
    """
    warn("The writePlist function is deprecated, use dump() instead",
        DeprecationWarning, 2)
    with _maybe_open(pathOrFile, 'wb') as stream:
        dump(
            value,
            stream,
            fmt=FMT_XML,
            sort_keys=True,
            skipkeys=False,
        )
113
114
def readPlistFromBytes(data):
    """
    Parse plist content held in a bytes object and return the root object.

    The format is auto-detected and builtin types are not used for
    binary data (use_builtin_types=False).

    This function is deprecated, use loads instead.
    """
    warn("The readPlistFromBytes function is deprecated, use loads() instead",
        DeprecationWarning, 2)
    source = BytesIO(data)
    return load(source, fmt=None, use_builtin_types=False)
124
125
def writePlistToBytes(value):
    """
    Serialize 'value' as an XML plist (sorted keys) and return the result
    as a bytes object.

    This function is deprecated, use dumps instead.
    """
    warn("The writePlistToBytes function is deprecated, use dumps() instead",
        DeprecationWarning, 2)
    buffer = BytesIO()
    dump(
        value,
        buffer,
        fmt=FMT_XML,
        sort_keys=True,
        skipkeys=False,
    )
    return buffer.getvalue()
137
138
class Data:
    """
    Thin wrapper around a bytes object holding binary plist data.

    Instances compare equal to other Data instances and to plain bytes
    objects carrying the same payload.

    This class is deprecated, use a bytes object instead.
    """

    def __init__(self, data):
        if isinstance(data, bytes):
            self.data = data
        else:
            raise TypeError("data must be as bytes")

    @classmethod
    def fromBase64(cls, data):
        # Decoding goes through the module's binascii-based helper, so the
        # base64 module is not needed here.
        raw = _decode_base64(data)
        return cls(raw)

    def asBase64(self, maxlinelength=76):
        payload = self.data
        return _encode_base64(payload, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            return NotImplemented
        return self.data == other

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.data)
170
171#
172#
173# End of deprecated functionality
174#
175#
176
177
class UID:
    """Integer identifier in the range 0 <= data < 2**64.

    Instances are hashable, compare equal only to other UID instances with
    the same value, and can be used wherever an index-like integer is
    accepted (via __index__).
    """

    def __init__(self, data):
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        # The two range checks are mutually exclusive, so their order does
        # not affect which error is raised.
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.data)

    def __reduce__(self):
        # Rebuild by calling the class with the stored integer.
        rebuild_args = (self.data,)
        return self.__class__, rebuild_args

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
204
205
206#
207# XML support
208#
209
210
# XML 'header': the XML declaration followed by Apple's PLIST 1.0 DOCTYPE.
# _PlistWriter writes it to the output file when its writeHeader option is
# true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
216
217
# Regex matching any control character in the range U+0000-U+001F, except
# for tab (\t, 0x09), line feed (\n, 0x0a) and carriage return (\r, 0x0d).
# _escape() uses it to reject strings containing such characters.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
222
223def _encode_base64(s, maxlinelength=76):
224    # copied from base64.encodebytes(), with added maxlinelength argument
225    maxbinsize = (maxlinelength//4)*3
226    pieces = []
227    for i in range(0, len(s), maxbinsize):
228        chunk = s[i : i + maxbinsize]
229        pieces.append(binascii.b2a_base64(chunk))
230    return b''.join(pieces)
231
232def _decode_base64(s):
233    if isinstance(s, str):
234        return binascii.a2b_base64(s.encode("utf-8"))
235
236    else:
237        return binascii.a2b_base64(s)
238
# Date contents should conform to a subset of ISO 8601, in particular
# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller (trailing) units
# may be omitted, with a loss of precision; the year and the final 'Z'
# are always required.  re.ASCII restricts \d to the ASCII digits 0-9.
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
243
244
def _date_from_string(s):
    """Convert a plist date string into a datetime.datetime.

    The string is matched against _dateParser; the leading run of fields
    that are present (year, then month, day, hour, minute, second) is
    passed positionally to datetime.datetime().
    """
    groups = _dateParser.match(s).groupdict()
    fields = []
    for name in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        text = groups[name]
        if text is None:
            # Units are nested in the pattern, so once one is missing all
            # smaller units are missing too.
            break
        fields.append(int(text))
    return datetime.datetime(*fields)
255
256
257def _date_to_string(d):
258    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
259        d.year, d.month, d.day,
260        d.hour, d.minute, d.second
261    )
262
def _escape(text):
    """Return *text* made safe for use as XML character data.

    Line endings are normalized to '\\n' and the characters '&', '<' and
    '>' are replaced by their XML entities.  Raises ValueError if *text*
    contains a control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use bytes instead")
    # Order matters: "\r\n" must be collapsed before bare "\r", and '&'
    # must be escaped before '<' / '>' so that the entities produced for
    # those are not escaped a second time.
    substitutions = (
        ("\r\n", "\n"),     # DOS line endings
        ("\r", "\n"),       # old Mac line endings
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
274
275class _PlistParser:
276    def __init__(self, use_builtin_types, dict_type):
277        self.stack = []
278        self.current_key = None
279        self.root = None
280        self._use_builtin_types = use_builtin_types
281        self._dict_type = dict_type
282
283    def parse(self, fileobj):
284        self.parser = ParserCreate()
285        self.parser.StartElementHandler = self.handle_begin_element
286        self.parser.EndElementHandler = self.handle_end_element
287        self.parser.CharacterDataHandler = self.handle_data
288        self.parser.ParseFile(fileobj)
289        return self.root
290
291    def handle_begin_element(self, element, attrs):
292        self.data = []
293        handler = getattr(self, "begin_" + element, None)
294        if handler is not None:
295            handler(attrs)
296
297    def handle_end_element(self, element):
298        handler = getattr(self, "end_" + element, None)
299        if handler is not None:
300            handler()
301
302    def handle_data(self, data):
303        self.data.append(data)
304
305    def add_object(self, value):
306        if self.current_key is not None:
307            if not isinstance(self.stack[-1], type({})):
308                raise ValueError("unexpected element at line %d" %
309                                 self.parser.CurrentLineNumber)
310            self.stack[-1][self.current_key] = value
311            self.current_key = None
312        elif not self.stack:
313            # this is the root object
314            self.root = value
315        else:
316            if not isinstance(self.stack[-1], type([])):
317                raise ValueError("unexpected element at line %d" %
318                                 self.parser.CurrentLineNumber)
319            self.stack[-1].append(value)
320
321    def get_data(self):
322        data = ''.join(self.data)
323        self.data = []
324        return data
325
326    # element handlers
327
328    def begin_dict(self, attrs):
329        d = self._dict_type()
330        self.add_object(d)
331        self.stack.append(d)
332
333    def end_dict(self):
334        if self.current_key:
335            raise ValueError("missing value for key '%s' at line %d" %
336                             (self.current_key,self.parser.CurrentLineNumber))
337        self.stack.pop()
338
339    def end_key(self):
340        if self.current_key or not isinstance(self.stack[-1], type({})):
341            raise ValueError("unexpected key at line %d" %
342                             self.parser.CurrentLineNumber)
343        self.current_key = self.get_data()
344
345    def begin_array(self, attrs):
346        a = []
347        self.add_object(a)
348        self.stack.append(a)
349
350    def end_array(self):
351        self.stack.pop()
352
353    def end_true(self):
354        self.add_object(True)
355
356    def end_false(self):
357        self.add_object(False)
358
359    def end_integer(self):
360        self.add_object(int(self.get_data()))
361
362    def end_real(self):
363        self.add_object(float(self.get_data()))
364
365    def end_string(self):
366        self.add_object(self.get_data())
367
368    def end_data(self):
369        if self._use_builtin_types:
370            self.add_object(_decode_base64(self.get_data()))
371
372        else:
373            self.add_object(Data.fromBase64(self.get_data()))
374
375    def end_date(self):
376        self.add_object(_date_from_string(self.get_data()))
377
378
379class _DumbXMLWriter:
380    def __init__(self, file, indent_level=0, indent="\t"):
381        self.file = file
382        self.stack = []
383        self._indent_level = indent_level
384        self.indent = indent
385
386    def begin_element(self, element):
387        self.stack.append(element)
388        self.writeln("<%s>" % element)
389        self._indent_level += 1
390
391    def end_element(self, element):
392        assert self._indent_level > 0
393        assert self.stack.pop() == element
394        self._indent_level -= 1
395        self.writeln("</%s>" % element)
396
397    def simple_element(self, element, value=None):
398        if value is not None:
399            value = _escape(value)
400            self.writeln("<%s>%s</%s>" % (element, value, element))
401
402        else:
403            self.writeln("<%s/>" % element)
404
405    def writeln(self, line):
406        if line:
407            # plist has fixed encoding of utf-8
408
409            # XXX: is this test needed?
410            if isinstance(line, str):
411                line = line.encode('utf-8')
412            self.file.write(self._indent_level * self.indent)
413            self.file.write(line)
414        self.file.write(b'\n')
415
416
class _PlistWriter(_DumbXMLWriter):
    """Serializes a tree of Python objects as an XML property list.

    file: writable binary file object.
    indent_level, indent: forwarded to _DumbXMLWriter.
    writeHeader: when true, PLISTHEADER is written to *file* immediately.
    sort_keys: emit dictionary items in sorted order.
    skipkeys: silently drop dictionary items whose key is not a str
        instead of raising TypeError.
    """

    def __init__(
            self, file, indent_level=0, indent=b"\t", writeHeader=1,
            sort_keys=True, skipkeys=False):

        if writeHeader:
            file.write(PLISTHEADER)
        super().__init__(file, indent_level, indent)
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* wrapped in the top-level <plist> element."""
        self.writeln('<plist version="1.0">')
        self.write_value(value)
        self.writeln('</plist>')

    def write_value(self, value):
        """Write one value, dispatching on its type.

        Raises OverflowError for integers outside -2**63 <= n < 2**64 and
        TypeError for unsupported types.
        """
        if isinstance(value, str):
            self.simple_element("string", value)
            return

        # The booleans are tested by identity before the int check because
        # bool is a subclass of int.
        if value is True:
            self.simple_element("true")
            return

        if value is False:
            self.simple_element("false")
            return

        if isinstance(value, int):
            if not (-1 << 63 <= value < 1 << 64):
                raise OverflowError(value)
            self.simple_element("integer", "%d" % value)
            return

        if isinstance(value, float):
            self.simple_element("real", repr(value))
            return

        if isinstance(value, dict):
            self.write_dict(value)
            return

        if isinstance(value, Data):
            self.write_data(value)
            return

        if isinstance(value, (bytes, bytearray)):
            self.write_bytes(value)
            return

        if isinstance(value, datetime.datetime):
            self.simple_element("date", _date_to_string(value))
            return

        if isinstance(value, (tuple, list)):
            self.write_array(value)
            return

        raise TypeError("unsupported type: %s" % type(value))

    def write_data(self, data):
        """Write the payload of a Data wrapper as a <data> element."""
        self.write_bytes(data.data)

    def write_bytes(self, data):
        """Write *data* as a base64-encoded <data> element."""
        self.begin_element("data")
        # The base64 lines are written at the same depth as the <data> tag
        # itself, so undo the indent added by begin_element() for now.
        self._indent_level -= 1
        # Width taken up by the indentation, counting a tab as 8 columns.
        expanded = self.indent.replace(b"\t", b" " * 8) * self._indent_level
        maxlinelength = max(16, 76 - len(expanded))

        encoded = _encode_base64(data, maxlinelength)
        for chunk in encoded.split(b"\n"):
            if not chunk:
                continue
            self.writeln(chunk)
        self._indent_level += 1
        self.end_element("data")

    def write_dict(self, d):
        """Write *d* as a <dict> element; an empty mapping becomes <dict/>."""
        if not d:
            self.simple_element("dict")
            return

        self.begin_element("dict")
        pairs = sorted(d.items()) if self._sort_keys else d.items()

        for name, item in pairs:
            if not isinstance(name, str):
                if self._skipkeys:
                    continue
                raise TypeError("keys must be strings")
            self.simple_element("key", name)
            self.write_value(item)
        self.end_element("dict")

    def write_array(self, array):
        """Write *array* as an <array> element; an empty sequence becomes
        <array/>."""
        if not array:
            self.simple_element("array")
            return

        self.begin_element("array")
        for item in array:
            self.write_value(item)
        self.end_element("array")
515
516
517def _is_fmt_xml(header):
518    prefixes = (b'<?xml', b'<plist')
519
520    for pfx in prefixes:
521        if header.startswith(pfx):
522            return True
523
524    # Also check for alternative XML encodings, this is slightly
525    # overkill because the Apple tools (and plistlib) will not
526    # generate files with these encodings.
527    for bom, encoding in (
528                (codecs.BOM_UTF8, "utf-8"),
529                (codecs.BOM_UTF16_BE, "utf-16-be"),
530                (codecs.BOM_UTF16_LE, "utf-16-le"),
531                # expat does not support utf-32
532                #(codecs.BOM_UTF32_BE, "utf-32-be"),
533                #(codecs.BOM_UTF32_LE, "utf-32-le"),
534            ):
535        if not header.startswith(bom):
536            continue
537
538        for start in prefixes:
539            prefix = bom + start.decode('ascii').encode(encoding)
540            if header[:len(prefix)] == prefix:
541                return True
542
543    return False
544
545#
546# Binary Plist
547#
548
549
class InvalidFileException (ValueError):
    """Raised when data cannot be interpreted as a property list."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
553
# Maps an integer width in bytes to the struct format character used to
# pack/unpack an integer of that width (always combined with '>').
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Sentinel marking entries of the binary parser's object table that have
# not been read yet.
_undefined = object()
557
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    use_builtin_types: when true, data objects are returned as bytes;
        otherwise they are wrapped in Data instances.
    dict_type: callable invoked without arguments to create the mapping
        for every dict object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file object *fp* and return the root
        object.  Low-level read/decode errors are reported as
        InvalidFileException."""
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            # The trailer is the last 32 bytes of the file.
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            # 6 skipped bytes, two 1-byte sizes, three 8-byte integers.
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Cache of decoded objects, indexed by reference number.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        if tokenL == 0xF:
            # The size does not fit in the low nibble of the marker: it
            # follows as a separate integer.  The low two bits of the next
            # byte give log2 of that integer's width in bytes.
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each from
        the current file position and return them as a tuple."""
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            # Widths without a struct format character are decoded by hand.
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        """Read *n* object references (each self._ref_size bytes wide)."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        result = self._objects[ref]
        if result is not _undefined:
            # Already decoded earlier.
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        # Marker byte: the high nibble selects the object type, the low
        # nibble carries a size or other type-specific information.
        token = self._fp.read(1)[0]
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # 2**tokenL bytes; integers of 8 or more bytes are signed.
            result = int.from_bytes(self._fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22: # real
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23: # real
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                result = self._fp.read(s)
            else:
                result = Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result =  self._fp.read(s).decode('ascii')

        elif tokenH == 0x60:  # unicode string
            # The size counts 2-byte UTF-16 code units, not bytes.
            s = self._get_size(tokenL)
            result = self._fp.read(s * 2).decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
            # The low nibble holds the number of payload bytes minus one.
            result = UID(int.from_bytes(self._fp.read(1 + tokenL), 'big'))

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the container before reading its elements, so that
            # a reference back to this array resolves to the same list.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            # All key references come first, followed by all value
            # references.
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            # Registered before reading the items, as for arrays.
            self._objects[ref] = result
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(k)] = self._read_object(o)

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
711
712def _count_to_size(count):
713    if count < 1 << 8:
714        return 1
715
716    elif count < 1 << 16:
717        return 2
718
719    elif count << 1 << 32:
720        return 4
721
722    else:
723        return 8
724
# Value types that the binary writer looks up in its object table by
# (type(value), value); containers and other objects are tracked by id().
_scalars = (str, int, float, datetime.datetime, bytes)
726
class _BinaryPlistWriter (object):
    """Serializes a tree of Python objects as a binary property list.

    fp: writable binary file object; tell() is used to record offsets.
    sort_keys: serialize dictionary items in sorted order.
    skipkeys: silently drop dictionary items whose key is not a str
        instead of raising TypeError.
    """
    def __init__(self, fp, sort_keys, skipkeys):
        self._fp = fp
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    def write(self, value):
        """Write *value* as a complete binary plist: header, objects,
        offset table and trailer."""

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        # Every object offset is smaller than the offset of the table, so
        # the width chosen for the latter fits all table entries.
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        # 5 pad bytes, three 1-byte fields and three 8-byte fields: 32 bytes.
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

    def _flatten(self, value):
        """Assign a reference number to *value* and, recursively, to every
        object it contains, appending each new object to self._objlist."""
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            # Data wrappers are keyed by their payload.
            if (type(value.data), value.data) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        elif isinstance(value, Data):
            self._objtable[(type(value.data), value.data)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            # All keys are flattened before any of the values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)

    def _getrefnum(self, value):
        """Return the reference number assigned to *value* by _flatten()."""
        if isinstance(value, _scalars):
            return self._objtable[(type(value), value)]
        elif isinstance(value, Data):
            return self._objtable[(type(value.data), value.data)]
        else:
            return self._objidtable[id(value)]

    def _write_size(self, token, size):
        """Write the marker byte for type *token* together with *size*.

        Sizes below 15 are stored in the low nibble of the marker; larger
        sizes set the nibble to 0xF and follow as an integer object
        (marker 0x10-0x13 plus a 1, 2, 4 or 8 byte value).
        """
        if size < 15:
            self._fp.write(struct.pack('>B', token | size))

        elif size < 1 << 8:
            self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

        elif size < 1 << 16:
            self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

        elif size < 1 << 32:
            self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

        else:
            self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _write_object(self, value):
        """Serialize one object at the current file position and record
        that position in the offset table.

        Raises OverflowError for integers that cannot be represented and
        TypeError for unsupported types or non-string dictionary keys.
        """
        ref = self._getrefnum(value)
        self._object_offsets[ref] = self._fp.tell()
        if value is None:
            self._fp.write(b'\x00')

        elif value is False:
            self._fp.write(b'\x08')

        elif value is True:
            self._fp.write(b'\x09')

        elif isinstance(value, int):
            if value < 0:
                # Negative integers are always written as 8 signed bytes.
                try:
                    self._fp.write(struct.pack('>Bq', 0x13, value))
                except struct.error:
                    raise OverflowError(value) from None
            elif value < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x10, value))
            elif value < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x11, value))
            elif value < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x12, value))
            elif value < 1 << 63:
                self._fp.write(struct.pack('>BQ', 0x13, value))
            elif value < 1 << 64:
                # Too large for 8 signed bytes: written as a 16-byte signed
                # integer.
                self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
            else:
                raise OverflowError(value)

        elif isinstance(value, float):
            self._fp.write(struct.pack('>Bd', 0x23, value))

        elif isinstance(value, datetime.datetime):
            # Dates are stored as seconds relative to 2001-01-01.
            f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
            self._fp.write(struct.pack('>Bd', 0x33, f))

        elif isinstance(value, Data):
            self._write_size(0x40, len(value.data))
            self._fp.write(value.data)

        elif isinstance(value, (bytes, bytearray)):
            self._write_size(0x40, len(value))
            self._fp.write(value)

        elif isinstance(value, str):
            # Pure ASCII strings are stored as bytes; anything else as
            # UTF-16 (big endian), with the size given in 2-byte units.
            try:
                t = value.encode('ascii')
                self._write_size(0x50, len(value))
            except UnicodeEncodeError:
                t = value.encode('utf-16be')
                self._write_size(0x60, len(t) // 2)

            self._fp.write(t)

        elif isinstance(value, UID):
            # The low nibble of the marker is the payload width in bytes
            # minus one.  struct.pack() receives the UID object itself,
            # which defines __index__().
            if value.data < 0:
                raise ValueError("UIDs must be positive")
            elif value.data < 1 << 8:
                self._fp.write(struct.pack('>BB', 0x80, value))
            elif value.data < 1 << 16:
                self._fp.write(struct.pack('>BH', 0x81, value))
            elif value.data < 1 << 32:
                self._fp.write(struct.pack('>BL', 0x83, value))
            elif value.data < 1 << 64:
                self._fp.write(struct.pack('>BQ', 0x87, value))
            else:
                raise OverflowError(value)

        elif isinstance(value, (list, tuple)):
            refs = [self._getrefnum(o) for o in value]
            s = len(refs)
            self._write_size(0xA0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

        elif isinstance(value, dict):
            keyRefs, valRefs = [], []

            if self._sort_keys:
                rootItems = sorted(value.items())
            else:
                rootItems = value.items()

            for k, v in rootItems:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keyRefs.append(self._getrefnum(k))
                valRefs.append(self._getrefnum(v))

            # All key references are written first, then all value
            # references.
            s = len(keyRefs)
            self._write_size(0xD0, s)
            self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
            self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

        else:
            raise TypeError(value)
949
950
951def _is_fmt_binary(header):
952    return header[:8] == b'bplist00'
953
954
955#
956# Generic bits
957#
958
# Registry of the supported formats.  Each entry provides:
#   detect -- function deciding from the first bytes of a file whether the
#             file is in this format (used by load() when fmt is None),
#   parser -- class whose instances turn a file into Python objects,
#   writer -- class whose instances write Python objects to a file.
_FORMATS={
    FMT_XML: dict(
        detect=_is_fmt_xml,
        parser=_PlistParser,
        writer=_PlistWriter,
    ),
    FMT_BINARY: dict(
        detect=_is_fmt_binary,
        parser=_BinaryPlistParser,
        writer=_BinaryPlistWriter,
    )
}
971
972
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be a readable and binary file object.
    Return the unpacked root object (which usually is a dictionary).

    'fmt' selects the format (FMT_XML or FMT_BINARY).  When it is None the
    format is detected from the first 32 bytes of the file, after which
    the file is rewound to position 0; InvalidFileException is raised if
    no known format matches.  'use_builtin_types' and 'dict_type' are
    passed on to the parser.
    """
    parser_cls = None
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        fp.seek(0)
        for spec in _FORMATS.values():
            if spec['detect'](header):
                parser_cls = spec['parser']
                break

        if parser_cls is None:
            raise InvalidFileException()

    parser = parser_cls(
        use_builtin_types=use_builtin_types, dict_type=dict_type)
    return parser.parse(fp)
993
994
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.
    Return the unpacked root object (which usually is a dictionary).

    The keyword arguments have the same meaning as for load().
    """
    options = dict(
        fmt=fmt,
        use_builtin_types=use_builtin_types,
        dict_type=dict_type,
    )
    return load(BytesIO(value), **options)
1002
1003
def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    """Write 'value' to a .plist file. 'fp' should be a writable,
    binary file object.

    'fmt' selects the output format; ValueError is raised if it is not a
    supported format.  'sort_keys' and 'skipkeys' are passed on to the
    writer.
    """
    if fmt not in _FORMATS:
        raise ValueError("Unsupported format: %r"%(fmt,))

    writer_cls = _FORMATS[fmt]["writer"]
    serializer = writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys)
    serializer.write(value)
1013
1014
def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    """Return a bytes object with the contents for a .plist file.

    The keyword arguments have the same meaning as for dump().
    """
    buffer = BytesIO()
    dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    serialized = buffer.getvalue()
    return serialized
1021