• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1""" Python 'utf-16' Codec
2
3
4Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
8"""
9import codecs, sys
10
11### Codec APIs
12
# Stateless encode entry point: a direct alias of codecs.utf_16_encode.
encode = codecs.utf_16_encode
14
def decode(input, errors='strict'):
    """Decode the complete UTF-16 byte string *input* in one shot.

    Delegates to ``codecs.utf_16_decode`` and returns its result unchanged.
    """
    # The whole input is supplied at once, so the decoder is told that no
    # further data will follow.
    is_final = True
    return codecs.utf_16_decode(input, errors, is_final)
17
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    ``self.encoder`` stays ``None`` until the first chunk has been encoded
    with ``codecs.utf_16_encode``; from then on it holds the little- or
    big-endian encoder selected by ``sys.byteorder``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None marks "nothing encoded yet since construction/reset".
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk: go through the generic encoder, then switch to the
        # platform byte-order specific encoder for all later chunks.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Return to the initial "nothing encoded yet" state."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (the encoder never writes in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state value previously produced by ``getstate()``.

        Any truthy *state* means "undetermined"; a falsy one selects the
        platform's native byte-order encoder.
        """
        if not state:
            self.encoder = (codecs.utf_16_le_encode
                            if sys.byteorder == 'little'
                            else codecs.utf_16_be_encode)
        else:
            self.encoder = None
52
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    ``self.decoder`` is ``None`` until a byte order has been detected, then
    it holds ``codecs.utf_16_le_decode`` or ``codecs.utf_16_be_decode``.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None marks "byte order not determined yet".
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been reported.
        """
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        # Byte order still unknown: use the extended decoder, which also
        # reports the byte order it found (-1 little, 1 big).
        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)
        if order == -1:
            self.decoder = codecs.utf_16_le_decode
        elif order == 1:
            self.decoder = codecs.utf_16_be_decode
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Discard buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, flag)`` describing the decoder.

        The flag means:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the first item of the base-class state is kept; the base
        # class's additional state info isn't passed along to the caller.
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        platform_is_big = sys.byteorder == "big"
        stream_is_big = self.decoder is codecs.utf_16_be_decode
        return (pending, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a ``(buffered_bytes, flag)`` pair from ``getstate()``."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == "big"
        if flag == 0:
            # Natural order for this platform.
            self.decoder = (codecs.utf_16_be_decode if platform_is_big
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            # Unnatural (byte-swapped) order.
            self.decoder = (codecs.utf_16_le_decode if platform_is_big
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None
103
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first ``encode`` call after construction or ``reset`` goes through
    ``codecs.utf_16_encode``; later calls use the byte-order specific
    encoder chosen from ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None marks "nothing encoded yet since construction/reset".
        self.encoder = None

    def reset(self):
        """Reset the writer so the next ``encode`` call starts afresh."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First chunk: use the generic encoder, then remember the
        # platform-specific encoder for every following chunk.
        encoded = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return encoded
123
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that selects its byte order from the BOM.

    Once a byte order is detected, ``decode`` is replaced on the instance
    by the matching ``codecs.utf_16_le_decode`` / ``utf_16_be_decode``.
    """

    def reset(self):
        """Reset the reader and restore the BOM-detecting ``decode``."""
        codecs.StreamReader.reset(self)
        # Remove the instance-level override (if one was installed) so the
        # class-level decode method becomes visible again.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been reported.
        """
        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, False)
        # Map the reported byte order (-1 little, 1 big) to its decoder.
        chosen = {
            -1: codecs.utf_16_le_decode,
            1: codecs.utf_16_be_decode,
        }.get(order)
        if chosen is not None:
            self.decode = chosen
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)
143
144### encodings module API
145
def getregentry():
    """Return the ``codecs.CodecInfo`` record describing this codec.

    The record bundles the module-level ``encode``/``decode`` callables
    with the incremental and stream classes defined in this module.
    """
    components = {
        'name': 'utf-16',
        'encode': encode,
        'decode': decode,
        'incrementalencoder': IncrementalEncoder,
        'incrementaldecoder': IncrementalDecoder,
        'streamreader': StreamReader,
        'streamwriter': StreamWriter,
    }
    return codecs.CodecInfo(**components)
156