• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "TextCodecUTF16.h"
28 
29 #include "PlatformString.h"
30 #include <wtf/text/CString.h>
31 #include <wtf/text/StringBuffer.h>
32 #include <wtf/PassOwnPtr.h>
33 
34 using namespace std;
35 
36 namespace WebCore {
37 
registerEncodingNames(EncodingNameRegistrar registrar)38 void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
39 {
40     registrar("UTF-16LE", "UTF-16LE");
41     registrar("UTF-16BE", "UTF-16BE");
42 
43     registrar("ISO-10646-UCS-2", "UTF-16LE");
44     registrar("UCS-2", "UTF-16LE");
45     registrar("UTF-16", "UTF-16LE");
46     registrar("Unicode", "UTF-16LE");
47     registrar("csUnicode", "UTF-16LE");
48     registrar("unicodeFEFF", "UTF-16LE");
49 
50     registrar("unicodeFFFE", "UTF-16BE");
51 }
52 
newStreamingTextDecoderUTF16LE(const TextEncoding &,const void *)53 static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
54 {
55     return new TextCodecUTF16(true);
56 }
57 
newStreamingTextDecoderUTF16BE(const TextEncoding &,const void *)58 static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
59 {
60     return new TextCodecUTF16(false);
61 }
62 
registerCodecs(TextCodecRegistrar registrar)63 void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
64 {
65     registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
66     registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
67 }
68 
decode(const char * bytes,size_t length,bool,bool,bool &)69 String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
70 {
71     if (!length)
72         return String();
73 
74     // FIXME: This should generate an error if there is an unpaired surrogate.
75 
76     const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
77     size_t numBytes = length + m_haveBufferedByte;
78     size_t numChars = numBytes / 2;
79 
80     StringBuffer buffer(numChars);
81     UChar* q = buffer.characters();
82 
83     if (m_haveBufferedByte) {
84         UChar c;
85         if (m_littleEndian)
86             c = m_bufferedByte | (p[0] << 8);
87         else
88             c = (m_bufferedByte << 8) | p[0];
89         *q++ = c;
90         m_haveBufferedByte = false;
91         p += 1;
92         numChars -= 1;
93     }
94 
95     if (m_littleEndian) {
96         for (size_t i = 0; i < numChars; ++i) {
97             UChar c = p[0] | (p[1] << 8);
98             p += 2;
99             *q++ = c;
100         }
101     } else {
102         for (size_t i = 0; i < numChars; ++i) {
103             UChar c = (p[0] << 8) | p[1];
104             p += 2;
105             *q++ = c;
106         }
107     }
108 
109     if (numBytes & 1) {
110         ASSERT(!m_haveBufferedByte);
111         m_haveBufferedByte = true;
112         m_bufferedByte = p[0];
113     }
114 
115     buffer.shrink(q - buffer.characters());
116 
117     return String::adopt(buffer);
118 }
119 
encode(const UChar * characters,size_t length,UnencodableHandling)120 CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
121 {
122     // We need to be sure we can double the length without overflowing.
123     // Since the passed-in length is the length of an actual existing
124     // character buffer, each character is two bytes, and we know
125     // the buffer doesn't occupy the entire address space, we can
126     // assert here that doubling the length does not overflow size_t
127     // and there's no need for a runtime check.
128     ASSERT(length <= numeric_limits<size_t>::max() / 2);
129 
130     char* bytes;
131     CString string = CString::newUninitialized(length * 2, bytes);
132 
133     // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
134     // null characters inside it. Perhaps the result of encode should not be a CString.
135     if (m_littleEndian) {
136         for (size_t i = 0; i < length; ++i) {
137             UChar c = characters[i];
138             bytes[i * 2] = c;
139             bytes[i * 2 + 1] = c >> 8;
140         }
141     } else {
142         for (size_t i = 0; i < length; ++i) {
143             UChar c = characters[i];
144             bytes[i * 2] = c >> 8;
145             bytes[i * 2 + 1] = c;
146         }
147     }
148 
149     return string;
150 }
151 
152 } // namespace WebCore
153