1 /*
2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3 * Copyright (C) 2008 Holger Hans Peter Freyther
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "TextCodecQt.h"
29 #include "PlatformString.h"
30 #include <wtf/text/CString.h>
31 #include <qset.h>
32
33 namespace WebCore {
34
35 static QSet<QByteArray> *unique_names = 0;
36
getAtomicName(const QByteArray & name)37 static const char *getAtomicName(const QByteArray &name)
38 {
39 if (!unique_names)
40 unique_names = new QSet<QByteArray>;
41
42 unique_names->insert(name);
43 return unique_names->find(name)->constData();
44 }
45
registerEncodingNames(EncodingNameRegistrar registrar)46 void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
47 {
48 QList<int> mibs = QTextCodec::availableMibs();
49 // qDebug() << ">>>>>>>>> registerEncodingNames";
50
51 for (int i = 0; i < mibs.size(); ++i) {
52 QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
53 const char *name = getAtomicName(c->name());
54 registrar(name, name);
55 // qDebug() << " " << name << name;
56 QList<QByteArray> aliases = c->aliases();
57 for (int i = 0; i < aliases.size(); ++i) {
58 const char *a = getAtomicName(aliases.at(i));
59 // qDebug() << " (a) " << a << name;
60 registrar(a, name);
61 }
62 }
63 }
64
newTextCodecQt(const TextEncoding & encoding,const void *)65 static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
66 {
67 return new TextCodecQt(encoding);
68 }
69
registerCodecs(TextCodecRegistrar registrar)70 void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
71 {
72 QList<int> mibs = QTextCodec::availableMibs();
73 // qDebug() << ">>>>>>>>> registerCodecs";
74
75 for (int i = 0; i < mibs.size(); ++i) {
76 QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
77 const char *name = getAtomicName(c->name());
78 // qDebug() << " " << name;
79 registrar(name, newTextCodecQt, 0);
80 }
81 }
82
TextCodecQt(const TextEncoding & encoding)83 TextCodecQt::TextCodecQt(const TextEncoding& encoding)
84 : m_encoding(encoding)
85 {
86 m_codec = QTextCodec::codecForName(m_encoding.name());
87 }
88
~TextCodecQt()89 TextCodecQt::~TextCodecQt()
90 {
91 }
92
93
decode(const char * bytes,size_t length,bool flush,bool,bool & sawError)94 String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
95 {
96 // We chop input buffer to smaller buffers to avoid excessive memory consumption
97 // when the input buffer is big. This helps reduce peak memory consumption in
98 // mobile devices where system RAM is limited.
99 #if OS(SYMBIAN)
100 static const int MaxInputChunkSize = 32 * 1024;
101 #else
102 static const int MaxInputChunkSize = 1024 * 1024;
103 #endif
104 const char* buf = bytes;
105 const char* end = buf + length;
106 String unicode(""); // a non-null string is expected
107
108 while (buf < end) {
109 int size = end - buf;
110 size = qMin(size, MaxInputChunkSize);
111 QString decoded = m_codec->toUnicode(buf, size, &m_state);
112 unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length());
113 buf += size;
114 }
115
116 sawError = m_state.invalidChars != 0;
117
118 if (flush) {
119 m_state.flags = QTextCodec::DefaultConversion;
120 m_state.remainingChars = 0;
121 m_state.invalidChars = 0;
122 }
123
124 return unicode;
125 }
126
encode(const UChar * characters,size_t length,UnencodableHandling handling)127 CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
128 {
129 QTextCodec::ConverterState state;
130 state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);
131
132 if (!length)
133 return "";
134
135 QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);
136
137 // If some <b> characters </b> are unencodable, escape them as specified by <b> handling </b>
138 // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we
139 // escape it with getUnencodableReplacement, append it, then move to the next chunk.
140 if (state.invalidChars) {
141 state.invalidChars = 0;
142 state.remainingChars = 0;
143 int len = 0;
144 ba.clear();
145 for (size_t pos = 0; pos < length; ++pos) {
146 QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
147 if (state.remainingChars)
148 continue;
149 if (state.invalidChars) {
150 UnencodableReplacementArray replacement;
151 getUnencodableReplacement(characters[0], handling, replacement);
152 tba.replace('\0', replacement);
153 state.invalidChars = 0;
154 }
155 ba.append(tba);
156 characters += len;
157 len = 0;
158 state.remainingChars = 0;
159 }
160 }
161
162 return CString(ba.constData(), ba.length());
163 }
164
165
166 } // namespace WebCore
167