1 /*
2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "TextEncodingRegistry.h"
29
30 #include "PlatformString.h"
31 #include "TextCodecLatin1.h"
32 #include "TextCodecUserDefined.h"
33 #include "TextCodecUTF16.h"
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/HashFunctions.h>
37 #include <wtf/HashMap.h>
38 #include <wtf/StdLibExtras.h>
39 #include <wtf/StringExtras.h>
40 #include <wtf/Threading.h>
41
42 #if USE(ICU_UNICODE)
43 #include "TextCodecICU.h"
44 #endif
45 #if PLATFORM(MAC)
46 #include "TextCodecMac.h"
47 #endif
48 #if PLATFORM(QT)
49 #include "qt/TextCodecQt.h"
50 #endif
51 #if USE(GLIB_UNICODE)
52 #include "gtk/TextCodecGtk.h"
53 #endif
54 #if OS(WINCE) && !PLATFORM(QT)
55 #include "TextCodecWince.h"
56 #endif
57
58 using namespace WTF;
59
60 namespace WebCore {
61
// Longest encoding name, in characters, that the registry deals with:
// addToTextEncodingNameMap() asserts that aliases fit within it, and the
// UTF-16 overload of atomicCanonicalTextEncodingName() sizes its stack buffer
// from it (plus one for the terminator).
const size_t maxEncodingNameLength = 63;
63
64 // Hash for all-ASCII strings that does case folding and skips any characters
65 // that are not alphanumeric. If passed any non-ASCII characters, depends on
66 // the behavior of isalnum -- if that returns false as it does on OS X, then
67 // it will properly skip those characters too.
68 struct TextEncodingNameHash {
69
equalWebCore::TextEncodingNameHash70 static bool equal(const char* s1, const char* s2)
71 {
72 char c1;
73 char c2;
74 do {
75 do
76 c1 = *s1++;
77 while (c1 && !isASCIIAlphanumeric(c1));
78 do
79 c2 = *s2++;
80 while (c2 && !isASCIIAlphanumeric(c2));
81 if (toASCIILower(c1) != toASCIILower(c2))
82 return false;
83 } while (c1 && c2);
84 return !c1 && !c2;
85 }
86
87 // This algorithm is the one-at-a-time hash from:
88 // http://burtleburtle.net/bob/hash/hashfaq.html
89 // http://burtleburtle.net/bob/hash/doobs.html
hashWebCore::TextEncodingNameHash90 static unsigned hash(const char* s)
91 {
92 unsigned h = WTF::stringHashingStartValue;
93 for (;;) {
94 char c;
95 do {
96 c = *s++;
97 if (!c) {
98 h += (h << 3);
99 h ^= (h >> 11);
100 h += (h << 15);
101 return h;
102 }
103 } while (!isASCIIAlphanumeric(c));
104 h += toASCIILower(c);
105 h += (h << 10);
106 h ^= (h >> 6);
107 }
108 }
109
110 static const bool safeToCompareToEmptyOrDeleted = false;
111 };
112
113 struct TextCodecFactory {
114 NewTextCodecFunction function;
115 const void* additionalData;
TextCodecFactoryWebCore::TextCodecFactory116 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
117 };
118
119 typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
120 typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
121
encodingRegistryMutex()122 static Mutex& encodingRegistryMutex()
123 {
124 // We don't have to use AtomicallyInitializedStatic here because
125 // this function is called on the main thread for any page before
126 // it is used in worker threads.
127 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
128 return mutex;
129 }
130
131 static TextEncodingNameMap* textEncodingNameMap;
132 static TextCodecMap* textCodecMap;
133 static bool didExtendTextCodecMaps;
134
135 static const char* const textEncodingNameBlacklist[] = {
136 "UTF-7"
137 };
138
139 #if ERROR_DISABLED
140
checkExistingName(const char *,const char *)141 static inline void checkExistingName(const char*, const char*) { }
142
143 #else
144
checkExistingName(const char * alias,const char * atomicName)145 static void checkExistingName(const char* alias, const char* atomicName)
146 {
147 const char* oldAtomicName = textEncodingNameMap->get(alias);
148 if (!oldAtomicName)
149 return;
150 if (oldAtomicName == atomicName)
151 return;
152 // Keep the warning silent about one case where we know this will happen.
153 if (strcmp(alias, "ISO-8859-8-I") == 0
154 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
155 && strcasecmp(atomicName, "iso-8859-8") == 0)
156 return;
157 LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
158 alias, oldAtomicName, atomicName);
159 }
160
161 #endif
162
addToTextEncodingNameMap(const char * alias,const char * name)163 static void addToTextEncodingNameMap(const char* alias, const char* name)
164 {
165 ASSERT(strlen(alias) <= maxEncodingNameLength);
166 const char* atomicName = textEncodingNameMap->get(name);
167 ASSERT(strcmp(alias, name) == 0 || atomicName);
168 if (!atomicName)
169 atomicName = name;
170 checkExistingName(alias, atomicName);
171 textEncodingNameMap->add(alias, atomicName);
172 }
173
addToTextCodecMap(const char * name,NewTextCodecFunction function,const void * additionalData)174 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
175 {
176 const char* atomicName = textEncodingNameMap->get(name);
177 ASSERT(atomicName);
178 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
179 }
180
pruneBlacklistedCodecs()181 static void pruneBlacklistedCodecs()
182 {
183 size_t blacklistedCodecListLength = sizeof(textEncodingNameBlacklist) / sizeof(textEncodingNameBlacklist[0]);
184 for (size_t i = 0; i < blacklistedCodecListLength; ++i) {
185 const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]);
186 if (!atomicName)
187 continue;
188
189 Vector<const char*> names;
190 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
191 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
192 for (; it != end; ++it) {
193 if (it->second == atomicName)
194 names.append(it->first);
195 }
196
197 size_t length = names.size();
198 for (size_t j = 0; j < length; ++j)
199 textEncodingNameMap->remove(names[j]);
200
201 textCodecMap->remove(atomicName);
202 }
203 }
204
buildBaseTextCodecMaps()205 static void buildBaseTextCodecMaps()
206 {
207 ASSERT(isMainThread());
208 ASSERT(!textCodecMap);
209 ASSERT(!textEncodingNameMap);
210
211 textCodecMap = new TextCodecMap;
212 textEncodingNameMap = new TextEncodingNameMap;
213
214 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
215 TextCodecLatin1::registerCodecs(addToTextCodecMap);
216
217 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
218 TextCodecUTF16::registerCodecs(addToTextCodecMap);
219
220 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
221 TextCodecUserDefined::registerCodecs(addToTextCodecMap);
222
223 #if USE(ICU_UNICODE)
224 TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
225 TextCodecICU::registerBaseCodecs(addToTextCodecMap);
226 #endif
227
228 #if USE(GLIB_UNICODE)
229 TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap);
230 TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
231 #endif
232
233 #if OS(WINCE) && !PLATFORM(QT)
234 TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
235 TextCodecWince::registerBaseCodecs(addToTextCodecMap);
236 #endif
237 }
238
extendTextCodecMaps()239 static void extendTextCodecMaps()
240 {
241 #if USE(ICU_UNICODE)
242 TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
243 TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
244 #endif
245
246 #if USE(QT4_UNICODE)
247 TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
248 TextCodecQt::registerCodecs(addToTextCodecMap);
249 #endif
250
251 #if PLATFORM(MAC)
252 TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
253 TextCodecMac::registerCodecs(addToTextCodecMap);
254 #endif
255
256 #if USE(GLIB_UNICODE)
257 TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap);
258 TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
259 #endif
260
261 #if OS(WINCE) && !PLATFORM(QT)
262 TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
263 TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
264 #endif
265
266 pruneBlacklistedCodecs();
267 }
268
newTextCodec(const TextEncoding & encoding)269 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
270 {
271 MutexLocker lock(encodingRegistryMutex());
272
273 ASSERT(textCodecMap);
274 TextCodecFactory factory = textCodecMap->get(encoding.name());
275 ASSERT(factory.function);
276 return factory.function(encoding, factory.additionalData);
277 }
278
atomicCanonicalTextEncodingName(const char * name)279 const char* atomicCanonicalTextEncodingName(const char* name)
280 {
281 if (!name || !name[0])
282 return 0;
283 if (!textEncodingNameMap)
284 buildBaseTextCodecMaps();
285
286 MutexLocker lock(encodingRegistryMutex());
287
288 if (const char* atomicName = textEncodingNameMap->get(name))
289 return atomicName;
290 if (didExtendTextCodecMaps)
291 return 0;
292 extendTextCodecMaps();
293 didExtendTextCodecMaps = true;
294 return textEncodingNameMap->get(name);
295 }
296
atomicCanonicalTextEncodingName(const UChar * characters,size_t length)297 const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
298 {
299 char buffer[maxEncodingNameLength + 1];
300 size_t j = 0;
301 for (size_t i = 0; i < length; ++i) {
302 UChar c = characters[i];
303 if (isASCIIAlphanumeric(c)) {
304 if (j == maxEncodingNameLength)
305 return 0;
306 buffer[j++] = c;
307 }
308 }
309 buffer[j] = 0;
310 return atomicCanonicalTextEncodingName(buffer);
311 }
312
noExtendedTextEncodingNameUsed()313 bool noExtendedTextEncodingNameUsed()
314 {
315 // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
316 return !didExtendTextCodecMaps;
317 }
318
319 } // namespace WebCore
320