1 /*
2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "TextEncodingRegistry.h"
29
30 #include "PlatformString.h"
31 #include "TextCodecLatin1.h"
32 #include "TextCodecUserDefined.h"
33 #include "TextCodecUTF16.h"
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/HashFunctions.h>
37 #include <wtf/HashMap.h>
38 #include <wtf/StdLibExtras.h>
39 #include <wtf/StringExtras.h>
40 #include <wtf/Threading.h>
41
42 #if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
43 #include "TextCodecICU.h"
44 #endif
45 #if PLATFORM(MAC)
46 #include "TextCodecMac.h"
47 #endif
48 #if PLATFORM(QT)
49 #include "qt/TextCodecQt.h"
50 #endif
51 #if PLATFORM(WINCE)
52 #include "TextCodecWince.h"
53 #endif
54
55 using namespace WTF;
56
57 namespace WebCore {
58
// Upper bound, in characters (excluding the terminating NUL), on an encoding
// name handled by this file. Aliases registered through
// addToTextEncodingNameMap() are asserted to fit within it, and the UChar
// overload of atomicCanonicalTextEncodingName() sizes its stack buffer from it
// and returns 0 for names whose alphanumeric part is longer.
const size_t maxEncodingNameLength = 63;
60
61 // Hash for all-ASCII strings that does case folding and skips any characters
62 // that are not alphanumeric. If passed any non-ASCII characters, depends on
63 // the behavior of isalnum -- if that returns false as it does on OS X, then
64 // it will properly skip those characters too.
65 struct TextEncodingNameHash {
66
equalWebCore::TextEncodingNameHash67 static bool equal(const char* s1, const char* s2)
68 {
69 char c1;
70 char c2;
71 do {
72 do
73 c1 = *s1++;
74 while (c1 && !isASCIIAlphanumeric(c1));
75 do
76 c2 = *s2++;
77 while (c2 && !isASCIIAlphanumeric(c2));
78 if (toASCIILower(c1) != toASCIILower(c2))
79 return false;
80 } while (c1 && c2);
81 return !c1 && !c2;
82 }
83
84 // This algorithm is the one-at-a-time hash from:
85 // http://burtleburtle.net/bob/hash/hashfaq.html
86 // http://burtleburtle.net/bob/hash/doobs.html
hashWebCore::TextEncodingNameHash87 static unsigned hash(const char* s)
88 {
89 unsigned h = WTF::stringHashingStartValue;
90 for (;;) {
91 char c;
92 do {
93 c = *s++;
94 if (!c) {
95 h += (h << 3);
96 h ^= (h >> 11);
97 h += (h << 15);
98 return h;
99 }
100 } while (!isASCIIAlphanumeric(c));
101 h += toASCIILower(c);
102 h += (h << 10);
103 h ^= (h >> 6);
104 }
105 }
106
107 static const bool safeToCompareToEmptyOrDeleted = false;
108 };
109
110 struct TextCodecFactory {
111 NewTextCodecFunction function;
112 const void* additionalData;
TextCodecFactoryWebCore::TextCodecFactory113 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
114 };
115
// Alias -> atomic (canonical) encoding name. Lookups go through
// TextEncodingNameHash, so keys match regardless of case and of
// non-alphanumeric characters.
typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
// Atomic encoding name -> factory used to create a codec for it. No custom
// hash is supplied here; presumably keys are compared by pointer identity,
// which would be why callers look the atomic name up first -- confirm against
// WTF's default hash for const char*.
typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
118
// Returns the mutex that newTextCodec() and atomicCanonicalTextEncodingName()
// hold while they access the registry maps.
static Mutex& encodingRegistryMutex()
{
    // We don't have to use AtomicallyInitializedStatic here because
    // this function is called on the main thread for any page before
    // it is used in worker threads.
    DEFINE_STATIC_LOCAL(Mutex, mutex, ());
    return mutex;
}
127
// Registry state. Both maps are allocated by buildBaseTextCodecMaps() and are
// not freed anywhere in this file.
static TextEncodingNameMap* textEncodingNameMap;
static TextCodecMap* textCodecMap;
// Set to true once extendTextCodecMaps() has run. It is written while the
// registry mutex is held, but noExtendedTextEncodingNameUsed() reads it
// without taking the mutex.
static bool didExtendTextCodecMaps;
131
132 #if ERROR_DISABLED
133
checkExistingName(const char *,const char *)134 static inline void checkExistingName(const char*, const char*) { }
135
136 #else
137
checkExistingName(const char * alias,const char * atomicName)138 static void checkExistingName(const char* alias, const char* atomicName)
139 {
140 const char* oldAtomicName = textEncodingNameMap->get(alias);
141 if (!oldAtomicName)
142 return;
143 if (oldAtomicName == atomicName)
144 return;
145 // Keep the warning silent about one case where we know this will happen.
146 if (strcmp(alias, "ISO-8859-8-I") == 0
147 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
148 && strcasecmp(atomicName, "iso-8859-8") == 0)
149 return;
150 LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
151 alias, oldAtomicName, atomicName);
152 }
153
154 #endif
155
addToTextEncodingNameMap(const char * alias,const char * name)156 static void addToTextEncodingNameMap(const char* alias, const char* name)
157 {
158 ASSERT(strlen(alias) <= maxEncodingNameLength);
159 const char* atomicName = textEncodingNameMap->get(name);
160 ASSERT(strcmp(alias, name) == 0 || atomicName);
161 if (!atomicName)
162 atomicName = name;
163 checkExistingName(alias, atomicName);
164 textEncodingNameMap->add(alias, atomicName);
165 }
166
addToTextCodecMap(const char * name,NewTextCodecFunction function,const void * additionalData)167 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
168 {
169 const char* atomicName = textEncodingNameMap->get(name);
170 ASSERT(atomicName);
171 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
172 }
173
// Allocates both registry maps and fills them with the base set of encodings:
// Latin-1, UTF-16 and user-defined always, plus the ICU and WinCE base sets
// when the corresponding build flags are enabled. Asserts that it runs on the
// main thread and that the maps have not been created yet.
static void buildBaseTextCodecMaps()
{
    ASSERT(isMainThread());
    ASSERT(!textCodecMap);
    ASSERT(!textEncodingNameMap);

    textCodecMap = new TextCodecMap;
    textEncodingNameMap = new TextEncodingNameMap;

    // For each codec, names are registered before codecs: addToTextCodecMap()
    // resolves the atomic name through the name map and asserts it exists.
    TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecLatin1::registerCodecs(addToTextCodecMap);

    TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecUTF16::registerCodecs(addToTextCodecMap);

    TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecUserDefined::registerCodecs(addToTextCodecMap);

#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
    TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
    TextCodecICU::registerBaseCodecs(addToTextCodecMap);
#endif

#if PLATFORM(WINCE)
    TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
    TextCodecWince::registerBaseCodecs(addToTextCodecMap);
#endif
}
202
// Adds the extended, platform-dependent encodings to the registry maps.
// Called from atomicCanonicalTextEncodingName() the first time a name is not
// found among the encodings registered so far.
static void extendTextCodecMaps()
{
#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
    TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
    TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
#endif

    // NOTE(review): the TextCodecQt.h include in this file is guarded by
    // PLATFORM(QT) while this registration is guarded by USE(QT4_UNICODE);
    // presumably the latter implies the former -- confirm.
#if USE(QT4_UNICODE)
    TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecQt::registerCodecs(addToTextCodecMap);
#endif

#if PLATFORM(MAC)
    TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecMac::registerCodecs(addToTextCodecMap);
#endif

#if PLATFORM(WINCE)
    TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
    TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
#endif
}
225
newTextCodec(const TextEncoding & encoding)226 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
227 {
228 MutexLocker lock(encodingRegistryMutex());
229
230 ASSERT(textCodecMap);
231 TextCodecFactory factory = textCodecMap->get(encoding.name());
232 ASSERT(factory.function);
233 return factory.function(encoding, factory.additionalData);
234 }
235
atomicCanonicalTextEncodingName(const char * name)236 const char* atomicCanonicalTextEncodingName(const char* name)
237 {
238 if (!name || !name[0])
239 return 0;
240 if (!textEncodingNameMap)
241 buildBaseTextCodecMaps();
242
243 MutexLocker lock(encodingRegistryMutex());
244
245 if (const char* atomicName = textEncodingNameMap->get(name))
246 return atomicName;
247 if (didExtendTextCodecMaps)
248 return 0;
249 extendTextCodecMaps();
250 didExtendTextCodecMaps = true;
251 return textEncodingNameMap->get(name);
252 }
253
atomicCanonicalTextEncodingName(const UChar * characters,size_t length)254 const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
255 {
256 char buffer[maxEncodingNameLength + 1];
257 size_t j = 0;
258 for (size_t i = 0; i < length; ++i) {
259 UChar c = characters[i];
260 if (isASCIIAlphanumeric(c)) {
261 if (j == maxEncodingNameLength)
262 return 0;
263 buffer[j++] = c;
264 }
265 }
266 buffer[j] = 0;
267 return atomicCanonicalTextEncodingName(buffer);
268 }
269
noExtendedTextEncodingNameUsed()270 bool noExtendedTextEncodingNameUsed()
271 {
272 // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
273 return !didExtendTextCodecMaps;
274 }
275
276 } // namespace WebCore
277