• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3  * Copyright (C) 2007-2009 Torch Mobile, Inc.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "TextEncodingRegistry.h"
29 
30 #include "PlatformString.h"
31 #include "TextCodecLatin1.h"
32 #include "TextCodecUserDefined.h"
33 #include "TextCodecUTF16.h"
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/HashFunctions.h>
37 #include <wtf/HashMap.h>
38 #include <wtf/StdLibExtras.h>
39 #include <wtf/StringExtras.h>
40 #include <wtf/Threading.h>
41 
42 #if USE(ICU_UNICODE)
43 #include "TextCodecICU.h"
44 #endif
45 #if PLATFORM(MAC)
46 #include "TextCodecMac.h"
47 #endif
48 #if PLATFORM(QT)
49 #include "qt/TextCodecQt.h"
50 #endif
51 #if USE(GLIB_UNICODE)
52 #include "gtk/TextCodecGtk.h"
53 #endif
54 #if OS(WINCE) && !PLATFORM(QT)
55 #include "TextCodecWince.h"
56 #endif
57 
58 using namespace WTF;
59 
60 namespace WebCore {
61 
// Upper bound, in characters and excluding the terminator, on an encoding name handled
// here: addToTextEncodingNameMap() asserts that every alias fits, and the UChar overload
// of atomicCanonicalTextEncodingName() returns 0 once its filtered copy would exceed it.
const size_t maxEncodingNameLength = 63;
63 
// Hash for all-ASCII strings that does case folding and skips any characters
// that are not alphanumeric. Characters are classified with isASCIIAlphanumeric
// rather than the C library's isalnum, so non-ASCII characters are expected to
// be skipped as well, regardless of the platform's locale.
68 struct TextEncodingNameHash {
69 
equalWebCore::TextEncodingNameHash70     static bool equal(const char* s1, const char* s2)
71     {
72         char c1;
73         char c2;
74         do {
75             do
76                 c1 = *s1++;
77             while (c1 && !isASCIIAlphanumeric(c1));
78             do
79                 c2 = *s2++;
80             while (c2 && !isASCIIAlphanumeric(c2));
81             if (toASCIILower(c1) != toASCIILower(c2))
82                 return false;
83         } while (c1 && c2);
84         return !c1 && !c2;
85     }
86 
87     // This algorithm is the one-at-a-time hash from:
88     // http://burtleburtle.net/bob/hash/hashfaq.html
89     // http://burtleburtle.net/bob/hash/doobs.html
hashWebCore::TextEncodingNameHash90     static unsigned hash(const char* s)
91     {
92         unsigned h = WTF::stringHashingStartValue;
93         for (;;) {
94             char c;
95             do {
96                 c = *s++;
97                 if (!c) {
98                     h += (h << 3);
99                     h ^= (h >> 11);
100                     h += (h << 15);
101                     return h;
102                 }
103             } while (!isASCIIAlphanumeric(c));
104             h += toASCIILower(c);
105             h += (h << 10);
106             h ^= (h >> 6);
107         }
108     }
109 
110     static const bool safeToCompareToEmptyOrDeleted = false;
111 };
112 
113 struct TextCodecFactory {
114     NewTextCodecFunction function;
115     const void* additionalData;
TextCodecFactoryWebCore::TextCodecFactory116     TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
117 };
118 
// Maps every registered alias to its canonical ("atomic") name. Keys are hashed and
// compared with TextEncodingNameHash.
typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
// Maps a canonical name to the factory that creates a codec for it. No custom hash is
// supplied here; presumably keys are matched by pointer identity -- confirm against
// WTF's default hash for const char*.
typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
121 
encodingRegistryMutex()122 static Mutex& encodingRegistryMutex()
123 {
124     // We don't have to use AtomicallyInitializedStatic here because
125     // this function is called on the main thread for any page before
126     // it is used in worker threads.
127     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
128     return mutex;
129 }
130 
// Registry state. Both maps are allocated by buildBaseTextCodecMaps().
static TextEncodingNameMap* textEncodingNameMap;
static TextCodecMap* textCodecMap;
// Set by atomicCanonicalTextEncodingName() after it has run extendTextCodecMaps().
static bool didExtendTextCodecMaps;

// Encodings that pruneBlacklistedCodecs() removes from both maps, together with all of
// their aliases.
static const char* const textEncodingNameBlacklist[] = {
    "UTF-7"
};
138 
139 #if ERROR_DISABLED
140 
checkExistingName(const char *,const char *)141 static inline void checkExistingName(const char*, const char*) { }
142 
143 #else
144 
checkExistingName(const char * alias,const char * atomicName)145 static void checkExistingName(const char* alias, const char* atomicName)
146 {
147     const char* oldAtomicName = textEncodingNameMap->get(alias);
148     if (!oldAtomicName)
149         return;
150     if (oldAtomicName == atomicName)
151         return;
152     // Keep the warning silent about one case where we know this will happen.
153     if (strcmp(alias, "ISO-8859-8-I") == 0
154             && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
155             && strcasecmp(atomicName, "iso-8859-8") == 0)
156         return;
157     LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
158         alias, oldAtomicName, atomicName);
159 }
160 
161 #endif
162 
addToTextEncodingNameMap(const char * alias,const char * name)163 static void addToTextEncodingNameMap(const char* alias, const char* name)
164 {
165     ASSERT(strlen(alias) <= maxEncodingNameLength);
166     const char* atomicName = textEncodingNameMap->get(name);
167     ASSERT(strcmp(alias, name) == 0 || atomicName);
168     if (!atomicName)
169         atomicName = name;
170     checkExistingName(alias, atomicName);
171     textEncodingNameMap->add(alias, atomicName);
172 }
173 
addToTextCodecMap(const char * name,NewTextCodecFunction function,const void * additionalData)174 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
175 {
176     const char* atomicName = textEncodingNameMap->get(name);
177     ASSERT(atomicName);
178     textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
179 }
180 
pruneBlacklistedCodecs()181 static void pruneBlacklistedCodecs()
182 {
183     size_t blacklistedCodecListLength = sizeof(textEncodingNameBlacklist) / sizeof(textEncodingNameBlacklist[0]);
184     for (size_t i = 0; i < blacklistedCodecListLength; ++i) {
185         const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]);
186         if (!atomicName)
187             continue;
188 
189         Vector<const char*> names;
190         TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
191         TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
192         for (; it != end; ++it) {
193             if (it->second == atomicName)
194                 names.append(it->first);
195         }
196 
197         size_t length = names.size();
198         for (size_t j = 0; j < length; ++j)
199             textEncodingNameMap->remove(names[j]);
200 
201         textCodecMap->remove(atomicName);
202     }
203 }
204 
// Allocates both registry maps and fills them with the built-in codecs (Latin-1,
// UTF-16, user-defined) plus the base set of each backend that is compiled in.
// Asserted to run on the main thread and only while neither map exists yet.
static void buildBaseTextCodecMaps()
{
    ASSERT(isMainThread());
    ASSERT(!textCodecMap);
    ASSERT(!textEncodingNameMap);

    textCodecMap = new TextCodecMap;
    textEncodingNameMap = new TextEncodingNameMap;

    // For each codec the names are registered before the codecs: addToTextCodecMap()
    // asserts that the name is already present in the name map.
    TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecLatin1::registerCodecs(addToTextCodecMap);

    TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecUTF16::registerCodecs(addToTextCodecMap);

    TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecUserDefined::registerCodecs(addToTextCodecMap);

#if USE(ICU_UNICODE)
    TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
    TextCodecICU::registerBaseCodecs(addToTextCodecMap);
#endif

#if USE(GLIB_UNICODE)
    TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap);
    TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
#endif

#if OS(WINCE) && !PLATFORM(QT)
    TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
    TextCodecWince::registerBaseCodecs(addToTextCodecMap);
#endif
}
238 
// Registers the extended encoding names and codecs of every backend that is compiled
// in, then removes the blacklisted encodings. Called from
// atomicCanonicalTextEncodingName() when a name is not found in the maps built so far.
static void extendTextCodecMaps()
{
    // Within each backend the names are registered before the codecs:
    // addToTextCodecMap() asserts that the name is already present in the name map.
#if USE(ICU_UNICODE)
    TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
    TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
#endif

#if USE(QT4_UNICODE)
    TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecQt::registerCodecs(addToTextCodecMap);
#endif

#if PLATFORM(MAC)
    TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
    TextCodecMac::registerCodecs(addToTextCodecMap);
#endif

#if USE(GLIB_UNICODE)
    TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap);
    TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
#endif

#if OS(WINCE) && !PLATFORM(QT)
    TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
    TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
#endif

    // Pruning runs last, after every backend above has registered its names.
    pruneBlacklistedCodecs();
}
268 
newTextCodec(const TextEncoding & encoding)269 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
270 {
271     MutexLocker lock(encodingRegistryMutex());
272 
273     ASSERT(textCodecMap);
274     TextCodecFactory factory = textCodecMap->get(encoding.name());
275     ASSERT(factory.function);
276     return factory.function(encoding, factory.additionalData);
277 }
278 
atomicCanonicalTextEncodingName(const char * name)279 const char* atomicCanonicalTextEncodingName(const char* name)
280 {
281     if (!name || !name[0])
282         return 0;
283     if (!textEncodingNameMap)
284         buildBaseTextCodecMaps();
285 
286     MutexLocker lock(encodingRegistryMutex());
287 
288     if (const char* atomicName = textEncodingNameMap->get(name))
289         return atomicName;
290     if (didExtendTextCodecMaps)
291         return 0;
292     extendTextCodecMaps();
293     didExtendTextCodecMaps = true;
294     return textEncodingNameMap->get(name);
295 }
296 
atomicCanonicalTextEncodingName(const UChar * characters,size_t length)297 const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
298 {
299     char buffer[maxEncodingNameLength + 1];
300     size_t j = 0;
301     for (size_t i = 0; i < length; ++i) {
302         UChar c = characters[i];
303         if (isASCIIAlphanumeric(c)) {
304             if (j == maxEncodingNameLength)
305                 return 0;
306             buffer[j++] = c;
307         }
308     }
309     buffer[j] = 0;
310     return atomicCanonicalTextEncodingName(buffer);
311 }
312 
noExtendedTextEncodingNameUsed()313 bool noExtendedTextEncodingNameUsed()
314 {
315     // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
316     return !didExtendTextCodecMaps;
317 }
318 
319 } // namespace WebCore
320