• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3  * Copyright (C) 2007-2009 Torch Mobile, Inc.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "TextEncodingRegistry.h"
29 
30 #include "PlatformString.h"
31 #include "TextCodecLatin1.h"
32 #include "TextCodecUserDefined.h"
33 #include "TextCodecUTF16.h"
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/HashFunctions.h>
37 #include <wtf/HashMap.h>
38 #include <wtf/StdLibExtras.h>
39 #include <wtf/StringExtras.h>
40 #include <wtf/Threading.h>
41 
42 #if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
43 #include "TextCodecICU.h"
44 #endif
45 #if PLATFORM(MAC)
46 #include "TextCodecMac.h"
47 #endif
48 #if PLATFORM(QT)
49 #include "qt/TextCodecQt.h"
50 #endif
51 #if PLATFORM(WINCE)
52 #include "TextCodecWince.h"
53 #endif
54 
55 using namespace WTF;
56 
57 namespace WebCore {
58 
59 const size_t maxEncodingNameLength = 63;
60 
61 // Hash for all-ASCII strings that does case folding and skips any characters
62 // that are not alphanumeric. Characters are tested with isASCIIAlphanumeric
63 // (not the C library's isalnum), so any character that function rejects --
64 // presumably including every non-ASCII byte -- is skipped as well.
65 struct TextEncodingNameHash {
66 
equalWebCore::TextEncodingNameHash67     static bool equal(const char* s1, const char* s2)
68     {
69         char c1;
70         char c2;
71         do {
72             do
73                 c1 = *s1++;
74             while (c1 && !isASCIIAlphanumeric(c1));
75             do
76                 c2 = *s2++;
77             while (c2 && !isASCIIAlphanumeric(c2));
78             if (toASCIILower(c1) != toASCIILower(c2))
79                 return false;
80         } while (c1 && c2);
81         return !c1 && !c2;
82     }
83 
84     // This algorithm is the one-at-a-time hash from:
85     // http://burtleburtle.net/bob/hash/hashfaq.html
86     // http://burtleburtle.net/bob/hash/doobs.html
hashWebCore::TextEncodingNameHash87     static unsigned hash(const char* s)
88     {
89         unsigned h = WTF::stringHashingStartValue;
90         for (;;) {
91             char c;
92             do {
93                 c = *s++;
94                 if (!c) {
95                     h += (h << 3);
96                     h ^= (h >> 11);
97                     h += (h << 15);
98                     return h;
99                 }
100             } while (!isASCIIAlphanumeric(c));
101             h += toASCIILower(c);
102             h += (h << 10);
103             h ^= (h >> 6);
104         }
105     }
106 
107     static const bool safeToCompareToEmptyOrDeleted = false;
108 };
109 
110 struct TextCodecFactory {
111     NewTextCodecFunction function;
112     const void* additionalData;
TextCodecFactoryWebCore::TextCodecFactory113     TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
114 };
115 
// Alias table: keyed by an encoding name or alias (hashed and compared with
// TextEncodingNameHash), yielding the atomic name pointer registered for it.
116 typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
// Codec table: keyed by an atomic name pointer, yielding the factory used to
// create a codec for it. No custom hash is given, so HashMap's default for
// const char* keys applies (presumably pointer identity -- TODO confirm).
117 typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
118 
encodingRegistryMutex()119 static Mutex& encodingRegistryMutex()
120 {
121     // We don't have to use AtomicallyInitializedStatic here because
122     // this function is called on the main thread for any page before
123     // it is used in worker threads.
124     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
125     return mutex;
126 }
127 
// Alias -> atomic name table; allocated by buildBaseTextCodecMaps().
128 static TextEncodingNameMap* textEncodingNameMap;
// Atomic name -> codec factory table; allocated by buildBaseTextCodecMaps().
129 static TextCodecMap* textCodecMap;
// Set to true once atomicCanonicalTextEncodingName() has run extendTextCodecMaps().
130 static bool didExtendTextCodecMaps;
131 
132 #if ERROR_DISABLED
133 
checkExistingName(const char *,const char *)134 static inline void checkExistingName(const char*, const char*) { }
135 
136 #else
137 
checkExistingName(const char * alias,const char * atomicName)138 static void checkExistingName(const char* alias, const char* atomicName)
139 {
140     const char* oldAtomicName = textEncodingNameMap->get(alias);
141     if (!oldAtomicName)
142         return;
143     if (oldAtomicName == atomicName)
144         return;
145     // Keep the warning silent about one case where we know this will happen.
146     if (strcmp(alias, "ISO-8859-8-I") == 0
147             && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
148             && strcasecmp(atomicName, "iso-8859-8") == 0)
149         return;
150     LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
151         alias, oldAtomicName, atomicName);
152 }
153 
154 #endif
155 
addToTextEncodingNameMap(const char * alias,const char * name)156 static void addToTextEncodingNameMap(const char* alias, const char* name)
157 {
158     ASSERT(strlen(alias) <= maxEncodingNameLength);
159     const char* atomicName = textEncodingNameMap->get(name);
160     ASSERT(strcmp(alias, name) == 0 || atomicName);
161     if (!atomicName)
162         atomicName = name;
163     checkExistingName(alias, atomicName);
164     textEncodingNameMap->add(alias, atomicName);
165 }
166 
addToTextCodecMap(const char * name,NewTextCodecFunction function,const void * additionalData)167 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
168 {
169     const char* atomicName = textEncodingNameMap->get(name);
170     ASSERT(atomicName);
171     textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
172 }
173 
// Allocates both registry maps and fills them with the base set of
// encodings: Latin-1, UTF-16 and user-defined always, plus the base ICU
// and WinCE sets when those back ends are compiled in.
buildBaseTextCodecMaps()174 static void buildBaseTextCodecMaps()
175 {
    // Must run on the main thread, and only while neither map exists yet.
176     ASSERT(isMainThread());
177     ASSERT(!textCodecMap);
178     ASSERT(!textEncodingNameMap);
179 
180     textCodecMap = new TextCodecMap;
181     textEncodingNameMap = new TextEncodingNameMap;
182 
    // For each codec, names are registered before codecs because
    // addToTextCodecMap() looks the name up in textEncodingNameMap.
183     TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
184     TextCodecLatin1::registerCodecs(addToTextCodecMap);
185 
186     TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
187     TextCodecUTF16::registerCodecs(addToTextCodecMap);
188 
189     TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
190     TextCodecUserDefined::registerCodecs(addToTextCodecMap);
191 
192 #if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
193     TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
194     TextCodecICU::registerBaseCodecs(addToTextCodecMap);
195 #endif
196 
197 #if PLATFORM(WINCE)
198     TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
199     TextCodecWince::registerBaseCodecs(addToTextCodecMap);
200 #endif
201 }
202 
// Adds the extended encodings of every compiled-in back end (ICU, Qt, Mac,
// WinCE) to the maps built by buildBaseTextCodecMaps(). The only caller in
// this file, atomicCanonicalTextEncodingName(), invokes it while holding
// the registry mutex and at most once, guarded by didExtendTextCodecMaps.
extendTextCodecMaps()203 static void extendTextCodecMaps()
204 {
    // As in the base registration, each back end registers its names
    // before its codecs so that addToTextCodecMap() can resolve them.
205 #if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
206     TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
207     TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
208 #endif
209 
210 #if USE(QT4_UNICODE)
211     TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
212     TextCodecQt::registerCodecs(addToTextCodecMap);
213 #endif
214 
215 #if PLATFORM(MAC)
216     TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
217     TextCodecMac::registerCodecs(addToTextCodecMap);
218 #endif
219 
220 #if PLATFORM(WINCE)
221     TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
222     TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
223 #endif
224 }
225 
newTextCodec(const TextEncoding & encoding)226 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
227 {
228     MutexLocker lock(encodingRegistryMutex());
229 
230     ASSERT(textCodecMap);
231     TextCodecFactory factory = textCodecMap->get(encoding.name());
232     ASSERT(factory.function);
233     return factory.function(encoding, factory.additionalData);
234 }
235 
atomicCanonicalTextEncodingName(const char * name)236 const char* atomicCanonicalTextEncodingName(const char* name)
237 {
238     if (!name || !name[0])
239         return 0;
240     if (!textEncodingNameMap)
241         buildBaseTextCodecMaps();
242 
243     MutexLocker lock(encodingRegistryMutex());
244 
245     if (const char* atomicName = textEncodingNameMap->get(name))
246         return atomicName;
247     if (didExtendTextCodecMaps)
248         return 0;
249     extendTextCodecMaps();
250     didExtendTextCodecMaps = true;
251     return textEncodingNameMap->get(name);
252 }
253 
atomicCanonicalTextEncodingName(const UChar * characters,size_t length)254 const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
255 {
256     char buffer[maxEncodingNameLength + 1];
257     size_t j = 0;
258     for (size_t i = 0; i < length; ++i) {
259         UChar c = characters[i];
260         if (isASCIIAlphanumeric(c)) {
261             if (j == maxEncodingNameLength)
262                 return 0;
263             buffer[j++] = c;
264         }
265     }
266     buffer[j] = 0;
267     return atomicCanonicalTextEncodingName(buffer);
268 }
269 
noExtendedTextEncodingNameUsed()270 bool noExtendedTextEncodingNameUsed()
271 {
272     // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
273     return !didExtendTextCodecMaps;
274 }
275 
276 } // namespace WebCore
277