• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
3  * Copyright (C) 2010-2011 Patrick Gansterer <paroga@paroga.com>
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  *  This library is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  *  Library General Public License for more details.
18  *
19  *  You should have received a copy of the GNU Library General Public License
20  *  along with this library; see the file COPYING.LIB.  If not, write to
21  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22  *  Boston, MA 02110-1301, USA.
23  */
24 
25 #include "config.h"
26 #include "TextCodecWinCE.h"
27 
28 #include "FontCache.h"
29 #include "PlatformString.h"
30 #include <mlang.h>
31 #include <winbase.h>
32 #include <winnls.h>
33 #include <wtf/HashMap.h>
34 #include <wtf/HashSet.h>
35 #include <wtf/text/CString.h>
36 #include <wtf/text/StringConcatenate.h>
37 #include <wtf/text/StringHash.h>
38 
39 namespace WebCore {
40 
41 struct CharsetInfo {
42     CString m_name;
43     String m_friendlyName;
44     UINT m_codePage;
45     Vector<CString> m_aliases;
46 };
47 
class LanguageManager;

// Declared ahead of the class so that the friend declaration below refers to
// this internal-linkage function. Without it the friend declaration would
// introduce languageManager() with external linkage, which conflicts with its
// later "static" definition on conforming compilers.
static LanguageManager& languageManager();

// Populates the charset lookup tables of this file when constructed.
// Only languageManager() is allowed to create the instance.
class LanguageManager {
private:
    LanguageManager();

    friend LanguageManager& languageManager();
};
54 
55 // Usage: a lookup table used to get CharsetInfo with code page ID.
56 // Key: code page ID. Value: charset information.
codePageCharsets()57 static HashMap<UINT, CString>& codePageCharsets()
58 {
59     static HashMap<UINT, CString> cc;
60     return cc;
61 }
62 
knownCharsets()63 static HashMap<String, CharsetInfo>& knownCharsets()
64 {
65     static HashMap<String, CharsetInfo> kc;
66     return kc;
67 }
68 
69 // Usage: a map that stores charsets that are supported by system. Sorted by name.
70 // Key: charset. Value: code page ID.
71 typedef HashSet<String> CharsetSet;
supportedCharsets()72 static CharsetSet& supportedCharsets()
73 {
74     static CharsetSet sl;
75     return sl;
76 }
77 
languageManager()78 static LanguageManager& languageManager()
79 {
80     static LanguageManager lm;
81     return lm;
82 }
83 
LanguageManager()84 LanguageManager::LanguageManager()
85 {
86     IEnumCodePage* enumInterface;
87     IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
88     if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
89         MIMECPINFO cpInfo;
90         ULONG ccpInfo;
91         while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
92             if (!IsValidCodePage(cpInfo.uiCodePage))
93                 continue;
94 
95             HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
96 
97             CString name(String(cpInfo.wszWebCharset).latin1());
98             if (i == codePageCharsets().end()) {
99                 CharsetInfo info;
100                 info.m_codePage = cpInfo.uiCodePage;
101                 knownCharsets().set(name.data(), info);
102                 i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
103             }
104             if (i != codePageCharsets().end()) {
105                 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
106                 ASSERT(j != knownCharsets().end());
107                 CharsetInfo& info = j->second;
108                 info.m_name = i->second.data();
109                 info.m_friendlyName = cpInfo.wszDescription;
110                 info.m_aliases.append(name);
111                 info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
112                 info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
113                 String cpName = makeString("cp", String::number(cpInfo.uiCodePage));
114                 info.m_aliases.append(cpName.latin1());
115                 supportedCharsets().add(i->second.data());
116             }
117         }
118         enumInterface->Release();
119     }
120 }
121 
getCodePage(const char * name)122 static UINT getCodePage(const char* name)
123 {
124     // Explicitly use a "const" reference to fix the silly VS build error
125     // saying "==" is not found for const_iterator and iterator
126     const HashMap<String, CharsetInfo>& charsets = knownCharsets();
127     HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
128     return i == charsets.end() ? CP_ACP : i->second.m_codePage;
129 }
130 
newTextCodecWinCE(const TextEncoding & encoding,const void *)131 static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
132 {
133     return new TextCodecWinCE(getCodePage(encoding.name()));
134 }
135 
TextCodecWinCE(UINT codePage)136 TextCodecWinCE::TextCodecWinCE(UINT codePage)
137     : m_codePage(codePage)
138 {
139 }
140 
~TextCodecWinCE()141 TextCodecWinCE::~TextCodecWinCE()
142 {
143 }
144 
registerExtendedEncodingNames(EncodingNameRegistrar registrar)145 void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
146 {
147     languageManager();
148     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
149         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
150         if (j != knownCharsets().end()) {
151             registrar(j->second.m_name.data(), j->second.m_name.data());
152             for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
153                 registrar(alias->data(), j->second.m_name.data());
154         }
155     }
156 }
157 
registerExtendedCodecs(TextCodecRegistrar registrar)158 void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
159 {
160     languageManager();
161     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
162         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
163         if (j != knownCharsets().end())
164             registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
165     }
166 }
167 
getCodePageFlags(UINT codePage)168 static DWORD getCodePageFlags(UINT codePage)
169 {
170     if (codePage == 42) // Symbol
171         return 0;
172 
173     // Microsoft says the flag must be 0 for the following code pages
174     if (codePage > 50000) {
175         if ((codePage >= 50220 && codePage <= 50222)
176             || codePage == 50225
177             || codePage == 50227
178             || codePage == 50229
179             || codePage == 52936
180             || codePage == 54936
181             || (codePage >= 57002 && codePage <= 57001)
182             || codePage == 65000 // UTF-7
183             )
184             return 0;
185     }
186 
187     return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
188 }
189 
// Scans |length| bytes starting at |bytes| and returns a pointer to the first
// byte that has its high bit set, or to the end of the range if every byte is
// 7-bit ASCII.
static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
{
    const char* const rangeEnd = bytes + length;
    const char* cursor = bytes;
    while (cursor < rangeEnd && !(*cursor & 0x80))
        ++cursor;
    return cursor;
}
198 
decodeInternal(Vector<UChar,8192> & result,UINT codePage,const char * bytes,size_t length,size_t * left)199 static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left)
200 {
201     *left = length;
202     if (!bytes || !length)
203         return;
204 
205     DWORD flags = getCodePageFlags(codePage);
206 
207     int testLength = length;
208     int untestedLength = length;
209     for (;;) {
210         int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
211 
212         if (resultLength > 0) {
213             int oldSize = result.size();
214             result.resize(oldSize + resultLength);
215 
216             MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
217 
218             if (testLength == untestedLength) {
219                 *left = length - testLength;
220                 break;
221             }
222             untestedLength -= testLength;
223             length -= testLength;
224             bytes += testLength;
225         } else {
226             untestedLength = testLength - 1;
227             if (!untestedLength) {
228                 *left = length;
229                 break;
230             }
231         }
232         testLength = (untestedLength + 1) / 2;
233     }
234 }
235 
decode(const char * bytes,size_t length,bool flush,bool stopOnError,bool & sawError)236 String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
237 {
238     if (!m_decodeBuffer.isEmpty()) {
239         m_decodeBuffer.append(bytes, length);
240         bytes = m_decodeBuffer.data();
241         length = m_decodeBuffer.size();
242     }
243 
244     size_t left;
245     Vector<UChar, 8192> result;
246     for (;;) {
247         decodeInternal(result, m_codePage, bytes, length, &left);
248         if (!left)
249             break;
250 
251         if (!flush && left < 16)
252             break;
253 
254         result.append(L'?');
255         sawError = true;
256         if (stopOnError)
257             return String::adopt(result);
258 
259         if (left == 1)
260             break;
261 
262         bytes += length - left + 1;
263         length = left - 1;
264     }
265     if (left && !flush) {
266         if (m_decodeBuffer.isEmpty())
267             m_decodeBuffer.append(bytes + length - left, left);
268         else {
269             memmove(m_decodeBuffer.data(), bytes + length - left, left);
270             m_decodeBuffer.resize(left);
271         }
272     } else
273         m_decodeBuffer.clear();
274 
275     return String::adopt(result);
276 }
277 
encode(const UChar * characters,size_t length,UnencodableHandling)278 CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
279 {
280     if (!characters || !length)
281         return CString();
282 
283     int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
284 
285     // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
286 
287     if (resultLength <= 0)
288         return "?";
289 
290     char* characterBuffer;
291     CString result = CString::newUninitialized(resultLength, characterBuffer);
292 
293     WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, characterBuffer, resultLength, 0, 0);
294 
295     return result;
296 }
297 
enumerateSupportedEncodings(EncodingReceiver & receiver)298 void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
299 {
300     languageManager();
301     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
302         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
303         if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
304             break;
305     }
306 }
307 
308 } // namespace WebCore
309