1 /*
2 * Copyright (c) 2006, 2007, 2008, Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "FontUtilsChromiumWin.h"
33
34 #include <limits>
35
36 #include "PlatformString.h"
37 #include "StringHash.h"
38 #include "UniscribeHelper.h"
39 #include <unicode/locid.h>
40 #include <unicode/uchar.h>
41 #include <wtf/HashMap.h>
42
43 namespace WebCore {
44
45 namespace {
46
47 // A simple mapping from UScriptCode to family name. This is a sparse array,
48 // which works well since the range of UScriptCode values is small.
49 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
50
initializeScriptFontMap(ScriptToFontMap & scriptFontMap)51 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
52 {
53 struct FontMap {
54 UScriptCode script;
55 const UChar* family;
56 };
57
58 const static FontMap fontMap[] = {
59 {USCRIPT_LATIN, L"times new roman"},
60 {USCRIPT_GREEK, L"times new roman"},
61 {USCRIPT_CYRILLIC, L"times new roman"},
62 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
63 {USCRIPT_HIRAGANA, L"ms pgothic"},
64 {USCRIPT_KATAKANA, L"ms pgothic"},
65 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
66 {USCRIPT_HANGUL, L"gulim"},
67 {USCRIPT_THAI, L"tahoma"},
68 {USCRIPT_HEBREW, L"david"},
69 {USCRIPT_ARABIC, L"tahoma"},
70 {USCRIPT_DEVANAGARI, L"mangal"},
71 {USCRIPT_BENGALI, L"vrinda"},
72 {USCRIPT_GURMUKHI, L"raavi"},
73 {USCRIPT_GUJARATI, L"shruti"},
74 {USCRIPT_ORIYA, L"kalinga"},
75 {USCRIPT_TAMIL, L"latha"},
76 {USCRIPT_TELUGU, L"gautami"},
77 {USCRIPT_KANNADA, L"tunga"},
78 {USCRIPT_MALAYALAM, L"kartika"},
79 {USCRIPT_LAO, L"dokchampa"},
80 {USCRIPT_TIBETAN, L"microsoft himalaya"},
81 {USCRIPT_GEORGIAN, L"sylfaen"},
82 {USCRIPT_ARMENIAN, L"sylfaen"},
83 {USCRIPT_ETHIOPIC, L"nyala"},
84 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
85 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
86 {USCRIPT_YI, L"microsoft yi balti"},
87 {USCRIPT_SINHALA, L"iskoola pota"},
88 {USCRIPT_SYRIAC, L"estrangelo edessa"},
89 {USCRIPT_KHMER, L"daunpenh"},
90 {USCRIPT_THAANA, L"mv boli"},
91 {USCRIPT_MONGOLIAN, L"mongolian balti"},
92 {USCRIPT_MYANMAR, L"padauk"},
93 // For USCRIPT_COMMON, we map blocks to scripts when
94 // that makes sense.
95 };
96
97 for (int i = 0; i < sizeof(fontMap) / sizeof(fontMap[0]); ++i)
98 scriptFontMap[fontMap[i].script] = fontMap[i].family;
99
100 // Initialize the locale-dependent mapping.
101 // Since Chrome synchronizes the ICU default locale with its UI locale,
102 // this ICU locale tells the current UI locale of Chrome.
103 icu::Locale locale = icu::Locale::getDefault();
104 const UChar* localeFamily = 0;
105 if (locale == icu::Locale::getJapanese())
106 localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
107 else if (locale == icu::Locale::getKorean())
108 localeFamily = scriptFontMap[USCRIPT_HANGUL];
109 else {
110 // Use Simplified Chinese font for all other locales including
111 // Traditional Chinese because Simsun (SC font) has a wider
112 // coverage (covering both SC and TC) than PMingLiu (TC font).
113 // Note that |fontMap| does not have a separate entry for
114 // USCRIPT_TRADITIONAL_HAN for that reason.
115 // This also speeds up the TC version of Chrome when rendering SC
116 // pages.
117 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
118 }
119 if (localeFamily)
120 scriptFontMap[USCRIPT_HAN] = localeFamily;
121 }
122
123 // There are a lot of characters in USCRIPT_COMMON that can be covered
124 // by fonts for scripts closely related to them. See
125 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
126 // FIXME: make this more efficient with a wider coverage
getScriptBasedOnUnicodeBlock(int ucs4)127 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
128 {
129 UBlockCode block = ublock_getCode(ucs4);
130 switch (block) {
131 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
132 return USCRIPT_HAN;
133 case UBLOCK_HIRAGANA:
134 case UBLOCK_KATAKANA:
135 return USCRIPT_HIRAGANA;
136 case UBLOCK_ARABIC:
137 return USCRIPT_ARABIC;
138 case UBLOCK_THAI:
139 return USCRIPT_THAI;
140 case UBLOCK_GREEK:
141 return USCRIPT_GREEK;
142 case UBLOCK_DEVANAGARI:
143 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
144 // font for now although they're used by other scripts as well.
145 // Without a context, we can't do any better.
146 return USCRIPT_DEVANAGARI;
147 case UBLOCK_ARMENIAN:
148 return USCRIPT_ARMENIAN;
149 case UBLOCK_GEORGIAN:
150 return USCRIPT_GEORGIAN;
151 case UBLOCK_KANNADA:
152 return USCRIPT_KANNADA;
153 default:
154 return USCRIPT_COMMON;
155 }
156 }
157
getScript(int ucs4)158 UScriptCode getScript(int ucs4)
159 {
160 UErrorCode err = U_ZERO_ERROR;
161 UScriptCode script = uscript_getScript(ucs4, &err);
162 // If script is invalid, common or inherited or there's an error,
163 // infer a script based on the unicode block of a character.
164 if (script <= USCRIPT_INHERITED || U_FAILURE(err))
165 script = getScriptBasedOnUnicodeBlock(ucs4);
166 return script;
167 }
168
169 const int kUndefinedAscent = std::numeric_limits<int>::min();
170
171 // Given an HFONT, return the ascent. If GetTextMetrics fails,
172 // kUndefinedAscent is returned, instead.
getAscent(HFONT hfont)173 int getAscent(HFONT hfont)
174 {
175 HDC dc = GetDC(0);
176 HGDIOBJ oldFont = SelectObject(dc, hfont);
177 TEXTMETRIC tm;
178 BOOL gotMetrics = GetTextMetrics(dc, &tm);
179 SelectObject(dc, oldFont);
180 ReleaseDC(0, dc);
181 return gotMetrics ? tm.tmAscent : kUndefinedAscent;
182 }
183
184 struct FontData {
FontDataWebCore::__anon3d7d36d80111::FontData185 FontData()
186 : hfont(0)
187 , ascent(kUndefinedAscent)
188 , scriptCache(0)
189 {
190 }
191
192 HFONT hfont;
193 int ascent;
194 mutable SCRIPT_CACHE scriptCache;
195 };
196
197 // Again, using hash_map does not earn us much here. page_cycler_test intl2
198 // gave us a 'better' result with map than with hash_map even though they're
199 // well-within 1-sigma of each other so that the difference is not significant.
200 // On the other hand, some pages in intl2 seem to take longer to load with map
201 // in the 1st pass. Need to experiment further.
202 typedef HashMap<String, FontData> FontDataCache;
203
204 } // namespace
205
206 // FIXME: this is font fallback code version 0.1
207 // - Cover all the scripts
208 // - Get the default font for each script/generic family from the
209 // preference instead of hardcoding in the source.
210 // (at least, read values from the registry for IE font settings).
211 // - Support generic families (from FontDescription)
212 // - If the default font for a script is not available,
213 // try some more fonts known to support it. Finally, we can
214 // use EnumFontFamilies or similar APIs to come up with a list of
215 // fonts supporting the script and cache the result.
216 // - Consider using UnicodeSet (or UnicodeMap) converted from
217 // GLYPHSET (BMP) or directly read from truetype cmap tables to
218 // keep track of which character is supported by which font
219 // - Update script_font_cache in response to WM_FONTCHANGE
220
getFontFamilyForScript(UScriptCode script,FontDescription::GenericFamilyType generic)221 const UChar* getFontFamilyForScript(UScriptCode script,
222 FontDescription::GenericFamilyType generic)
223 {
224 static ScriptToFontMap scriptFontMap;
225 static bool initialized = false;
226 if (!initialized) {
227 initializeScriptFontMap(scriptFontMap);
228 initialized = true;
229 }
230 if (script == USCRIPT_INVALID_CODE)
231 return 0;
232 ASSERT(script < USCRIPT_CODE_LIMIT);
233 return scriptFontMap[script];
234 }
235
236 // FIXME:
237 // - Handle 'Inherited', 'Common' and 'Unknown'
238 // (see http://www.unicode.org/reports/tr24/#Usage_Model )
239 // For 'Inherited' and 'Common', perhaps we need to
240 // accept another parameter indicating the previous family
241 // and just return it.
242 // - All the characters (or characters up to the point a single
243 // font can cover) need to be taken into account
getFallbackFamily(const UChar * characters,int length,FontDescription::GenericFamilyType generic,UChar32 * charChecked,UScriptCode * scriptChecked)244 const UChar* getFallbackFamily(const UChar* characters,
245 int length,
246 FontDescription::GenericFamilyType generic,
247 UChar32* charChecked,
248 UScriptCode* scriptChecked)
249 {
250 ASSERT(characters && characters[0] && length > 0);
251 UScriptCode script = USCRIPT_COMMON;
252
253 // Sometimes characters common to script (e.g. space) is at
254 // the beginning of a string so that we need to skip them
255 // to get a font required to render the string.
256 int i = 0;
257 UChar32 ucs4 = 0;
258 while (i < length && script == USCRIPT_COMMON) {
259 U16_NEXT(characters, i, length, ucs4);
260 script = getScript(ucs4);
261 }
262
263 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
264 // Han (determined in a locale-dependent way above). Full-width ASCII
265 // characters are rather widely used in Japanese and Chinese documents and
266 // they're fully covered by Chinese, Japanese and Korean fonts.
267 if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
268 script = USCRIPT_HAN;
269
270 if (script == USCRIPT_COMMON)
271 script = getScriptBasedOnUnicodeBlock(ucs4);
272
273 // Another lame work-around to cover non-BMP characters.
274 const UChar* family = getFontFamilyForScript(script, generic);
275 if (!family) {
276 int plane = ucs4 >> 16;
277 switch (plane) {
278 case 1:
279 family = L"code2001";
280 break;
281 case 2:
282 family = L"simsun-extb";
283 break;
284 default:
285 family = L"lucida sans unicode";
286 }
287 }
288
289 if (charChecked)
290 *charChecked = ucs4;
291 if (scriptChecked)
292 *scriptChecked = script;
293 return family;
294 }
295
296 // Be aware that this is not thread-safe.
getDerivedFontData(const UChar * family,int style,LOGFONT * logfont,int * ascent,HFONT * hfont,SCRIPT_CACHE ** scriptCache)297 bool getDerivedFontData(const UChar* family,
298 int style,
299 LOGFONT* logfont,
300 int* ascent,
301 HFONT* hfont,
302 SCRIPT_CACHE** scriptCache)
303 {
304 ASSERT(logfont);
305 ASSERT(family);
306 ASSERT(*family);
307
308 // It does not matter that we leak font data when we exit.
309 static FontDataCache fontDataCache;
310
311 // FIXME: This comes up pretty high in the profile so that
312 // we need to measure whether using SHA256 (after coercing all the
313 // fields to char*) is faster than String::format.
314 String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family);
315 FontDataCache::iterator iter = fontDataCache.find(fontKey);
316 FontData* derived;
317 if (iter == fontDataCache.end()) {
318 ASSERT(wcslen(family) < LF_FACESIZE);
319 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
320 // FIXME: CreateFontIndirect always comes up with
321 // a font even if there's no font matching the name. Need to
322 // check it against what we actually want (as is done in
323 // FontCacheWin.cpp)
324 pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData());
325 derived = &entry.first->second;
326 derived->hfont = CreateFontIndirect(logfont);
327 // GetAscent may return kUndefinedAscent, but we still want to
328 // cache it so that we won't have to call CreateFontIndirect once
329 // more for HFONT next time.
330 derived->ascent = getAscent(derived->hfont);
331 } else {
332 derived = &iter->second;
333 // Last time, GetAscent failed so that only HFONT was
334 // cached. Try once more assuming that TryPreloadFont
335 // was called by a caller between calls.
336 if (kUndefinedAscent == derived->ascent)
337 derived->ascent = getAscent(derived->hfont);
338 }
339 *hfont = derived->hfont;
340 *ascent = derived->ascent;
341 *scriptCache = &(derived->scriptCache);
342 return *ascent != kUndefinedAscent;
343 }
344
getStyleFromLogfont(const LOGFONT * logfont)345 int getStyleFromLogfont(const LOGFONT* logfont)
346 {
347 // FIXME: consider defining UNDEFINED or INVALID for style and
348 // returning it when logfont is 0
349 if (!logfont) {
350 ASSERT_NOT_REACHED();
351 return FontStyleNormal;
352 }
353 return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) |
354 (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) |
355 (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal);
356 }
357
358 } // namespace WebCore
359