• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2006, 2007, 2008, Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "config.h"
32 #include "FontUtilsChromiumWin.h"
33 
34 #include <limits>
35 
36 #include "PlatformString.h"
37 #include "StringHash.h"
38 #include "UniscribeHelper.h"
39 #include <unicode/locid.h>
40 #include <unicode/uchar.h>
41 #include <wtf/HashMap.h>
42 
43 namespace WebCore {
44 
45 namespace {
46 
47 // A simple mapping from UScriptCode to family name.  This is a sparse array,
48 // which works well since the range of UScriptCode values is small.
49 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
50 
initializeScriptFontMap(ScriptToFontMap & scriptFontMap)51 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
52 {
53     struct FontMap {
54         UScriptCode script;
55         const UChar* family;
56     };
57 
58     const static FontMap fontMap[] = {
59         {USCRIPT_LATIN, L"times new roman"},
60         {USCRIPT_GREEK, L"times new roman"},
61         {USCRIPT_CYRILLIC, L"times new roman"},
62         {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
63         {USCRIPT_HIRAGANA, L"ms pgothic"},
64         {USCRIPT_KATAKANA, L"ms pgothic"},
65         {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
66         {USCRIPT_HANGUL, L"gulim"},
67         {USCRIPT_THAI, L"tahoma"},
68         {USCRIPT_HEBREW, L"david"},
69         {USCRIPT_ARABIC, L"tahoma"},
70         {USCRIPT_DEVANAGARI, L"mangal"},
71         {USCRIPT_BENGALI, L"vrinda"},
72         {USCRIPT_GURMUKHI, L"raavi"},
73         {USCRIPT_GUJARATI, L"shruti"},
74         {USCRIPT_ORIYA, L"kalinga"},
75         {USCRIPT_TAMIL, L"latha"},
76         {USCRIPT_TELUGU, L"gautami"},
77         {USCRIPT_KANNADA, L"tunga"},
78         {USCRIPT_MALAYALAM, L"kartika"},
79         {USCRIPT_LAO, L"dokchampa"},
80         {USCRIPT_TIBETAN, L"microsoft himalaya"},
81         {USCRIPT_GEORGIAN, L"sylfaen"},
82         {USCRIPT_ARMENIAN, L"sylfaen"},
83         {USCRIPT_ETHIOPIC, L"nyala"},
84         {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
85         {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
86         {USCRIPT_YI, L"microsoft yi balti"},
87         {USCRIPT_SINHALA, L"iskoola pota"},
88         {USCRIPT_SYRIAC, L"estrangelo edessa"},
89         {USCRIPT_KHMER, L"daunpenh"},
90         {USCRIPT_THAANA, L"mv boli"},
91         {USCRIPT_MONGOLIAN, L"mongolian balti"},
92         {USCRIPT_MYANMAR, L"padauk"},
93         // For USCRIPT_COMMON, we map blocks to scripts when
94         // that makes sense.
95     };
96 
97     for (int i = 0; i < sizeof(fontMap) / sizeof(fontMap[0]); ++i)
98         scriptFontMap[fontMap[i].script] = fontMap[i].family;
99 
100     // Initialize the locale-dependent mapping.
101     // Since Chrome synchronizes the ICU default locale with its UI locale,
102     // this ICU locale tells the current UI locale of Chrome.
103     icu::Locale locale = icu::Locale::getDefault();
104     const UChar* localeFamily = 0;
105     if (locale == icu::Locale::getJapanese())
106         localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
107     else if (locale == icu::Locale::getKorean())
108         localeFamily = scriptFontMap[USCRIPT_HANGUL];
109     else {
110         // Use Simplified Chinese font for all other locales including
111         // Traditional Chinese because Simsun (SC font) has a wider
112         // coverage (covering both SC and TC) than PMingLiu (TC font).
113         // Note that |fontMap| does not have a separate entry for
114         // USCRIPT_TRADITIONAL_HAN for that reason.
115         // This also speeds up the TC version of Chrome when rendering SC
116         // pages.
117         localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
118     }
119     if (localeFamily)
120         scriptFontMap[USCRIPT_HAN] = localeFamily;
121 }
122 
123 // There are a lot of characters in USCRIPT_COMMON that can be covered
124 // by fonts for scripts closely related to them. See
125 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
126 // FIXME: make this more efficient with a wider coverage
getScriptBasedOnUnicodeBlock(int ucs4)127 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
128 {
129     UBlockCode block = ublock_getCode(ucs4);
130     switch (block) {
131     case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
132         return USCRIPT_HAN;
133     case UBLOCK_HIRAGANA:
134     case UBLOCK_KATAKANA:
135         return USCRIPT_HIRAGANA;
136     case UBLOCK_ARABIC:
137         return USCRIPT_ARABIC;
138     case UBLOCK_THAI:
139         return USCRIPT_THAI;
140     case UBLOCK_GREEK:
141         return USCRIPT_GREEK;
142     case UBLOCK_DEVANAGARI:
143         // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
144         // font for now although they're used by other scripts as well.
145         // Without a context, we can't do any better.
146         return USCRIPT_DEVANAGARI;
147     case UBLOCK_ARMENIAN:
148         return USCRIPT_ARMENIAN;
149     case UBLOCK_GEORGIAN:
150         return USCRIPT_GEORGIAN;
151     case UBLOCK_KANNADA:
152         return USCRIPT_KANNADA;
153     default:
154         return USCRIPT_COMMON;
155     }
156 }
157 
getScript(int ucs4)158 UScriptCode getScript(int ucs4)
159 {
160     UErrorCode err = U_ZERO_ERROR;
161     UScriptCode script = uscript_getScript(ucs4, &err);
162     // If script is invalid, common or inherited or there's an error,
163     // infer a script based on the unicode block of a character.
164     if (script <= USCRIPT_INHERITED || U_FAILURE(err))
165         script = getScriptBasedOnUnicodeBlock(ucs4);
166     return script;
167 }
168 
169 const int kUndefinedAscent = std::numeric_limits<int>::min();
170 
171 // Given an HFONT, return the ascent. If GetTextMetrics fails,
172 // kUndefinedAscent is returned, instead.
getAscent(HFONT hfont)173 int getAscent(HFONT hfont)
174 {
175     HDC dc = GetDC(0);
176     HGDIOBJ oldFont = SelectObject(dc, hfont);
177     TEXTMETRIC tm;
178     BOOL gotMetrics = GetTextMetrics(dc, &tm);
179     SelectObject(dc, oldFont);
180     ReleaseDC(0, dc);
181     return gotMetrics ? tm.tmAscent : kUndefinedAscent;
182 }
183 
184 struct FontData {
FontDataWebCore::__anon3d7d36d80111::FontData185     FontData()
186         : hfont(0)
187         , ascent(kUndefinedAscent)
188         , scriptCache(0)
189     {
190     }
191 
192     HFONT hfont;
193     int ascent;
194     mutable SCRIPT_CACHE scriptCache;
195 };
196 
197 // Again, using hash_map does not earn us much here.  page_cycler_test intl2
198 // gave us a 'better' result with map than with hash_map even though they're
199 // well-within 1-sigma of each other so that the difference is not significant.
200 // On the other hand, some pages in intl2 seem to take longer to load with map
201 // in the 1st pass. Need to experiment further.
202 typedef HashMap<String, FontData> FontDataCache;
203 
204 }  // namespace
205 
206 // FIXME: this is font fallback code version 0.1
207 //  - Cover all the scripts
208 //  - Get the default font for each script/generic family from the
209 //    preference instead of hardcoding in the source.
210 //    (at least, read values from the registry for IE font settings).
211 //  - Support generic families (from FontDescription)
212 //  - If the default font for a script is not available,
213 //    try some more fonts known to support it. Finally, we can
214 //    use EnumFontFamilies or similar APIs to come up with a list of
215 //    fonts supporting the script and cache the result.
216 //  - Consider using UnicodeSet (or UnicodeMap) converted from
217 //    GLYPHSET (BMP) or directly read from truetype cmap tables to
218 //    keep track of which character is supported by which font
219 //  - Update script_font_cache in response to WM_FONTCHANGE
220 
getFontFamilyForScript(UScriptCode script,FontDescription::GenericFamilyType generic)221 const UChar* getFontFamilyForScript(UScriptCode script,
222                                     FontDescription::GenericFamilyType generic)
223 {
224     static ScriptToFontMap scriptFontMap;
225     static bool initialized = false;
226     if (!initialized) {
227         initializeScriptFontMap(scriptFontMap);
228         initialized = true;
229     }
230     if (script == USCRIPT_INVALID_CODE)
231         return 0;
232     ASSERT(script < USCRIPT_CODE_LIMIT);
233     return scriptFontMap[script];
234 }
235 
236 // FIXME:
237 //  - Handle 'Inherited', 'Common' and 'Unknown'
238 //    (see http://www.unicode.org/reports/tr24/#Usage_Model )
239 //    For 'Inherited' and 'Common', perhaps we need to
240 //    accept another parameter indicating the previous family
241 //    and just return it.
242 //  - All the characters (or characters up to the point a single
243 //    font can cover) need to be taken into account
// Picks a fallback font family for the UTF-16 text |characters| of |length|
// code units.  The text is scanned from the start until a character with a
// script other than USCRIPT_COMMON is found; the family registered for that
// script is returned.  When no family is registered, a family is chosen from
// the Unicode plane of the last character examined.
//
// If non-null, |charChecked| receives the last code point examined and
// |scriptChecked| receives the script that was finally used.
const UChar* getFallbackFamily(const UChar* characters,
                               int length,
                               FontDescription::GenericFamilyType generic,
                               UChar32* charChecked,
                               UScriptCode* scriptChecked)
{
    ASSERT(characters && characters[0] && length > 0);

    // Characters common to all scripts (e.g. space) often lead a string, so
    // skip over them to find the script that decides the font.
    UChar32 codePoint = 0;
    UScriptCode detected = USCRIPT_COMMON;
    for (int offset = 0; offset < length && detected == USCRIPT_COMMON; ) {
        U16_NEXT(characters, offset, length, codePoint);
        detected = getScript(codePoint);
    }

    // For the full-width ASCII characters (U+FF01 - U+FF5E), use the font for
    // Han (chosen in a locale-dependent way when the script map is built).
    // Full-width ASCII characters are rather widely used in Japanese and
    // Chinese documents and they're fully covered by Chinese, Japanese and
    // Korean fonts.
    const bool isFullWidthAscii = codePoint > 0xFF00 && codePoint < 0xFF5F;
    if (isFullWidthAscii)
        detected = USCRIPT_HAN;
    else if (detected == USCRIPT_COMMON)
        detected = getScriptBasedOnUnicodeBlock(codePoint);

    const UChar* family = getFontFamilyForScript(detected, generic);
    if (!family) {
        // Another lame work-around to cover non-BMP characters: choose a
        // font by Unicode plane when the script has no registered family.
        const int plane = codePoint >> 16;
        if (plane == 1)
            family = L"code2001";
        else if (plane == 2)
            family = L"simsun-extb";
        else
            family = L"lucida sans unicode";
    }

    if (scriptChecked)
        *scriptChecked = detected;
    if (charChecked)
        *charChecked = codePoint;
    return family;
}
295 
296 // Be aware that this is not thread-safe.
getDerivedFontData(const UChar * family,int style,LOGFONT * logfont,int * ascent,HFONT * hfont,SCRIPT_CACHE ** scriptCache)297 bool getDerivedFontData(const UChar* family,
298                         int style,
299                         LOGFONT* logfont,
300                         int* ascent,
301                         HFONT* hfont,
302                         SCRIPT_CACHE** scriptCache)
303 {
304     ASSERT(logfont);
305     ASSERT(family);
306     ASSERT(*family);
307 
308     // It does not matter that we leak font data when we exit.
309     static FontDataCache fontDataCache;
310 
311     // FIXME: This comes up pretty high in the profile so that
312     // we need to measure whether using SHA256 (after coercing all the
313     // fields to char*) is faster than String::format.
314     String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family);
315     FontDataCache::iterator iter = fontDataCache.find(fontKey);
316     FontData* derived;
317     if (iter == fontDataCache.end()) {
318         ASSERT(wcslen(family) < LF_FACESIZE);
319         wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
320         // FIXME: CreateFontIndirect always comes up with
321         // a font even if there's no font matching the name. Need to
322         // check it against what we actually want (as is done in
323         // FontCacheWin.cpp)
324         pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData());
325         derived = &entry.first->second;
326         derived->hfont = CreateFontIndirect(logfont);
327         // GetAscent may return kUndefinedAscent, but we still want to
328         // cache it so that we won't have to call CreateFontIndirect once
329         // more for HFONT next time.
330         derived->ascent = getAscent(derived->hfont);
331     } else {
332         derived = &iter->second;
333         // Last time, GetAscent failed so that only HFONT was
334         // cached. Try once more assuming that TryPreloadFont
335         // was called by a caller between calls.
336         if (kUndefinedAscent == derived->ascent)
337             derived->ascent = getAscent(derived->hfont);
338     }
339     *hfont = derived->hfont;
340     *ascent = derived->ascent;
341     *scriptCache = &(derived->scriptCache);
342     return *ascent != kUndefinedAscent;
343 }
344 
getStyleFromLogfont(const LOGFONT * logfont)345 int getStyleFromLogfont(const LOGFONT* logfont)
346 {
347     // FIXME: consider defining UNDEFINED or INVALID for style and
348     //                  returning it when logfont is 0
349     if (!logfont) {
350         ASSERT_NOT_REACHED();
351         return FontStyleNormal;
352     }
353     return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) |
354            (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) |
355            (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal);
356 }
357 
358 }  // namespace WebCore
359