• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 // A wrapper around Uniscribe that provides a reasonable API.
32 
33 #ifndef UniscribeHelper_h
34 #define UniscribeHelper_h
35 
36 #include <windows.h>
37 #include <usp10.h>
38 #include <map>
39 
40 #include <unicode/uchar.h>
41 #include <wtf/Vector.h>
42 
43 class UniscribeTest_TooBig_Test;  // A gunit test for UniscribeHelper.
44 
45 namespace WebCore {
46 
47 class GraphicsContext;
48 
// Sizes passed as the second template argument of the Vector members below:
// UNISCRIBE_HELPER_STACK_RUNS is used for the per-run vectors (m_runs,
// m_shapes, m_screenOrder) and UNISCRIBE_HELPER_STACK_CHARS for the
// per-glyph/per-character vectors inside each Shaping.
// NOTE(review): presumably this is WTF::Vector's inline capacity, letting
// short runs avoid heap allocation -- confirm against wtf/Vector.h.
49 #define UNISCRIBE_HELPER_STACK_RUNS 8
50 #define UNISCRIBE_HELPER_STACK_CHARS 32
51 
52 // This object should be safe to create & destroy frequently, as long as the
53 // caller preserves the script_cache when possible (this data may be slow to
54 // compute).
55 //
56 // This object is "kind of large" (~1K) because it reserves a lot of space for
57 // working with to avoid expensive heap operations. Therefore, not only should
58 // you not worry about creating and destroying it, you should try to not keep
59 // them around.
60 class UniscribeHelper {
61 public:
62     // Initializes this Uniscribe run with the text pointed to by |run| with
63     // |length|. The input is NOT null terminated.
64     //
65     // The is_rtl flag should be set if the input script is RTL. It is assumed
66     // that the caller has already divided up the input text (using ICU, for
67     // example) into runs of the same direction of script. This avoids
68     // disagreements between the caller and Uniscribe later (see FillItems).
69     //
70     // A script cache should be provided by the caller that is initialized to
71     // NULL. When the caller is done with the cache (it may be stored between
72     // runs as long as it is used consistently with the same HFONT), it should
73     // call ScriptFreeCache().
74     UniscribeHelper(const UChar* input,
75                     int inputLength,
76                     bool isRtl,
77                     HFONT,
78                     SCRIPT_CACHE*,
79                     SCRIPT_FONTPROPERTIES*);
80 
81     virtual ~UniscribeHelper();
82 
83     // Sets Uniscribe's directional override flag. False by default.
directionalOverride()84     bool directionalOverride() const
85     {
86         return m_directionalOverride;
87     }
setDirectionalOverride(bool override)88     void setDirectionalOverride(bool override)
89     {
90         m_directionalOverride = override;
91     }
92 
93     // Set's Uniscribe's no-ligate override flag. False by default.
inhibitLigate()94     bool inhibitLigate() const
95     {
96         return m_inhibitLigate;
97     }
setInhibitLigate(bool inhibit)98     void setInhibitLigate(bool inhibit)
99     {
100         m_inhibitLigate = inhibit;
101     }
102 
103     // Set letter spacing. We will try to insert this much space between
104     // graphemes (one or more glyphs perceived as a single unit by ordinary
105     // users of a script). Positive values increase letter spacing, negative
106     // values decrease it. 0 by default.
letterSpacing()107     int letterSpacing() const
108     {
109         return m_letterSpacing;
110     }
setLetterSpacing(int letterSpacing)111     void setLetterSpacing(int letterSpacing)
112     {
113         m_letterSpacing = letterSpacing;
114     }
115 
116     // Set the width of a standard space character. We use this to normalize
117     // space widths. Windows will make spaces after Hindi characters larger than
118     // other spaces. A space_width of 0 means to use the default space width.
119     //
120     // Must be set before Init() is called.
spaceWidth()121     int spaceWidth() const
122     {
123         return m_spaceWidth;
124     }
setSpaceWidth(int spaceWidth)125     void setSpaceWidth(int spaceWidth)
126     {
127         m_spaceWidth = spaceWidth;
128     }
129 
130     // Set word spacing. We will try to insert this much extra space between
131     // each word in the input (beyond whatever whitespace character separates
132     // words). Positive values lead to increased letter spacing, negative values
133     // decrease it. 0 by default.
134     //
135     // Must be set before Init() is called.
wordSpacing()136     int wordSpacing() const
137     {
138         return m_wordSpacing;
139     }
setWordSpacing(int wordSpacing)140     void setWordSpacing(int wordSpacing)
141     {
142         m_wordSpacing = wordSpacing;
143     }
144 
setAscent(int ascent)145     void setAscent(int ascent)
146     {
147         m_ascent = ascent;
148     }
149 
150     // When set to true, this class is used only to look up glyph
151     // indices for a range of Unicode characters without glyph placement.
152     // By default, it's false. This should be set to true when this
153     // class is used for glyph index look-up for non-BMP characters
154     // in GlyphPageNodeChromiumWin.cpp.
setDisableFontFallback(bool disableFontFallback)155     void setDisableFontFallback(bool disableFontFallback)
156     {
157         m_disableFontFallback = true;
158     }
159 
160     // You must call this after setting any options but before doing any
161     // other calls like asking for widths or drawing.
init()162     void init()
163     {
164         initWithOptionalLengthProtection(true);
165     }
166 
167     // Returns the total width in pixels of the text run.
168     int width() const;
169 
170     // Call to justify the text, with the amount of space that should be ADDED
171     // to get the desired width that the column should be justified to.
172     // Normally, spaces are inserted, but for Arabic there will be kashidas
173     // (extra strokes) inserted instead.
174     //
175     // This function MUST be called AFTER Init().
176     void justify(int additionalSpace);
177 
178     // Computes the given character offset into a pixel offset of the beginning
179     // of that character.
180     int characterToX(int offset) const;
181 
182     // Converts the given pixel X position into a logical character offset into
183     // the run. For positions appearing before the first character, this will
184     // return -1.
185     int xToCharacter(int x) const;
186 
187     // Draws the given characters to (x, y) in the given DC. The font will be
188     // handled by this function, but the font color and other attributes should
189     // be pre-set.
190     //
191     // The y position is the upper left corner, NOT the baseline.
192     void draw(GraphicsContext* graphicsContext, HDC dc, int x, int y, int from,
193               int to);
194 
195     // Returns the first glyph assigned to the character at the given offset.
196     // This function is used to retrieve glyph information when Uniscribe is
197     // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
198     // characters. These characters are not otherwise special and have no
199     // complex shaping rules, so we don't otherwise need Uniscribe, except
200     // Uniscribe is the only way to get glyphs for non-BMP characters.
201     //
202     // Returns 0 if there is no glyph for the given character.
203     WORD firstGlyphForCharacter(int charOffset) const;
204 
205 protected:
206     // Backend for init. The flag allows the unit test to specify whether we
207     // should fail early for very long strings like normal, or try to pass the
208     // long string to Uniscribe. The latter provides a way to force failure of
209     // shaping.
210     void initWithOptionalLengthProtection(bool lengthProtection);
211 
212     // Tries to preload the font when the it is not accessible.
213     // This is the default implementation and it does not do anything.
tryToPreloadFont(HFONT)214     virtual void tryToPreloadFont(HFONT) {}
215 
216 private:
217     friend class UniscribeTest_TooBig_Test;
218 
219     // An array corresponding to each item in runs_ containing information
220     // on each of the glyphs that were generated. Like runs_, this is in
221     // reading order. However, for rtl text, the characters within each
222     // item will be reversed.
223     struct Shaping {
ShapingShaping224         Shaping()
225             : m_prePadding(0)
226             , m_hfont(NULL)
227             , m_scriptCache(NULL)
228             , m_ascentOffset(0) {
229             m_abc.abcA = 0;
230             m_abc.abcB = 0;
231             m_abc.abcC = 0;
232         }
233 
234         // Returns the number of glyphs (which will be drawn to the screen)
235         // in this run.
glyphLengthShaping236         int glyphLength() const
237         {
238             return static_cast<int>(m_glyphs.size());
239         }
240 
241         // Returns the number of characters (that we started with) in this run.
charLengthShaping242         int charLength() const
243         {
244             return static_cast<int>(m_logs.size());
245         }
246 
247         // Returns the advance array that should be used when measuring glyphs.
248         // The returned pointer will indicate an array with glyph_length()
249         // elements and the advance that should be used for each one. This is
250         // either the real advance, or the justified advances if there is one,
251         // and is the array we want to use for measurement.
effectiveAdvancesShaping252         const int* effectiveAdvances() const
253         {
254             if (m_advance.size() == 0)
255                 return 0;
256             if (m_justify.size() == 0)
257                 return &m_advance[0];
258             return &m_justify[0];
259         }
260 
261         // This is the advance amount of space that we have added to the
262         // beginning of the run. It is like the ABC's |A| advance but one that
263         // we create and must handle internally whenever computing with pixel
264         // offsets.
265         int m_prePadding;
266 
267         // Glyph indices in the font used to display this item. These indices
268         // are in screen order.
269         Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_glyphs;
270 
271         // For each input character, this tells us the first glyph index it
272         // generated. This is the only array with size of the input chars.
273         //
274         // All offsets are from the beginning of this run. Multiple characters
275         // can generate one glyph, in which case there will be adjacent
276         // duplicates in this list. One character can also generate multiple
277         // glyphs, in which case there will be skipped indices in this list.
278         Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_logs;
279 
280         // Flags and such for each glyph.
281         Vector<SCRIPT_VISATTR, UNISCRIBE_HELPER_STACK_CHARS> m_visualAttributes;
282 
283         // Horizontal advances for each glyph listed above, this is basically
284         // how wide each glyph is.
285         Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_advance;
286 
287         // This contains glyph offsets, from the nominal position of a glyph.
288         // It is used to adjust the positions of multiple combining characters
289         // around/above/below base characters in a context-sensitive manner so
290         // that they don't bump against each other and the base character.
291         Vector<GOFFSET, UNISCRIBE_HELPER_STACK_CHARS> m_offsets;
292 
293         // Filled by a call to Justify, this is empty for nonjustified text.
294         // If nonempty, this contains the array of justify characters for each
295         // character as returned by ScriptJustify.
296         //
297         // This is the same as the advance array, but with extra space added
298         // for some characters. The difference between a glyph's |justify|
299         // width and it's |advance| width is the extra space added.
300         Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_justify;
301 
302         // Sizing information for this run. This treats the entire run as a
303         // character with a preceeding advance, width, and ending advance.  The
304         // B width is the sum of the |advance| array, and the A and C widths
305         // are any extra spacing applied to each end.
306         //
307         // It is unclear from the documentation what this actually means. From
308         // experimentation, it seems that the sum of the character advances is
309         // always the sum of the ABC values, and I'm not sure what you're
310         // supposed to do with the ABC values.
311         ABC m_abc;
312 
313         // Pointers to windows font data used to render this run.
314         HFONT m_hfont;
315         SCRIPT_CACHE* m_scriptCache;
316 
317         // Ascent offset between the ascent of the primary font
318         // and that of the fallback font. The offset needs to be applied,
319         // when drawing a string, to align multiple runs rendered with
320         // different fonts.
321         int m_ascentOffset;
322     };
323 
324     // Computes the runs_ array from the text run.
325     void fillRuns();
326 
327     // Computes the shapes_ array given an runs_ array already filled in.
328     void fillShapes();
329 
330     // Fills in the screen_order_ array (see below).
331     void fillScreenOrder();
332 
333     // Called to update the glyph positions based on the current spacing
334     // options that are set.
335     void applySpacing();
336 
337     // Normalizes all advances for spaces to the same width. This keeps windows
338     // from making spaces after Hindi characters larger, which is then
339     // inconsistent with our meaure of the width since WebKit doesn't include
340     // spaces in text-runs sent to uniscribe unless white-space:pre.
341     void adjustSpaceAdvances();
342 
343     // Returns the total width of a single item.
344     int advanceForItem(int) const;
345 
346     // Shapes a run (pointed to by |input|) using |hfont| first.
347     // Tries a series of fonts specified retrieved with NextWinFontData
348     // and finally a font covering characters in |*input|. A string pointed
349     // by |input| comes from ScriptItemize and is supposed to contain
350     // characters belonging to a single script aside from characters common to
351     // all scripts (e.g. space).
352     bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, Shaping&);
353 
354     // Gets Windows font data for the next best font to try in the list
355     // of fonts. When there's no more font available, returns false
356     // without touching any of out params. Need to call ResetFontIndex
357     // to start scanning of the font list from the beginning.
nextWinFontData(HFONT *,SCRIPT_CACHE **,SCRIPT_FONTPROPERTIES **,int * ascent)358     virtual bool nextWinFontData(HFONT*, SCRIPT_CACHE**, SCRIPT_FONTPROPERTIES**, int* ascent)
359     {
360         return false;
361     }
362 
363     // Resets the font index to the first in the list of fonts to try after the
364     // primaryFont turns out not to work. With fontIndex reset,
365     // NextWinFontData scans fallback fonts from the beginning.
resetFontIndex()366     virtual void resetFontIndex() {}
367 
368     // The input data for this run of Uniscribe. See the constructor.
369     const UChar* m_input;
370     const int m_inputLength;
371     const bool m_isRtl;
372 
373     // Windows font data for the primary font. In a sense, m_logfont and m_style
374     // are redundant because m_hfont contains all the information. However,
375     // invoking GetObject, everytime we need the height and the style, is rather
376     // expensive so that we cache them. Would it be better to add getter and
377     // (virtual) setter for the height and the style of the primary font,
378     // instead of m_logfont? Then, a derived class ctor can set m_ascent,
379     // m_height and m_style if they're known. Getters for them would have to
380     // 'infer' their values from m_hfont ONLY when they're not set.
381     HFONT m_hfont;
382     SCRIPT_CACHE* m_scriptCache;
383     SCRIPT_FONTPROPERTIES* m_fontProperties;
384     int m_ascent;
385     LOGFONT m_logfont;
386     int m_style;
387 
388     // Options, see the getters/setters above.
389     bool m_directionalOverride;
390     bool m_inhibitLigate;
391     int m_letterSpacing;
392     int m_spaceWidth;
393     int m_wordSpacing;
394     bool m_disableFontFallback;
395 
396     // Uniscribe breaks the text into Runs. These are one length of text that is
397     // in one script and one direction. This array is in reading order.
398     Vector<SCRIPT_ITEM, UNISCRIBE_HELPER_STACK_RUNS> m_runs;
399 
400     Vector<Shaping, UNISCRIBE_HELPER_STACK_RUNS> m_shapes;
401 
402     // This is a mapping between reading order and screen order for the items.
403     // Uniscribe's items array are in reading order. For right-to-left text,
404     // or mixed (although WebKit's |TextRun| should really be only one
405     // direction), this makes it very difficult to compute character offsets
406     // and positions. This list is in screen order from left to right, and
407     // gives the index into the |m_runs| and |m_shapes| arrays of each
408     // subsequent item.
409     Vector<int, UNISCRIBE_HELPER_STACK_RUNS> m_screenOrder;
410 };
411 
412 }  // namespace WebCore
413 
414 #endif  // UniscribeHelper_h
415