/*
 * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// A wrapper around Uniscribe that provides a reasonable API.

#ifndef UniscribeHelper_h
#define UniscribeHelper_h

#include <windows.h>
#include <usp10.h>
#include <map>

#include <unicode/uchar.h>
#include <wtf/Vector.h>

class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper.
44 45 namespace WebCore { 46 47 class GraphicsContext; 48 49 #define UNISCRIBE_HELPER_STACK_RUNS 8 50 #define UNISCRIBE_HELPER_STACK_CHARS 32 51 52 // This object should be safe to create & destroy frequently, as long as the 53 // caller preserves the script_cache when possible (this data may be slow to 54 // compute). 55 // 56 // This object is "kind of large" (~1K) because it reserves a lot of space for 57 // working with to avoid expensive heap operations. Therefore, not only should 58 // you not worry about creating and destroying it, you should try to not keep 59 // them around. 60 class UniscribeHelper { 61 public: 62 // Initializes this Uniscribe run with the text pointed to by |run| with 63 // |length|. The input is NOT null terminated. 64 // 65 // The is_rtl flag should be set if the input script is RTL. It is assumed 66 // that the caller has already divided up the input text (using ICU, for 67 // example) into runs of the same direction of script. This avoids 68 // disagreements between the caller and Uniscribe later (see FillItems). 69 // 70 // A script cache should be provided by the caller that is initialized to 71 // NULL. When the caller is done with the cache (it may be stored between 72 // runs as long as it is used consistently with the same HFONT), it should 73 // call ScriptFreeCache(). 74 UniscribeHelper(const UChar* input, 75 int inputLength, 76 bool isRtl, 77 HFONT, 78 SCRIPT_CACHE*, 79 SCRIPT_FONTPROPERTIES*, 80 WORD); 81 82 virtual ~UniscribeHelper(); 83 84 // Sets Uniscribe's directional override flag. False by default. directionalOverride()85 bool directionalOverride() const 86 { 87 return m_directionalOverride; 88 } setDirectionalOverride(bool override)89 void setDirectionalOverride(bool override) 90 { 91 m_directionalOverride = override; 92 } 93 94 // Set's Uniscribe's no-ligate override flag. False by default. 
inhibitLigate()95 bool inhibitLigate() const 96 { 97 return m_inhibitLigate; 98 } setInhibitLigate(bool inhibit)99 void setInhibitLigate(bool inhibit) 100 { 101 m_inhibitLigate = inhibit; 102 } 103 104 // Set letter spacing. We will try to insert this much space between 105 // graphemes (one or more glyphs perceived as a single unit by ordinary 106 // users of a script). Positive values increase letter spacing, negative 107 // values decrease it. 0 by default. letterSpacing()108 int letterSpacing() const 109 { 110 return m_letterSpacing; 111 } setLetterSpacing(int letterSpacing)112 void setLetterSpacing(int letterSpacing) 113 { 114 m_letterSpacing = letterSpacing; 115 } 116 117 // Set the width of a standard space character. We use this to normalize 118 // space widths. Windows will make spaces after Hindi characters larger than 119 // other spaces. A space_width of 0 means to use the default space width. 120 // 121 // Must be set before Init() is called. spaceWidth()122 int spaceWidth() const 123 { 124 return m_spaceWidth; 125 } setSpaceWidth(int spaceWidth)126 void setSpaceWidth(int spaceWidth) 127 { 128 m_spaceWidth = spaceWidth; 129 } 130 131 // Set word spacing. We will try to insert this much extra space between 132 // each word in the input (beyond whatever whitespace character separates 133 // words). Positive values lead to increased letter spacing, negative values 134 // decrease it. 0 by default. 135 // 136 // Must be set before Init() is called. wordSpacing()137 int wordSpacing() const 138 { 139 return m_wordSpacing; 140 } setWordSpacing(int wordSpacing)141 void setWordSpacing(int wordSpacing) 142 { 143 m_wordSpacing = wordSpacing; 144 } 145 setAscent(int ascent)146 void setAscent(int ascent) 147 { 148 m_ascent = ascent; 149 } 150 151 // When set to true, this class is used only to look up glyph 152 // indices for a range of Unicode characters without glyph placement. 153 // By default, it's false. 
This should be set to true when this 154 // class is used for glyph index look-up for non-BMP characters 155 // in GlyphPageNodeChromiumWin.cpp. setDisableFontFallback(bool disableFontFallback)156 void setDisableFontFallback(bool disableFontFallback) 157 { 158 m_disableFontFallback = true; 159 } 160 161 // You must call this after setting any options but before doing any 162 // other calls like asking for widths or drawing. init()163 void init() 164 { 165 initWithOptionalLengthProtection(true); 166 } 167 168 // Returns the total width in pixels of the text run. 169 int width() const; 170 171 // Call to justify the text, with the amount of space that should be ADDED 172 // to get the desired width that the column should be justified to. 173 // Normally, spaces are inserted, but for Arabic there will be kashidas 174 // (extra strokes) inserted instead. 175 // 176 // This function MUST be called AFTER Init(). 177 void justify(int additionalSpace); 178 179 // Computes the given character offset into a pixel offset of the beginning 180 // of that character. 181 int characterToX(int offset) const; 182 183 // Converts the given pixel X position into a logical character offset into 184 // the run. For positions appearing before the first character, this will 185 // return -1. 186 int xToCharacter(int x) const; 187 188 // Draws the given characters to (x, y) in the given DC. The font will be 189 // handled by this function, but the font color and other attributes should 190 // be pre-set. 191 // 192 // The y position is the upper left corner, NOT the baseline. 193 void draw(GraphicsContext* graphicsContext, HDC dc, int x, int y, int from, 194 int to); 195 196 // Returns the first glyph assigned to the character at the given offset. 197 // This function is used to retrieve glyph information when Uniscribe is 198 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) 199 // characters. 
These characters are not otherwise special and have no 200 // complex shaping rules, so we don't otherwise need Uniscribe, except 201 // Uniscribe is the only way to get glyphs for non-BMP characters. 202 // 203 // Returns 0 if there is no glyph for the given character. 204 WORD firstGlyphForCharacter(int charOffset) const; 205 206 protected: 207 // Backend for init. The flag allows the unit test to specify whether we 208 // should fail early for very long strings like normal, or try to pass the 209 // long string to Uniscribe. The latter provides a way to force failure of 210 // shaping. 211 void initWithOptionalLengthProtection(bool lengthProtection); 212 213 // Tries to preload the font when the it is not accessible. 214 // This is the default implementation and it does not do anything. tryToPreloadFont(HFONT)215 virtual void tryToPreloadFont(HFONT) {} 216 217 private: 218 friend class UniscribeTest_TooBig_Test; 219 220 // An array corresponding to each item in runs_ containing information 221 // on each of the glyphs that were generated. Like runs_, this is in 222 // reading order. However, for rtl text, the characters within each 223 // item will be reversed. 224 struct Shaping { ShapingShaping225 Shaping() 226 : m_prePadding(0) 227 , m_hfont(NULL) 228 , m_scriptCache(NULL) 229 , m_ascentOffset(0) 230 , m_spaceGlyph(0) 231 { 232 m_abc.abcA = 0; 233 m_abc.abcB = 0; 234 m_abc.abcC = 0; 235 } 236 237 // Returns the number of glyphs (which will be drawn to the screen) 238 // in this run. glyphLengthShaping239 int glyphLength() const 240 { 241 return static_cast<int>(m_glyphs.size()); 242 } 243 244 // Returns the number of characters (that we started with) in this run. charLengthShaping245 int charLength() const 246 { 247 return static_cast<int>(m_logs.size()); 248 } 249 250 // Returns the advance array that should be used when measuring glyphs. 
251 // The returned pointer will indicate an array with glyph_length() 252 // elements and the advance that should be used for each one. This is 253 // either the real advance, or the justified advances if there is one, 254 // and is the array we want to use for measurement. effectiveAdvancesShaping255 const int* effectiveAdvances() const 256 { 257 if (m_advance.size() == 0) 258 return 0; 259 if (m_justify.size() == 0) 260 return &m_advance[0]; 261 return &m_justify[0]; 262 } 263 264 // This is the advance amount of space that we have added to the 265 // beginning of the run. It is like the ABC's |A| advance but one that 266 // we create and must handle internally whenever computing with pixel 267 // offsets. 268 int m_prePadding; 269 270 // Glyph indices in the font used to display this item. These indices 271 // are in screen order. 272 Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_glyphs; 273 274 // For each input character, this tells us the first glyph index it 275 // generated. This is the only array with size of the input chars. 276 // 277 // All offsets are from the beginning of this run. Multiple characters 278 // can generate one glyph, in which case there will be adjacent 279 // duplicates in this list. One character can also generate multiple 280 // glyphs, in which case there will be skipped indices in this list. 281 Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_logs; 282 283 // Flags and such for each glyph. 284 Vector<SCRIPT_VISATTR, UNISCRIBE_HELPER_STACK_CHARS> m_visualAttributes; 285 286 // Horizontal advances for each glyph listed above, this is basically 287 // how wide each glyph is. 288 Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_advance; 289 290 // This contains glyph offsets, from the nominal position of a glyph. 291 // It is used to adjust the positions of multiple combining characters 292 // around/above/below base characters in a context-sensitive manner so 293 // that they don't bump against each other and the base character. 
294 Vector<GOFFSET, UNISCRIBE_HELPER_STACK_CHARS> m_offsets; 295 296 // Filled by a call to Justify, this is empty for nonjustified text. 297 // If nonempty, this contains the array of justify characters for each 298 // character as returned by ScriptJustify. 299 // 300 // This is the same as the advance array, but with extra space added 301 // for some characters. The difference between a glyph's |justify| 302 // width and it's |advance| width is the extra space added. 303 Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_justify; 304 305 // Sizing information for this run. This treats the entire run as a 306 // character with a preceeding advance, width, and ending advance. The 307 // B width is the sum of the |advance| array, and the A and C widths 308 // are any extra spacing applied to each end. 309 // 310 // It is unclear from the documentation what this actually means. From 311 // experimentation, it seems that the sum of the character advances is 312 // always the sum of the ABC values, and I'm not sure what you're 313 // supposed to do with the ABC values. 314 ABC m_abc; 315 316 // Pointers to windows font data used to render this run. 317 HFONT m_hfont; 318 SCRIPT_CACHE* m_scriptCache; 319 320 // Ascent offset between the ascent of the primary font 321 // and that of the fallback font. The offset needs to be applied, 322 // when drawing a string, to align multiple runs rendered with 323 // different fonts. 324 int m_ascentOffset; 325 326 WORD m_spaceGlyph; 327 }; 328 329 // Computes the runs_ array from the text run. 330 void fillRuns(); 331 332 // Computes the shapes_ array given an runs_ array already filled in. 333 void fillShapes(); 334 335 // Fills in the screen_order_ array (see below). 336 void fillScreenOrder(); 337 338 // Called to update the glyph positions based on the current spacing 339 // options that are set. 340 void applySpacing(); 341 342 // Normalizes all advances for spaces to the same width. 
This keeps windows 343 // from making spaces after Hindi characters larger, which is then 344 // inconsistent with our meaure of the width since WebKit doesn't include 345 // spaces in text-runs sent to uniscribe unless white-space:pre. 346 void adjustSpaceAdvances(); 347 348 // Returns the total width of a single item. 349 int advanceForItem(int) const; 350 351 bool containsMissingGlyphs(const Shaping&, 352 const SCRIPT_ITEM&, 353 const SCRIPT_FONTPROPERTIES*) const; 354 355 // Shapes a run (pointed to by |input|) using |hfont| first. 356 // Tries a series of fonts specified retrieved with NextWinFontData 357 // and finally a font covering characters in |*input|. A string pointed 358 // by |input| comes from ScriptItemize and is supposed to contain 359 // characters belonging to a single script aside from characters common to 360 // all scripts (e.g. space). 361 bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, Shaping&); 362 363 // Gets Windows font data for the next best font to try in the list 364 // of fonts. When there's no more font available, returns false 365 // without touching any of out params. Need to call ResetFontIndex 366 // to start scanning of the font list from the beginning. nextWinFontData(HFONT *,SCRIPT_CACHE **,SCRIPT_FONTPROPERTIES **,int * ascent)367 virtual bool nextWinFontData(HFONT*, SCRIPT_CACHE**, SCRIPT_FONTPROPERTIES**, int* ascent) 368 { 369 return false; 370 } 371 372 // Resets the font index to the first in the list of fonts to try after the 373 // primaryFont turns out not to work. With fontIndex reset, 374 // NextWinFontData scans fallback fonts from the beginning. resetFontIndex()375 virtual void resetFontIndex() {} 376 377 // The input data for this run of Uniscribe. See the constructor. 378 const UChar* m_input; 379 const int m_inputLength; 380 const bool m_isRtl; 381 382 // Windows font data for the primary font. 
In a sense, m_logfont and m_style 383 // are redundant because m_hfont contains all the information. However, 384 // invoking GetObject, everytime we need the height and the style, is rather 385 // expensive so that we cache them. Would it be better to add getter and 386 // (virtual) setter for the height and the style of the primary font, 387 // instead of m_logfont? Then, a derived class ctor can set m_ascent, 388 // m_height and m_style if they're known. Getters for them would have to 389 // 'infer' their values from m_hfont ONLY when they're not set. 390 HFONT m_hfont; 391 SCRIPT_CACHE* m_scriptCache; 392 SCRIPT_FONTPROPERTIES* m_fontProperties; 393 int m_ascent; 394 LOGFONT m_logfont; 395 int m_style; 396 WORD m_spaceGlyph; 397 398 // Options, see the getters/setters above. 399 bool m_directionalOverride; 400 bool m_inhibitLigate; 401 int m_letterSpacing; 402 int m_spaceWidth; 403 int m_wordSpacing; 404 bool m_disableFontFallback; 405 406 // Uniscribe breaks the text into Runs. These are one length of text that is 407 // in one script and one direction. This array is in reading order. 408 Vector<SCRIPT_ITEM, UNISCRIBE_HELPER_STACK_RUNS> m_runs; 409 410 Vector<Shaping, UNISCRIBE_HELPER_STACK_RUNS> m_shapes; 411 412 // This is a mapping between reading order and screen order for the items. 413 // Uniscribe's items array are in reading order. For right-to-left text, 414 // or mixed (although WebKit's |TextRun| should really be only one 415 // direction), this makes it very difficult to compute character offsets 416 // and positions. This list is in screen order from left to right, and 417 // gives the index into the |m_runs| and |m_shapes| arrays of each 418 // subsequent item. 419 Vector<int, UNISCRIBE_HELPER_STACK_RUNS> m_screenOrder; 420 }; 421 422 } // namespace WebCore 423 424 #endif // UniscribeHelper_h 425