1 /*
2 * Copyright (c) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "ComplexTextControllerLinux.h"
33
34 #include "Font.h"
35 #include "TextRun.h"
36
37 #include <unicode/normlzr.h>
38
39 namespace WebCore {
40
41 // Harfbuzz uses 26.6 fixed point values for pixel offsets. However, we don't
42 // handle subpixel positioning so this function is used to truncate Harfbuzz
43 // values to a number of pixels.
truncateFixedPointToInteger(HB_Fixed value)44 static int truncateFixedPointToInteger(HB_Fixed value)
45 {
46 return value >> 6;
47 }
48
ComplexTextController(const TextRun & run,unsigned startingX,const Font * font)49 ComplexTextController::ComplexTextController(const TextRun& run, unsigned startingX, const Font* font)
50 : m_font(font)
51 , m_run(getNormalizedTextRun(run, m_normalizedRun, m_normalizedBuffer))
52 , m_wordSpacingAdjustment(0)
53 , m_padding(0)
54 , m_padPerWordBreak(0)
55 , m_padError(0)
56 , m_letterSpacing(0)
57 {
58 // Do not use |run| inside this constructor. Use |m_run| instead.
59
60 memset(&m_item, 0, sizeof(m_item));
61 // We cannot know, ahead of time, how many glyphs a given script run
62 // will produce. We take a guess that script runs will not produce more
63 // than twice as many glyphs as there are code points plus a bit of
64 // padding and fallback if we find that we are wrong.
65 createGlyphArrays((m_run.length() + 2) * 2);
66
67 m_item.log_clusters = new unsigned short[m_run.length()];
68
69 m_item.face = 0;
70 m_item.font = allocHarfbuzzFont();
71
72 m_item.item.bidiLevel = m_run.rtl();
73
74 m_item.string = m_run.characters();
75 m_item.stringLength = m_run.length();
76
77 reset(startingX);
78 }
79
~ComplexTextController()80 ComplexTextController::~ComplexTextController()
81 {
82 fastFree(m_item.font);
83 deleteGlyphArrays();
84 delete[] m_item.log_clusters;
85 }
86
isWordBreak(unsigned index)87 bool ComplexTextController::isWordBreak(unsigned index)
88 {
89 return index && isCodepointSpace(m_item.string[index]) && !isCodepointSpace(m_item.string[index - 1]);
90 }
91
determineWordBreakSpacing(unsigned logClustersIndex)92 int ComplexTextController::determineWordBreakSpacing(unsigned logClustersIndex)
93 {
94 int wordBreakSpacing = 0;
95 // The first half of the conjunction works around the case where
96 // output glyphs aren't associated with any codepoints by the
97 // clusters log.
98 if (logClustersIndex < m_item.item.length
99 && isWordBreak(m_item.item.pos + logClustersIndex)) {
100 wordBreakSpacing = m_wordSpacingAdjustment;
101
102 if (m_padding > 0) {
103 int toPad = roundf(m_padPerWordBreak + m_padError);
104 m_padError += m_padPerWordBreak - toPad;
105
106 if (m_padding < toPad)
107 toPad = m_padding;
108 m_padding -= toPad;
109 wordBreakSpacing += toPad;
110 }
111 }
112 return wordBreakSpacing;
113 }
114
115 // setPadding sets a number of pixels to be distributed across the TextRun.
116 // WebKit uses this to justify text.
setPadding(int padding)117 void ComplexTextController::setPadding(int padding)
118 {
119 m_padding = padding;
120 if (!m_padding)
121 return;
122
123 // If we have padding to distribute, then we try to give an equal
124 // amount to each space. The last space gets the smaller amount, if
125 // any.
126 unsigned numWordBreaks = 0;
127
128 for (unsigned i = 0; i < m_item.stringLength; i++) {
129 if (isWordBreak(i))
130 numWordBreaks++;
131 }
132
133 if (numWordBreaks)
134 m_padPerWordBreak = m_padding / numWordBreaks;
135 else
136 m_padPerWordBreak = 0;
137 }
138
reset(unsigned offset)139 void ComplexTextController::reset(unsigned offset)
140 {
141 m_indexOfNextScriptRun = 0;
142 m_offsetX = offset;
143 }
144
145 // Advance to the next script run, returning false when the end of the
146 // TextRun has been reached.
nextScriptRun()147 bool ComplexTextController::nextScriptRun()
148 {
149 // Ensure we're not pointing at the small caps buffer.
150 m_item.string = m_run.characters();
151
152 if (!hb_utf16_script_run_next(0, &m_item.item, m_run.characters(), m_run.length(), &m_indexOfNextScriptRun))
153 return false;
154
155 // It is actually wrong to consider script runs at all in this code.
156 // Other WebKit code (e.g. Mac) segments complex text just by finding
157 // the longest span of text covered by a single font.
158 // But we currently need to call hb_utf16_script_run_next anyway to fill
159 // in the harfbuzz data structures to e.g. pick the correct script's shaper.
160 // So we allow that to run first, then do a second pass over the range it
161 // found and take the largest subregion that stays within a single font.
162 m_currentFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
163 unsigned endOfRun;
164 for (endOfRun = 1; endOfRun < m_item.item.length; ++endOfRun) {
165 const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos + endOfRun], false).fontData;
166 if (nextFontData != m_currentFontData)
167 break;
168 }
169 m_item.item.length = endOfRun;
170 m_indexOfNextScriptRun = m_item.item.pos + endOfRun;
171
172 setupFontForScriptRun();
173 shapeGlyphs();
174 setGlyphXPositions(rtl());
175
176 return true;
177 }
178
widthOfFullRun()179 float ComplexTextController::widthOfFullRun()
180 {
181 float widthSum = 0;
182 while (nextScriptRun())
183 widthSum += width();
184
185 return widthSum;
186 }
187
setupFontForScriptRun()188 void ComplexTextController::setupFontForScriptRun()
189 {
190 FontDataVariant fontDataVariant = AutoVariant;
191 // Determine if this script run needs to be converted to small caps.
192 // nextScriptRun() will always send us a run of the same case, because a
193 // case change while in small-caps mode always results in different
194 // FontData, so we only need to check the first character's case.
195 if (m_font->isSmallCaps() && u_islower(m_item.string[m_item.item.pos])) {
196 m_smallCapsString = String(m_run.data(m_item.item.pos), m_item.item.length);
197 m_smallCapsString.makeUpper();
198 m_item.string = m_smallCapsString.characters();
199 m_item.item.pos = 0;
200 fontDataVariant = SmallCapsVariant;
201 }
202 const FontData* fontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false, fontDataVariant).fontData;
203 const FontPlatformData& platformData = fontData->fontDataForCharacter(' ')->platformData();
204 m_item.face = platformData.harfbuzzFace();
205 void* opaquePlatformData = const_cast<FontPlatformData*>(&platformData);
206 m_item.font->userData = opaquePlatformData;
207
208 int size = platformData.size();
209 m_item.font->x_ppem = size;
210 m_item.font->y_ppem = size;
211 // x_ and y_scale are the conversion factors from font design space (fEmSize) to 1/64th of device pixels in 16.16 format.
212 const int devicePixelFraction = 64;
213 const int multiplyFor16Dot16 = 1 << 16;
214 int scale = devicePixelFraction * size * multiplyFor16Dot16 / platformData.emSizeInFontUnits();
215 m_item.font->x_scale = scale;
216 m_item.font->y_scale = scale;
217 }
218
allocHarfbuzzFont()219 HB_FontRec* ComplexTextController::allocHarfbuzzFont()
220 {
221 HB_FontRec* font = reinterpret_cast<HB_FontRec*>(fastMalloc(sizeof(HB_FontRec)));
222 memset(font, 0, sizeof(HB_FontRec));
223 font->klass = &harfbuzzSkiaClass;
224 font->userData = 0;
225
226 return font;
227 }
228
deleteGlyphArrays()229 void ComplexTextController::deleteGlyphArrays()
230 {
231 delete[] m_item.glyphs;
232 delete[] m_item.attributes;
233 delete[] m_item.advances;
234 delete[] m_item.offsets;
235 delete[] m_glyphs16;
236 delete[] m_xPositions;
237 }
238
createGlyphArrays(int size)239 void ComplexTextController::createGlyphArrays(int size)
240 {
241 m_item.glyphs = new HB_Glyph[size];
242 m_item.attributes = new HB_GlyphAttributes[size];
243 m_item.advances = new HB_Fixed[size];
244 m_item.offsets = new HB_FixedPoint[size];
245
246 m_glyphs16 = new uint16_t[size];
247 m_xPositions = new SkScalar[size];
248
249 m_item.num_glyphs = size;
250 m_glyphsArrayCapacity = size; // Save the GlyphArrays size.
251 resetGlyphArrays();
252 }
253
resetGlyphArrays()254 void ComplexTextController::resetGlyphArrays()
255 {
256 int size = m_item.num_glyphs;
257 // All the types here don't have pointers. It is safe to reset to
258 // zero unless Harfbuzz breaks the compatibility in the future.
259 memset(m_item.glyphs, 0, size * sizeof(HB_Glyph));
260 memset(m_item.attributes, 0, size * sizeof(HB_GlyphAttributes));
261 memset(m_item.advances, 0, size * sizeof(HB_Fixed));
262 memset(m_item.offsets, 0, size * sizeof(HB_FixedPoint));
263 memset(m_glyphs16, 0, size * sizeof(uint16_t));
264 memset(m_xPositions, 0, size * sizeof(SkScalar));
265 }
266
shapeGlyphs()267 void ComplexTextController::shapeGlyphs()
268 {
269 // HB_ShapeItem() resets m_item.num_glyphs. If the previous call to
270 // HB_ShapeItem() used less space than was available, the capacity of
271 // the array may be larger than the current value of m_item.num_glyphs.
272 // So, we need to reset the num_glyphs to the capacity of the array.
273 m_item.num_glyphs = m_glyphsArrayCapacity;
274 resetGlyphArrays();
275 while (!HB_ShapeItem(&m_item)) {
276 // We overflowed our arrays. Resize and retry.
277 // HB_ShapeItem fills in m_item.num_glyphs with the needed size.
278 deleteGlyphArrays();
279 // The |+ 1| here is a workaround for a bug in Harfbuzz: the Khmer
280 // shaper (at least) can fail because of insufficient glyph buffers
281 // and request 0 additional glyphs: throwing us into an infinite
282 // loop.
283 createGlyphArrays(m_item.num_glyphs + 1);
284 }
285 }
286
setGlyphXPositions(bool isRTL)287 void ComplexTextController::setGlyphXPositions(bool isRTL)
288 {
289 const double rtlFlip = isRTL ? -1 : 1;
290 double position = 0;
291
292 // logClustersIndex indexes logClusters for the first codepoint of the current glyph.
293 // Each time we advance a glyph, we skip over all the codepoints that contributed to the current glyph.
294 int logClustersIndex = 0;
295
296 // Iterate through the glyphs in logical order, flipping for RTL where necessary.
297 // Glyphs are positioned starting from m_offsetX; in RTL mode they go leftwards from there.
298 for (size_t i = 0; i < m_item.num_glyphs; ++i) {
299 while (static_cast<unsigned>(logClustersIndex) < m_item.item.length && logClusters()[logClustersIndex] < i)
300 logClustersIndex++;
301
302 // If the current glyph is just after a space, add in the word spacing.
303 position += determineWordBreakSpacing(logClustersIndex);
304
305 m_glyphs16[i] = m_item.glyphs[i];
306 double offsetX = truncateFixedPointToInteger(m_item.offsets[i].x);
307 double advance = truncateFixedPointToInteger(m_item.advances[i]);
308 if (isRTL)
309 offsetX -= advance;
310
311 m_xPositions[i] = m_offsetX + (position * rtlFlip) + offsetX;
312
313 if (m_currentFontData->isZeroWidthSpaceGlyph(m_glyphs16[i]))
314 continue;
315
316 // At the end of each cluster, add in the letter spacing.
317 if (i + 1 == m_item.num_glyphs || m_item.attributes[i + 1].clusterStart)
318 position += m_letterSpacing;
319
320 position += advance;
321 }
322 m_pixelWidth = std::max(position, 0.0);
323 m_offsetX += m_pixelWidth * rtlFlip;
324 }
325
normalizeSpacesAndMirrorChars(const UChar * source,bool rtl,UChar * destination,int length)326 void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length)
327 {
328 int position = 0;
329 bool error = false;
330 // Iterate characters in source and mirror character if needed.
331 while (position < length) {
332 UChar32 character;
333 int nextPosition = position;
334 U16_NEXT(source, nextPosition, length, character);
335 if (Font::treatAsSpace(character))
336 character = ' ';
337 else if (Font::treatAsZeroWidthSpace(character))
338 character = zeroWidthSpace;
339 else if (rtl)
340 character = u_charMirror(character);
341 U16_APPEND(destination, position, length, character, error);
342 ASSERT(!error);
343 position = nextPosition;
344 }
345 }
346
// Produces the run that is actually shaped. On success, |normalizedBuffer|
// receives a freshly allocated character buffer, |normalizedRun| receives a
// copy of |originalRun| pointing at that buffer, and a reference to
// *normalizedRun is returned. If NFC normalization fails, |originalRun| itself
// is returned unchanged and the two out-parameters are left untouched.
const TextRun& ComplexTextController::getNormalizedTextRun(const TextRun& originalRun, OwnPtr<TextRun>& normalizedRun, OwnArrayPtr<UChar>& normalizedBuffer)
{
    // Normalize the text run in three ways:
    // 1) Convert the |originalRun| to NFC normalized form if combining diacritical marks
    // (U+0300..) are used in the run. This conversion is necessary since most OpenType
    // fonts (e.g., Arial) don't have substitution rules for the diacritical marks in
    // their GSUB tables.
    //
    // Note that we don't use the icu::Normalizer::isNormalized(UNORM_NFC) API here since
    // the API returns FALSE (= not normalized) for complex runs that don't require NFC
    // normalization (e.g., Arabic text). Unless the run contains the diacritical marks,
    // Harfbuzz will do the same thing for us using the GSUB table.
    // 2) Convert spacing characters into plain spaces, as some fonts will provide glyphs
    // for characters like '\n' otherwise.
    // 3) Convert mirrored characters such as parenthesis for rtl text.

    // Convert to NFC form if the text has diacritical marks.
    icu::UnicodeString normalizedString;
    UErrorCode error = U_ZERO_ERROR;

    // Use a full-width index: a 16-bit counter would wrap around on runs
    // longer than 32767 code units, indexing the run with a negative value
    // and never reaching the loop bound.
    const int originalLength = originalRun.length();
    for (int i = 0; i < originalLength; ++i) {
        UChar ch = originalRun[i];
        if (::ublock_getCode(ch) == UBLOCK_COMBINING_DIACRITICAL_MARKS) {
            // One combining mark is enough to normalize the whole run once.
            icu::Normalizer::normalize(icu::UnicodeString(originalRun.characters(),
                                       originalRun.length()), UNORM_NFC, 0 /* no options */,
                                       normalizedString, error);
            if (U_FAILURE(error))
                return originalRun;
            break;
        }
    }

    // Normalize space and mirror parenthesis for rtl text.
    int normalizedBufferLength;
    const UChar* sourceText;
    if (normalizedString.isEmpty()) {
        // No NFC conversion took place; work from the original characters.
        normalizedBufferLength = originalRun.length();
        sourceText = originalRun.characters();
    } else {
        normalizedBufferLength = normalizedString.length();
        sourceText = normalizedString.getBuffer();
    }

    normalizedBuffer = adoptArrayPtr(new UChar[normalizedBufferLength + 1]);

    normalizeSpacesAndMirrorChars(sourceText, originalRun.rtl(), normalizedBuffer.get(), normalizedBufferLength);

    // Keep every other property of the original run; only the text changes.
    normalizedRun.set(new TextRun(originalRun));
    normalizedRun->setText(normalizedBuffer.get(), normalizedBufferLength);
    return *normalizedRun;
}
398
399 } // namespace WebCore
400