1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define VERBOSE_DEBUG 0
18
19 #define LOG_TAG "Minikin"
20
21 #include <algorithm>
22 #include <climits>
23 #include <limits>
24
25 #include <log/log.h>
26
27 #include <minikin/Layout.h>
28 #include <minikin/LineBreaker.h>
29 #include <utils/WindowsUtils.h>
30 #include "LayoutUtils.h"
31
32 using std::vector;
33
34 namespace minikin {
35
36 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line.
37 // All these constants are larger than any reasonable actual width score.
38 const float SCORE_INFTY = std::numeric_limits<float>::max();
39 const float SCORE_OVERFULL = 1e12f;
40 const float SCORE_DESPERATE = 1e10f;
41
42 // Multiplier for hyphen penalty on last line.
43 const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
44 // Penalty assigned to each line break (to try to minimize number of lines)
45 // TODO: when we implement full justification (so spaces can shrink and
46 // stretch), this is probably not the most appropriate method.
47 const float LINE_PENALTY_MULTIPLIER = 2.0f;
48
49 // Penalty assigned to shrinking the whitepsace.
50 const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
51
52 // Very long words trigger O(n^2) behavior in hyphenation, so we disable
53 // hyphenation for unreasonably long words. This is somewhat of a heuristic
54 // because extremely long words are possible in some languages. This does mean
55 // that very long real words can get broken by desperate breaks, with no
56 // hyphens.
57 const size_t LONGEST_HYPHENATED_WORD = 45;
58
59 // When the text buffer is within this limit, capacity of vectors is retained at
60 // finish(), to avoid allocation.
61 const size_t MAX_TEXT_BUF_RETAIN = 32678;
62
63 // Maximum amount that spaces can shrink, in justified text.
64 const float SHRINKABILITY = 1.0 / 3.0;
65
setLocale()66 void LineBreaker::setLocale() {
67 mWordBreaker.setLocale();
68 mLocale = icu::Locale();
69 mHyphenator = nullptr;
70 }
71
setText()72 void LineBreaker::setText() {
73 mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
74
75 // handle initial break here because addStyleRun may never be called
76 mWordBreaker.next();
77 mCandidates.clear();
78 Candidate cand = {0, 0, 0.0, 0.0, 0.0,
79 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
80 mCandidates.push_back(cand);
81
82 // reset greedy breaker state
83 mBreaks.clear();
84 mWidths.clear();
85 mFlags.clear();
86 mLastBreak = 0;
87 mBestBreak = 0;
88 mBestScore = SCORE_INFTY;
89 mPreBreak = 0;
90 mLastHyphenation = HyphenEdit::NO_EDIT;
91 mFirstTabIndex = INT_MAX;
92 mSpaceCount = 0;
93 }
94
setLineWidths(float firstWidth,int firstWidthLineCount,float restWidth)95 void LineBreaker::setLineWidths(float firstWidth,
96 int firstWidthLineCount,
97 float restWidth) {
98 mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
99 }
100
setIndents(const std::vector<float> & indents)101 void LineBreaker::setIndents(const std::vector<float>& indents) {
102 mLineWidths.setIndents(indents);
103 }
104
105 // This function determines whether a character is a space that disappears at
106 // end of line. It is the Unicode set:
107 // [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'. Note:
108 // all such characters are in the BMP, so it's ok to use code units for this.
isLineEndSpace(uint16_t c)109 bool isLineEndSpace(uint16_t c) {
110 return c == '\n' || c == ' ' || c == 0x1680 ||
111 (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F ||
112 c == 0x3000;
113 }
114
115 // Ordinarily, this method measures the text in the range given. However, when
116 // paint is nullptr, it assumes the widths have already been calculated and
117 // stored in the width buffer. This method finds the candidate word breaks
118 // (using the ICU break iterator) and sends them to addCandidate.
addStyleRun(MinikinPaint * paint,const std::shared_ptr<FontCollection> & typeface,FontStyle style,size_t start,size_t end,bool isRtl)119 float LineBreaker::addStyleRun(MinikinPaint* paint,
120 const std::shared_ptr<FontCollection>& typeface,
121 FontStyle style,
122 size_t start,
123 size_t end,
124 bool isRtl) {
125 float width = 0.0f;
126 int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
127
128 float hyphenPenalty = 0.0;
129 if (paint != nullptr) {
130 width = Layout::measureText(mTextBuf.data(), start, end - start,
131 mTextBuf.size(), bidiFlags, style, *paint,
132 typeface, mCharWidths.data() + start);
133
134 // a heuristic that seems to perform well
135 hyphenPenalty =
136 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
137 if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
138 hyphenPenalty *=
139 4.0; // TODO: Replace with a better value after some testing
140 }
141
142 if (mJustified) {
143 // Make hyphenation more aggressive for fully justified text (so that
144 // "normal" in justified mode is the same as "full" in ragged-right).
145 hyphenPenalty *= 0.25;
146 } else {
147 // Line penalty is zero for justified text.
148 mLinePenalty =
149 std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
150 }
151 }
152
153 size_t current = (size_t)mWordBreaker.current();
154 size_t afterWord = start;
155 size_t lastBreak = start;
156 ParaWidth lastBreakWidth = mWidth;
157 ParaWidth postBreak = mWidth;
158 size_t postSpaceCount = mSpaceCount;
159 for (size_t i = start; i < end; i++) {
160 uint16_t c = mTextBuf[i];
161 // libtxt: Tab handling was removed here.
162 if (isWordSpace(c))
163 mSpaceCount += 1;
164 mWidth += mCharWidths[i];
165 if (!isLineEndSpace(c)) {
166 postBreak = mWidth;
167 postSpaceCount = mSpaceCount;
168 afterWord = i + 1;
169 }
170 if (i + 1 == current) {
171 size_t wordStart = mWordBreaker.wordStart();
172 size_t wordEnd = mWordBreaker.wordEnd();
173 if (paint != nullptr && mHyphenator != nullptr &&
174 mHyphenationFrequency != kHyphenationFrequency_None &&
175 wordStart >= start && wordEnd > wordStart &&
176 wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
177 mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart],
178 wordEnd - wordStart, mLocale);
179 #if VERBOSE_DEBUG
180 std::string hyphenatedString;
181 for (size_t j = wordStart; j < wordEnd; j++) {
182 if (mHyphBuf[j - wordStart] ==
183 HyphenationType::BREAK_AND_INSERT_HYPHEN) {
184 hyphenatedString.push_back('-');
185 }
186 // Note: only works with ASCII, should do UTF-8 conversion here
187 hyphenatedString.push_back(buffer()[j]);
188 }
189 ALOGD("hyphenated string: %s", hyphenatedString.c_str());
190 #endif
191
192 // measure hyphenated substrings
193 for (size_t j = wordStart; j < wordEnd; j++) {
194 HyphenationType hyph = mHyphBuf[j - wordStart];
195 if (hyph != HyphenationType::DONT_BREAK) {
196 paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
197 const float firstPartWidth = Layout::measureText(
198 mTextBuf.data(), lastBreak, j - lastBreak, mTextBuf.size(),
199 bidiFlags, style, *paint, typeface, nullptr);
200 ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
201
202 paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
203 const float secondPartWidth = Layout::measureText(
204 mTextBuf.data(), j, afterWord - j, mTextBuf.size(), bidiFlags,
205 style, *paint, typeface, nullptr);
206 ParaWidth hyphPreBreak = postBreak - secondPartWidth;
207
208 addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount,
209 postSpaceCount, hyphenPenalty, hyph);
210
211 paint->hyphenEdit = HyphenEdit::NO_EDIT;
212 }
213 }
214 }
215
216 // Skip break for zero-width characters inside replacement span
217 if (paint != nullptr || current == end || mCharWidths[current] > 0) {
218 float penalty = hyphenPenalty * mWordBreaker.breakBadness();
219 addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount,
220 penalty, HyphenationType::DONT_BREAK);
221 }
222 lastBreak = current;
223 lastBreakWidth = mWidth;
224 current = (size_t)mWordBreaker.next();
225 }
226 }
227
228 return width;
229 }
230
231 // add a word break (possibly for a hyphenated fragment), and add desperate
232 // breaks if needed (ie when word exceeds current line width)
addWordBreak(size_t offset,ParaWidth preBreak,ParaWidth postBreak,size_t preSpaceCount,size_t postSpaceCount,float penalty,HyphenationType hyph)233 void LineBreaker::addWordBreak(size_t offset,
234 ParaWidth preBreak,
235 ParaWidth postBreak,
236 size_t preSpaceCount,
237 size_t postSpaceCount,
238 float penalty,
239 HyphenationType hyph) {
240 Candidate cand;
241 ParaWidth width = mCandidates.back().preBreak;
242 // libtxt: add a fudge factor to this comparison. The currentLineWidth passed
243 // by the framework is based on maxIntrinsicWidth/Layout::measureText
244 // calculations that may not precisely match the postBreak width.
245 if (IsSplittingCharacters(postBreak)) {
246 // Add desperate breaks.
247 // Note: these breaks are based on the shaping of the (non-broken) original
248 // text; they are imprecise especially in the presence of kerning,
249 // ligatures, and Arabic shaping.
250 size_t i = mCandidates.back().offset;
251 width += mCharWidths[i++];
252 for (; i < offset; i++) {
253 float w = mCharWidths[i];
254 if (w > 0) {
255 cand.offset = i;
256 cand.preBreak = width;
257 cand.postBreak = width;
258 // postSpaceCount doesn't include trailing spaces
259 cand.preSpaceCount = postSpaceCount;
260 cand.postSpaceCount = postSpaceCount;
261 cand.penalty = SCORE_DESPERATE;
262 cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
263 #if VERBOSE_DEBUG
264 ALOGD("desperate cand: %zd %g:%g", mCandidates.size(), cand.postBreak,
265 cand.preBreak);
266 #endif
267 addCandidate(cand);
268 width += w;
269 }
270 }
271 }
272
273 cand.offset = offset;
274 cand.preBreak = preBreak;
275 cand.postBreak = postBreak;
276 cand.penalty = penalty;
277 cand.preSpaceCount = preSpaceCount;
278 cand.postSpaceCount = postSpaceCount;
279 cand.hyphenType = hyph;
280 #if VERBOSE_DEBUG
281 ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
282 #endif
283 addCandidate(cand);
284 }
285
IsSplittingCharacters(ParaWidth postBreak)286 bool LineBreaker::IsSplittingCharacters(ParaWidth postBreak) {
287 ParaWidth width = mCandidates.back().preBreak;
288 return mWordBreakType == WordBreakType::kWordBreakType_BreakAll ||
289 (postBreak - width > currentLineWidth() + 0.00001 &&
290 mWordBreakType != WordBreakType::kWordBreakType_Normal);
291 }
292
293 // Helper method for addCandidate()
pushGreedyBreak()294 void LineBreaker::pushGreedyBreak() {
295 const Candidate& bestCandidate = mCandidates[mBestBreak];
296 pushBreak(
297 bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
298 mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
299 mBestScore = SCORE_INFTY;
300 #if VERBOSE_DEBUG
301 ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
302 #endif
303 mLastBreak = mBestBreak;
304 mPreBreak = bestCandidate.preBreak;
305 mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
306 }
307
308 // TODO performance: could avoid populating mCandidates if greedy only
addCandidate(Candidate cand)309 void LineBreaker::addCandidate(Candidate cand) {
310 const size_t candIndex = mCandidates.size();
311 mCandidates.push_back(cand);
312
313 // mLastBreak is the index of the last line break we decided to do in
314 // mCandidates, and mPreBreak is its preBreak value. mBestBreak is the index
315 // of the best line breaking candidate we have found since then, and
316 // mBestScore is its penalty.
317 if (cand.postBreak - mPreBreak > currentLineWidth()) {
318 // This break would create an overfull line, pick the best break and break
319 // there (greedy)
320 if (mBestBreak == mLastBreak) {
321 // No good break has been found since last break. Break here.
322 mBestBreak = candIndex;
323 }
324 pushGreedyBreak();
325 }
326
327 if (mWordBreakType == WordBreakType::kWordBreakType_BreakAll &&
328 cand.postBreak - mPreBreak <= currentLineWidth()) {
329 mBestBreak = candIndex;
330 }
331
332 while (mLastBreak != candIndex &&
333 cand.postBreak - mPreBreak > currentLineWidth()) {
334 // We should rarely come here. But if we are here, we have broken the line,
335 // but the remaining part still doesn't fit. We now need to break at the
336 // second best place after the last break, but we have not kept that
337 // information, so we need to go back and find it.
338 //
339 // In some really rare cases, postBreak - preBreak of a candidate itself may
340 // be over the current line width. We protect ourselves against an infinite
341 // loop in that case by checking that we have not broken the line at this
342 // candidate already.
343 for (size_t i = mLastBreak + 1; i < candIndex; i++) {
344 const float penalty = mCandidates[i].penalty;
345 if (penalty <= mBestScore) {
346 mBestBreak = i;
347 mBestScore = penalty;
348 }
349 }
350 if (mBestBreak == mLastBreak) {
351 // We didn't find anything good. Break here.
352 mBestBreak = candIndex;
353 }
354 pushGreedyBreak();
355 }
356
357 if (cand.penalty <= mBestScore) {
358 mBestBreak = candIndex;
359 mBestScore = cand.penalty;
360 }
361 }
362
pushBreak(int offset,float width,uint8_t hyphenEdit)363 void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
364 mBreaks.push_back(offset);
365 mWidths.push_back(width);
366 int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
367 flags |= hyphenEdit;
368 mFlags.push_back(flags);
369 mFirstTabIndex = INT_MAX;
370 }
371
372 // libtxt: Add ability to set custom char widths. This allows manual definition
373 // of the widths of arbitrary glyphs. To linebreak properly, call addStyleRun
374 // with nullptr as the paint property, which will lead it to assume the width
375 // has already been calculated. Used for properly breaking inline widgets.
setCustomCharWidth(size_t offset,float width)376 void LineBreaker::setCustomCharWidth(size_t offset, float width) {
377 mCharWidths[offset] = (width);
378 }
379
addReplacement(size_t start,size_t end,float width)380 void LineBreaker::addReplacement(size_t start, size_t end, float width) {
381 mCharWidths[start] = width;
382 std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
383 addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
384 }
385
386 // Get the width of a space. May return 0 if there are no spaces.
387 // Note: if there are multiple different widths for spaces (for example, because
388 // of mixing of fonts), it's only guaranteed to pick one.
getSpaceWidth() const389 float LineBreaker::getSpaceWidth() const {
390 for (size_t i = 0; i < mTextBuf.size(); i++) {
391 if (isWordSpace(mTextBuf[i])) {
392 return mCharWidths[i];
393 }
394 }
395 return 0.0f;
396 }
397
currentLineWidth() const398 float LineBreaker::currentLineWidth() const {
399 return mLineWidths.getLineWidth(mBreaks.size());
400 }
401
computeBreaksGreedy()402 void LineBreaker::computeBreaksGreedy() {
403 // All breaks but the last have been added in addCandidate already.
404 size_t nCand = mCandidates.size();
405 if (nCand > 0 && (nCand == 1 || mLastBreak != nCand - 1)) {
406 pushBreak(mCandidates[nCand - 1].offset,
407 mCandidates[nCand - 1].postBreak - mPreBreak, mLastHyphenation);
408 // don't need to update mBestScore, because we're done
409 #if VERBOSE_DEBUG
410 ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
411 #endif
412 }
413 }
414
415 // Follow "prev" links in mCandidates array, and copy to result arrays.
finishBreaksOptimal()416 void LineBreaker::finishBreaksOptimal() {
417 // clear existing greedy break result
418 mBreaks.clear();
419 mWidths.clear();
420 mFlags.clear();
421 size_t nCand = mCandidates.size();
422 size_t prev;
423 for (size_t i = nCand - 1; i > 0; i = prev) {
424 prev = mCandidates[i].prev;
425 mBreaks.push_back(mCandidates[i].offset);
426 mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
427 int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
428 if (prev > 0) {
429 flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
430 }
431 mFlags.push_back(flags);
432 }
433 std::reverse(mBreaks.begin(), mBreaks.end());
434 std::reverse(mWidths.begin(), mWidths.end());
435 std::reverse(mFlags.begin(), mFlags.end());
436 }
437
computeBreaksOptimal(bool isRectangle)438 void LineBreaker::computeBreaksOptimal(bool isRectangle) {
439 size_t active = 0;
440 size_t nCand = mCandidates.size();
441 float width = mLineWidths.getLineWidth(0);
442 float shortLineFactor = mJustified ? 0.75f : 0.5f;
443 float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
444
445 // "i" iterates through candidates for the end of the line.
446 for (size_t i = 1; i < nCand; i++) {
447 bool atEnd = i == nCand - 1;
448 float best = SCORE_INFTY;
449 size_t bestPrev = 0;
450 size_t lineNumberLast = 0;
451
452 if (!isRectangle) {
453 size_t lineNumberLast = mCandidates[active].lineNumber;
454 width = mLineWidths.getLineWidth(lineNumberLast);
455 }
456 ParaWidth leftEdge = mCandidates[i].postBreak - width;
457 float bestHope = 0;
458
459 // "j" iterates through candidates for the beginning of the line.
460 for (size_t j = active; j < i; j++) {
461 if (!isRectangle) {
462 size_t lineNumber = mCandidates[j].lineNumber;
463 if (lineNumber != lineNumberLast) {
464 float widthNew = mLineWidths.getLineWidth(lineNumber);
465 if (widthNew != width) {
466 leftEdge = mCandidates[i].postBreak - width;
467 bestHope = 0;
468 width = widthNew;
469 }
470 lineNumberLast = lineNumber;
471 }
472 }
473 float jScore = mCandidates[j].score;
474 if (jScore + bestHope >= best)
475 continue;
476 float delta = mCandidates[j].preBreak - leftEdge;
477
478 // compute width score for line
479
480 // Note: the "bestHope" optimization makes the assumption that, when delta
481 // is non-negative, widthScore will increase monotonically as successive
482 // candidate breaks are considered.
483 float widthScore = 0.0f;
484 float additionalPenalty = 0.0f;
485 if ((atEnd || !mJustified) && delta < 0) {
486 widthScore = SCORE_OVERFULL;
487 } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
488 // increase penalty for hyphen on last line
489 additionalPenalty =
490 LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
491 // Penalize very short (< 1 - shortLineFactor of total width) lines.
492 float underfill = delta - shortLineFactor * width;
493 widthScore = underfill > 0 ? underfill * underfill : 0;
494 } else {
495 widthScore = delta * delta;
496 if (delta < 0) {
497 if (-delta < maxShrink * (mCandidates[i].postSpaceCount -
498 mCandidates[j].preSpaceCount)) {
499 widthScore *= SHRINK_PENALTY_MULTIPLIER;
500 } else {
501 widthScore = SCORE_OVERFULL;
502 }
503 }
504 }
505
506 if (delta < 0) {
507 active = j + 1;
508 } else {
509 bestHope = widthScore;
510 }
511
512 float score = jScore + widthScore + additionalPenalty;
513 if (score <= best) {
514 best = score;
515 bestPrev = j;
516 }
517 }
518 mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
519 mCandidates[i].prev = bestPrev;
520 mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
521 #if VERBOSE_DEBUG
522 ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score,
523 mCandidates[i].prev);
524 #endif
525 }
526 finishBreaksOptimal();
527 }
528
computeBreaks()529 size_t LineBreaker::computeBreaks() {
530 if (mStrategy == kBreakStrategy_Greedy) {
531 computeBreaksGreedy();
532 } else {
533 computeBreaksOptimal(mLineWidths.isConstant());
534 }
535 return mBreaks.size();
536 }
537
finish()538 void LineBreaker::finish() {
539 mWordBreaker.finish();
540 mWidth = 0;
541 mLineWidths.clear();
542 mCandidates.clear();
543 mBreaks.clear();
544 mWidths.clear();
545 mFlags.clear();
546 if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
547 mTextBuf.clear();
548 mTextBuf.shrink_to_fit();
549 mCharWidths.clear();
550 mCharWidths.shrink_to_fit();
551 mHyphBuf.clear();
552 mHyphBuf.shrink_to_fit();
553 mCandidates.shrink_to_fit();
554 mBreaks.shrink_to_fit();
555 mWidths.shrink_to_fit();
556 mFlags.shrink_to_fit();
557 }
558 mStrategy = kBreakStrategy_Greedy;
559 mHyphenationFrequency = kHyphenationFrequency_Normal;
560 mLinePenalty = 0.0f;
561 mJustified = false;
562 }
563
564 } // namespace minikin
565