• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define VERBOSE_DEBUG 0
18 
19 #define LOG_TAG "Minikin"
20 
21 #include <algorithm>
22 #include <climits>
23 #include <limits>
24 
25 #include <log/log.h>
26 
27 #include <minikin/Layout.h>
28 #include <minikin/LineBreaker.h>
29 #include <utils/WindowsUtils.h>
30 #include "LayoutUtils.h"
31 
32 using std::vector;
33 
34 namespace minikin {
35 
36 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line.
37 // All these constants are larger than any reasonable actual width score.
38 const float SCORE_INFTY = std::numeric_limits<float>::max();
39 const float SCORE_OVERFULL = 1e12f;
40 const float SCORE_DESPERATE = 1e10f;
41 
42 // Multiplier for hyphen penalty on last line.
43 const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
44 // Penalty assigned to each line break (to try to minimize number of lines)
45 // TODO: when we implement full justification (so spaces can shrink and
46 // stretch), this is probably not the most appropriate method.
47 const float LINE_PENALTY_MULTIPLIER = 2.0f;
48 
49 // Penalty assigned to shrinking the whitepsace.
50 const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
51 
52 // Very long words trigger O(n^2) behavior in hyphenation, so we disable
53 // hyphenation for unreasonably long words. This is somewhat of a heuristic
54 // because extremely long words are possible in some languages. This does mean
55 // that very long real words can get broken by desperate breaks, with no
56 // hyphens.
57 const size_t LONGEST_HYPHENATED_WORD = 45;
58 
59 // When the text buffer is within this limit, capacity of vectors is retained at
60 // finish(), to avoid allocation.
61 const size_t MAX_TEXT_BUF_RETAIN = 32678;
62 
63 // Maximum amount that spaces can shrink, in justified text.
64 const float SHRINKABILITY = 1.0 / 3.0;
65 
setLocale()66 void LineBreaker::setLocale() {
67   mWordBreaker.setLocale();
68   mLocale = icu::Locale();
69   mHyphenator = nullptr;
70 }
71 
setText()72 void LineBreaker::setText() {
73   mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
74 
75   // handle initial break here because addStyleRun may never be called
76   mWordBreaker.next();
77   mCandidates.clear();
78   Candidate cand = {0,   0, 0.0, 0.0, 0.0,
79                     0.0, 0, 0,   0,   HyphenationType::DONT_BREAK};
80   mCandidates.push_back(cand);
81 
82   // reset greedy breaker state
83   mBreaks.clear();
84   mWidths.clear();
85   mFlags.clear();
86   mLastBreak = 0;
87   mBestBreak = 0;
88   mBestScore = SCORE_INFTY;
89   mPreBreak = 0;
90   mLastHyphenation = HyphenEdit::NO_EDIT;
91   mFirstTabIndex = INT_MAX;
92   mSpaceCount = 0;
93 }
94 
setLineWidths(float firstWidth,int firstWidthLineCount,float restWidth)95 void LineBreaker::setLineWidths(float firstWidth,
96                                 int firstWidthLineCount,
97                                 float restWidth) {
98   mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
99 }
100 
setIndents(const std::vector<float> & indents)101 void LineBreaker::setIndents(const std::vector<float>& indents) {
102   mLineWidths.setIndents(indents);
103 }
104 
105 // This function determines whether a character is a space that disappears at
106 // end of line. It is the Unicode set:
107 // [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'. Note:
108 // all such characters are in the BMP, so it's ok to use code units for this.
isLineEndSpace(uint16_t c)109 bool isLineEndSpace(uint16_t c) {
110   return c == '\n' || c == ' ' || c == 0x1680 ||
111          (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F ||
112          c == 0x3000;
113 }
114 
115 // Ordinarily, this method measures the text in the range given. However, when
116 // paint is nullptr, it assumes the widths have already been calculated and
117 // stored in the width buffer. This method finds the candidate word breaks
118 // (using the ICU break iterator) and sends them to addCandidate.
addStyleRun(MinikinPaint * paint,const std::shared_ptr<FontCollection> & typeface,FontStyle style,size_t start,size_t end,bool isRtl)119 float LineBreaker::addStyleRun(MinikinPaint* paint,
120                                const std::shared_ptr<FontCollection>& typeface,
121                                FontStyle style,
122                                size_t start,
123                                size_t end,
124                                bool isRtl) {
125   float width = 0.0f;
126   int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
127 
128   float hyphenPenalty = 0.0;
129   if (paint != nullptr) {
130     width = Layout::measureText(mTextBuf.data(), start, end - start,
131                                 mTextBuf.size(), bidiFlags, style, *paint,
132                                 typeface, mCharWidths.data() + start);
133 
134     // a heuristic that seems to perform well
135     hyphenPenalty =
136         0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
137     if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
138       hyphenPenalty *=
139           4.0;  // TODO: Replace with a better value after some testing
140     }
141 
142     if (mJustified) {
143       // Make hyphenation more aggressive for fully justified text (so that
144       // "normal" in justified mode is the same as "full" in ragged-right).
145       hyphenPenalty *= 0.25;
146     } else {
147       // Line penalty is zero for justified text.
148       mLinePenalty =
149           std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
150     }
151   }
152 
153   size_t current = (size_t)mWordBreaker.current();
154   size_t afterWord = start;
155   size_t lastBreak = start;
156   ParaWidth lastBreakWidth = mWidth;
157   ParaWidth postBreak = mWidth;
158   size_t postSpaceCount = mSpaceCount;
159   for (size_t i = start; i < end; i++) {
160     uint16_t c = mTextBuf[i];
161     // libtxt: Tab handling was removed here.
162     if (isWordSpace(c))
163       mSpaceCount += 1;
164     mWidth += mCharWidths[i];
165     if (!isLineEndSpace(c)) {
166       postBreak = mWidth;
167       postSpaceCount = mSpaceCount;
168       afterWord = i + 1;
169     }
170     if (i + 1 == current) {
171       size_t wordStart = mWordBreaker.wordStart();
172       size_t wordEnd = mWordBreaker.wordEnd();
173       if (paint != nullptr && mHyphenator != nullptr &&
174           mHyphenationFrequency != kHyphenationFrequency_None &&
175           wordStart >= start && wordEnd > wordStart &&
176           wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
177         mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart],
178                                wordEnd - wordStart, mLocale);
179 #if VERBOSE_DEBUG
180         std::string hyphenatedString;
181         for (size_t j = wordStart; j < wordEnd; j++) {
182           if (mHyphBuf[j - wordStart] ==
183               HyphenationType::BREAK_AND_INSERT_HYPHEN) {
184             hyphenatedString.push_back('-');
185           }
186           // Note: only works with ASCII, should do UTF-8 conversion here
187           hyphenatedString.push_back(buffer()[j]);
188         }
189         ALOGD("hyphenated string: %s", hyphenatedString.c_str());
190 #endif
191 
192         // measure hyphenated substrings
193         for (size_t j = wordStart; j < wordEnd; j++) {
194           HyphenationType hyph = mHyphBuf[j - wordStart];
195           if (hyph != HyphenationType::DONT_BREAK) {
196             paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
197             const float firstPartWidth = Layout::measureText(
198                 mTextBuf.data(), lastBreak, j - lastBreak, mTextBuf.size(),
199                 bidiFlags, style, *paint, typeface, nullptr);
200             ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
201 
202             paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
203             const float secondPartWidth = Layout::measureText(
204                 mTextBuf.data(), j, afterWord - j, mTextBuf.size(), bidiFlags,
205                 style, *paint, typeface, nullptr);
206             ParaWidth hyphPreBreak = postBreak - secondPartWidth;
207 
208             addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount,
209                          postSpaceCount, hyphenPenalty, hyph);
210 
211             paint->hyphenEdit = HyphenEdit::NO_EDIT;
212           }
213         }
214       }
215 
216       // Skip break for zero-width characters inside replacement span
217       if (paint != nullptr || current == end || mCharWidths[current] > 0) {
218         float penalty = hyphenPenalty * mWordBreaker.breakBadness();
219         addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount,
220                      penalty, HyphenationType::DONT_BREAK);
221       }
222       lastBreak = current;
223       lastBreakWidth = mWidth;
224       current = (size_t)mWordBreaker.next();
225     }
226   }
227 
228   return width;
229 }
230 
231 // add a word break (possibly for a hyphenated fragment), and add desperate
232 // breaks if needed (ie when word exceeds current line width)
addWordBreak(size_t offset,ParaWidth preBreak,ParaWidth postBreak,size_t preSpaceCount,size_t postSpaceCount,float penalty,HyphenationType hyph)233 void LineBreaker::addWordBreak(size_t offset,
234                                ParaWidth preBreak,
235                                ParaWidth postBreak,
236                                size_t preSpaceCount,
237                                size_t postSpaceCount,
238                                float penalty,
239                                HyphenationType hyph) {
240   Candidate cand;
241   ParaWidth width = mCandidates.back().preBreak;
242   // libtxt: add a fudge factor to this comparison.  The currentLineWidth passed
243   // by the framework is based on maxIntrinsicWidth/Layout::measureText
244   // calculations that may not precisely match the postBreak width.
245   if (IsSplittingCharacters(postBreak)) {
246     // Add desperate breaks.
247     // Note: these breaks are based on the shaping of the (non-broken) original
248     // text; they are imprecise especially in the presence of kerning,
249     // ligatures, and Arabic shaping.
250     size_t i = mCandidates.back().offset;
251     width += mCharWidths[i++];
252     for (; i < offset; i++) {
253       float w = mCharWidths[i];
254       if (w > 0) {
255         cand.offset = i;
256         cand.preBreak = width;
257         cand.postBreak = width;
258         // postSpaceCount doesn't include trailing spaces
259         cand.preSpaceCount = postSpaceCount;
260         cand.postSpaceCount = postSpaceCount;
261         cand.penalty = SCORE_DESPERATE;
262         cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
263 #if VERBOSE_DEBUG
264         ALOGD("desperate cand: %zd %g:%g", mCandidates.size(), cand.postBreak,
265               cand.preBreak);
266 #endif
267         addCandidate(cand);
268         width += w;
269       }
270     }
271   }
272 
273   cand.offset = offset;
274   cand.preBreak = preBreak;
275   cand.postBreak = postBreak;
276   cand.penalty = penalty;
277   cand.preSpaceCount = preSpaceCount;
278   cand.postSpaceCount = postSpaceCount;
279   cand.hyphenType = hyph;
280 #if VERBOSE_DEBUG
281   ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
282 #endif
283   addCandidate(cand);
284 }
285 
IsSplittingCharacters(ParaWidth postBreak)286 bool LineBreaker::IsSplittingCharacters(ParaWidth postBreak) {
287   ParaWidth width = mCandidates.back().preBreak;
288   return mWordBreakType == WordBreakType::kWordBreakType_BreakAll ||
289           (postBreak - width > currentLineWidth() + 0.00001 &&
290           mWordBreakType != WordBreakType::kWordBreakType_Normal);
291 }
292 
293 // Helper method for addCandidate()
pushGreedyBreak()294 void LineBreaker::pushGreedyBreak() {
295   const Candidate& bestCandidate = mCandidates[mBestBreak];
296   pushBreak(
297       bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
298       mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
299   mBestScore = SCORE_INFTY;
300 #if VERBOSE_DEBUG
301   ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
302 #endif
303   mLastBreak = mBestBreak;
304   mPreBreak = bestCandidate.preBreak;
305   mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
306 }
307 
308 // TODO performance: could avoid populating mCandidates if greedy only
addCandidate(Candidate cand)309 void LineBreaker::addCandidate(Candidate cand) {
310   const size_t candIndex = mCandidates.size();
311   mCandidates.push_back(cand);
312 
313   // mLastBreak is the index of the last line break we decided to do in
314   // mCandidates, and mPreBreak is its preBreak value. mBestBreak is the index
315   // of the best line breaking candidate we have found since then, and
316   // mBestScore is its penalty.
317   if (cand.postBreak - mPreBreak > currentLineWidth()) {
318     // This break would create an overfull line, pick the best break and break
319     // there (greedy)
320     if (mBestBreak == mLastBreak) {
321       // No good break has been found since last break. Break here.
322       mBestBreak = candIndex;
323     }
324     pushGreedyBreak();
325   }
326 
327   if (mWordBreakType == WordBreakType::kWordBreakType_BreakAll &&
328       cand.postBreak - mPreBreak <= currentLineWidth()) {
329     mBestBreak = candIndex;
330   }
331 
332   while (mLastBreak != candIndex &&
333          cand.postBreak - mPreBreak > currentLineWidth()) {
334     // We should rarely come here. But if we are here, we have broken the line,
335     // but the remaining part still doesn't fit. We now need to break at the
336     // second best place after the last break, but we have not kept that
337     // information, so we need to go back and find it.
338     //
339     // In some really rare cases, postBreak - preBreak of a candidate itself may
340     // be over the current line width. We protect ourselves against an infinite
341     // loop in that case by checking that we have not broken the line at this
342     // candidate already.
343     for (size_t i = mLastBreak + 1; i < candIndex; i++) {
344       const float penalty = mCandidates[i].penalty;
345       if (penalty <= mBestScore) {
346         mBestBreak = i;
347         mBestScore = penalty;
348       }
349     }
350     if (mBestBreak == mLastBreak) {
351       // We didn't find anything good. Break here.
352       mBestBreak = candIndex;
353     }
354     pushGreedyBreak();
355   }
356 
357   if (cand.penalty <= mBestScore) {
358     mBestBreak = candIndex;
359     mBestScore = cand.penalty;
360   }
361 }
362 
pushBreak(int offset,float width,uint8_t hyphenEdit)363 void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
364   mBreaks.push_back(offset);
365   mWidths.push_back(width);
366   int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
367   flags |= hyphenEdit;
368   mFlags.push_back(flags);
369   mFirstTabIndex = INT_MAX;
370 }
371 
372 // libtxt: Add ability to set custom char widths. This allows manual definition
373 // of the widths of arbitrary glyphs. To linebreak properly, call addStyleRun
374 // with nullptr as the paint property, which will lead it to assume the width
375 // has already been calculated. Used for properly breaking inline widgets.
setCustomCharWidth(size_t offset,float width)376 void LineBreaker::setCustomCharWidth(size_t offset, float width) {
377   mCharWidths[offset] = (width);
378 }
379 
addReplacement(size_t start,size_t end,float width)380 void LineBreaker::addReplacement(size_t start, size_t end, float width) {
381   mCharWidths[start] = width;
382   std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
383   addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
384 }
385 
386 // Get the width of a space. May return 0 if there are no spaces.
387 // Note: if there are multiple different widths for spaces (for example, because
388 // of mixing of fonts), it's only guaranteed to pick one.
getSpaceWidth() const389 float LineBreaker::getSpaceWidth() const {
390   for (size_t i = 0; i < mTextBuf.size(); i++) {
391     if (isWordSpace(mTextBuf[i])) {
392       return mCharWidths[i];
393     }
394   }
395   return 0.0f;
396 }
397 
currentLineWidth() const398 float LineBreaker::currentLineWidth() const {
399   return mLineWidths.getLineWidth(mBreaks.size());
400 }
401 
computeBreaksGreedy()402 void LineBreaker::computeBreaksGreedy() {
403   // All breaks but the last have been added in addCandidate already.
404   size_t nCand = mCandidates.size();
405   if (nCand > 0 && (nCand == 1 || mLastBreak != nCand - 1)) {
406     pushBreak(mCandidates[nCand - 1].offset,
407               mCandidates[nCand - 1].postBreak - mPreBreak, mLastHyphenation);
408     // don't need to update mBestScore, because we're done
409 #if VERBOSE_DEBUG
410     ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
411 #endif
412   }
413 }
414 
415 // Follow "prev" links in mCandidates array, and copy to result arrays.
finishBreaksOptimal()416 void LineBreaker::finishBreaksOptimal() {
417   // clear existing greedy break result
418   mBreaks.clear();
419   mWidths.clear();
420   mFlags.clear();
421   size_t nCand = mCandidates.size();
422   size_t prev;
423   for (size_t i = nCand - 1; i > 0; i = prev) {
424     prev = mCandidates[i].prev;
425     mBreaks.push_back(mCandidates[i].offset);
426     mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
427     int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
428     if (prev > 0) {
429       flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
430     }
431     mFlags.push_back(flags);
432   }
433   std::reverse(mBreaks.begin(), mBreaks.end());
434   std::reverse(mWidths.begin(), mWidths.end());
435   std::reverse(mFlags.begin(), mFlags.end());
436 }
437 
computeBreaksOptimal(bool isRectangle)438 void LineBreaker::computeBreaksOptimal(bool isRectangle) {
439   size_t active = 0;
440   size_t nCand = mCandidates.size();
441   float width = mLineWidths.getLineWidth(0);
442   float shortLineFactor = mJustified ? 0.75f : 0.5f;
443   float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
444 
445   // "i" iterates through candidates for the end of the line.
446   for (size_t i = 1; i < nCand; i++) {
447     bool atEnd = i == nCand - 1;
448     float best = SCORE_INFTY;
449     size_t bestPrev = 0;
450     size_t lineNumberLast = 0;
451 
452     if (!isRectangle) {
453       size_t lineNumberLast = mCandidates[active].lineNumber;
454       width = mLineWidths.getLineWidth(lineNumberLast);
455     }
456     ParaWidth leftEdge = mCandidates[i].postBreak - width;
457     float bestHope = 0;
458 
459     // "j" iterates through candidates for the beginning of the line.
460     for (size_t j = active; j < i; j++) {
461       if (!isRectangle) {
462         size_t lineNumber = mCandidates[j].lineNumber;
463         if (lineNumber != lineNumberLast) {
464           float widthNew = mLineWidths.getLineWidth(lineNumber);
465           if (widthNew != width) {
466             leftEdge = mCandidates[i].postBreak - width;
467             bestHope = 0;
468             width = widthNew;
469           }
470           lineNumberLast = lineNumber;
471         }
472       }
473       float jScore = mCandidates[j].score;
474       if (jScore + bestHope >= best)
475         continue;
476       float delta = mCandidates[j].preBreak - leftEdge;
477 
478       // compute width score for line
479 
480       // Note: the "bestHope" optimization makes the assumption that, when delta
481       // is non-negative, widthScore will increase monotonically as successive
482       // candidate breaks are considered.
483       float widthScore = 0.0f;
484       float additionalPenalty = 0.0f;
485       if ((atEnd || !mJustified) && delta < 0) {
486         widthScore = SCORE_OVERFULL;
487       } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
488         // increase penalty for hyphen on last line
489         additionalPenalty =
490             LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
491         // Penalize very short (< 1 - shortLineFactor of total width) lines.
492         float underfill = delta - shortLineFactor * width;
493         widthScore = underfill > 0 ? underfill * underfill : 0;
494       } else {
495         widthScore = delta * delta;
496         if (delta < 0) {
497           if (-delta < maxShrink * (mCandidates[i].postSpaceCount -
498                                     mCandidates[j].preSpaceCount)) {
499             widthScore *= SHRINK_PENALTY_MULTIPLIER;
500           } else {
501             widthScore = SCORE_OVERFULL;
502           }
503         }
504       }
505 
506       if (delta < 0) {
507         active = j + 1;
508       } else {
509         bestHope = widthScore;
510       }
511 
512       float score = jScore + widthScore + additionalPenalty;
513       if (score <= best) {
514         best = score;
515         bestPrev = j;
516       }
517     }
518     mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
519     mCandidates[i].prev = bestPrev;
520     mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
521 #if VERBOSE_DEBUG
522     ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score,
523           mCandidates[i].prev);
524 #endif
525   }
526   finishBreaksOptimal();
527 }
528 
computeBreaks()529 size_t LineBreaker::computeBreaks() {
530   if (mStrategy == kBreakStrategy_Greedy) {
531     computeBreaksGreedy();
532   } else {
533     computeBreaksOptimal(mLineWidths.isConstant());
534   }
535   return mBreaks.size();
536 }
537 
finish()538 void LineBreaker::finish() {
539   mWordBreaker.finish();
540   mWidth = 0;
541   mLineWidths.clear();
542   mCandidates.clear();
543   mBreaks.clear();
544   mWidths.clear();
545   mFlags.clear();
546   if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
547     mTextBuf.clear();
548     mTextBuf.shrink_to_fit();
549     mCharWidths.clear();
550     mCharWidths.shrink_to_fit();
551     mHyphBuf.clear();
552     mHyphBuf.shrink_to_fit();
553     mCandidates.shrink_to_fit();
554     mBreaks.shrink_to_fit();
555     mWidths.shrink_to_fit();
556     mFlags.shrink_to_fit();
557   }
558   mStrategy = kBreakStrategy_Greedy;
559   mHyphenationFrequency = kHyphenationFrequency_Normal;
560   mLinePenalty = 0.0f;
561   mJustified = false;
562 }
563 
564 }  // namespace minikin
565