• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define VERBOSE_DEBUG 0
18 
19 #define LOG_TAG "Minikin"
20 
21 #include <limits>
22 
23 #include <log/log.h>
24 
25 #include "LayoutUtils.h"
26 #include <minikin/Layout.h>
27 #include <minikin/LineBreaker.h>
28 
29 using std::vector;
30 
31 namespace minikin {
32 
33 const int CHAR_TAB = 0x0009;
34 
35 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
36 // constants are larger than any reasonable actual width score.
37 const float SCORE_INFTY = std::numeric_limits<float>::max();
38 const float SCORE_OVERFULL = 1e12f;
39 const float SCORE_DESPERATE = 1e10f;
40 
41 // Multiplier for hyphen penalty on last line.
42 const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
43 // Penalty assigned to each line break (to try to minimize number of lines)
44 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
45 // probably not the most appropriate method.
46 const float LINE_PENALTY_MULTIPLIER = 2.0f;
47 
48 // Penalty assigned to shrinking the whitepsace.
49 const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
50 
51 // Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
52 // unreasonably long words. This is somewhat of a heuristic because extremely long words
53 // are possible in some languages. This does mean that very long real words can get
54 // broken by desperate breaks, with no hyphens.
55 const size_t LONGEST_HYPHENATED_WORD = 45;
56 
57 // When the text buffer is within this limit, capacity of vectors is retained at finish(),
58 // to avoid allocation.
59 const size_t MAX_TEXT_BUF_RETAIN = 32678;
60 
61 // Maximum amount that spaces can shrink, in justified text.
62 const float SHRINKABILITY = 1.0 / 3.0;
63 
setLocales(const char * locales,const std::vector<Hyphenator * > & hyphenators)64 void LineBreaker::setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators) {
65     bool goodLocaleFound = false;
66     const ssize_t numLocales = hyphenators.size();
67     // For now, we ignore all locales except the first valid one.
68     // TODO: Support selecting the locale based on the script of the text.
69     const char* localeStart = locales;
70     for (ssize_t i = 0; i < numLocales - 1; i++) { // Loop over all locales, except the last one.
71         const char* localeEnd = strchr(localeStart, ',');
72         const size_t localeNameLength = localeEnd - localeStart;
73         char localeName[localeNameLength + 1];
74         strncpy(localeName, localeStart, localeNameLength);
75         localeName[localeNameLength] = '\0';
76         mLocale = icu::Locale::createFromName(localeName);
77         goodLocaleFound = !mLocale.isBogus();
78         if (goodLocaleFound) {
79             mHyphenator = hyphenators[i];
80             break;
81         } else {
82             localeStart = localeEnd + 1;
83         }
84     }
85     if (!goodLocaleFound) { // Try the last locale.
86         mLocale = icu::Locale::createFromName(localeStart);
87         if (mLocale.isBogus()) {
88             // No good locale.
89             mLocale = icu::Locale::getRoot();
90             mHyphenator = nullptr;
91         } else {
92             mHyphenator = numLocales == 0 ? nullptr : hyphenators[numLocales - 1];
93         }
94     }
95     mWordBreaker.setLocale(mLocale);
96 }
97 
setText()98 void LineBreaker::setText() {
99     mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
100 
101     // handle initial break here because addStyleRun may never be called
102     mWordBreaker.next();
103     mCandidates.clear();
104     Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
105     mCandidates.push_back(cand);
106 
107     // reset greedy breaker state
108     mBreaks.clear();
109     mWidths.clear();
110     mFlags.clear();
111     mLastBreak = 0;
112     mBestBreak = 0;
113     mBestScore = SCORE_INFTY;
114     mPreBreak = 0;
115     mLastHyphenation = HyphenEdit::NO_EDIT;
116     mFirstTabIndex = INT_MAX;
117     mSpaceCount = 0;
118 }
119 
setLineWidths(float firstWidth,int firstWidthLineCount,float restWidth)120 void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
121     mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
122 }
123 
124 
setIndents(const std::vector<float> & indents)125 void LineBreaker::setIndents(const std::vector<float>& indents) {
126     mLineWidths.setIndents(indents);
127 }
128 
129 // This function determines whether a character is a space that disappears at end of line.
130 // It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
131 // plus '\n'.
132 // Note: all such characters are in the BMP, so it's ok to use code units for this.
isLineEndSpace(uint16_t c)133 static bool isLineEndSpace(uint16_t c) {
134     return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
135             c == 0x205F || c == 0x3000;
136 }
137 
138 // Ordinarily, this method measures the text in the range given. However, when paint
139 // is nullptr, it assumes the widths have already been calculated and stored in the
140 // width buffer.
141 // This method finds the candidate word breaks (using the ICU break iterator) and sends them
142 // to addCandidate.
addStyleRun(MinikinPaint * paint,const std::shared_ptr<FontCollection> & typeface,FontStyle style,size_t start,size_t end,bool isRtl)143 float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
144         FontStyle style, size_t start, size_t end, bool isRtl) {
145     float width = 0.0f;
146     int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
147 
148     float hyphenPenalty = 0.0;
149     if (paint != nullptr) {
150         width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
151                 style, *paint, typeface, mCharWidths.data() + start);
152 
153         // a heuristic that seems to perform well
154         hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
155         if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
156             hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
157         }
158 
159         if (mJustified) {
160             // Make hyphenation more aggressive for fully justified text (so that "normal" in
161             // justified mode is the same as "full" in ragged-right).
162             hyphenPenalty *= 0.25;
163         } else {
164             // Line penalty is zero for justified text.
165             mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
166         }
167     }
168 
169     size_t current = (size_t)mWordBreaker.current();
170     size_t afterWord = start;
171     size_t lastBreak = start;
172     ParaWidth lastBreakWidth = mWidth;
173     ParaWidth postBreak = mWidth;
174     size_t postSpaceCount = mSpaceCount;
175     for (size_t i = start; i < end; i++) {
176         uint16_t c = mTextBuf[i];
177         if (c == CHAR_TAB) {
178             mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
179             if (mFirstTabIndex == INT_MAX) {
180                 mFirstTabIndex = (int)i;
181             }
182             // fall back to greedy; other modes don't know how to deal with tabs
183             mStrategy = kBreakStrategy_Greedy;
184         } else {
185             if (isWordSpace(c)) mSpaceCount += 1;
186             mWidth += mCharWidths[i];
187             if (!isLineEndSpace(c)) {
188                 postBreak = mWidth;
189                 postSpaceCount = mSpaceCount;
190                 afterWord = i + 1;
191             }
192         }
193         if (i + 1 == current) {
194             size_t wordStart = mWordBreaker.wordStart();
195             size_t wordEnd = mWordBreaker.wordEnd();
196             if (paint != nullptr && mHyphenator != nullptr &&
197                     mHyphenationFrequency != kHyphenationFrequency_None &&
198                     wordStart >= start && wordEnd > wordStart &&
199                     wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
200                 mHyphenator->hyphenate(&mHyphBuf,
201                         &mTextBuf[wordStart],
202                         wordEnd - wordStart,
203                         mLocale);
204 #if VERBOSE_DEBUG
205                 std::string hyphenatedString;
206                 for (size_t j = wordStart; j < wordEnd; j++) {
207                     if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
208                         hyphenatedString.push_back('-');
209                     }
210                     // Note: only works with ASCII, should do UTF-8 conversion here
211                     hyphenatedString.push_back(buffer()[j]);
212                 }
213                 ALOGD("hyphenated string: %s", hyphenatedString.c_str());
214 #endif
215 
216                 // measure hyphenated substrings
217                 for (size_t j = wordStart; j < wordEnd; j++) {
218                     HyphenationType hyph = mHyphBuf[j - wordStart];
219                     if (hyph != HyphenationType::DONT_BREAK) {
220                         paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
221                         const float firstPartWidth = Layout::measureText(mTextBuf.data(),
222                                 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
223                                 *paint, typeface, nullptr);
224                         ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
225 
226                         paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
227                         const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
228                                 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
229                                 typeface, nullptr);
230                         ParaWidth hyphPreBreak = postBreak - secondPartWidth;
231 
232                         addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
233                                 hyphenPenalty, hyph);
234 
235                         paint->hyphenEdit = HyphenEdit::NO_EDIT;
236                     }
237                 }
238             }
239 
240             // Skip break for zero-width characters inside replacement span
241             if (paint != nullptr || current == end || mCharWidths[current] > 0) {
242                 float penalty = hyphenPenalty * mWordBreaker.breakBadness();
243                 addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
244                         HyphenationType::DONT_BREAK);
245             }
246             lastBreak = current;
247             lastBreakWidth = mWidth;
248             current = (size_t)mWordBreaker.next();
249         }
250     }
251 
252     return width;
253 }
254 
255 // add a word break (possibly for a hyphenated fragment), and add desperate breaks if
256 // needed (ie when word exceeds current line width)
addWordBreak(size_t offset,ParaWidth preBreak,ParaWidth postBreak,size_t preSpaceCount,size_t postSpaceCount,float penalty,HyphenationType hyph)257 void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
258         size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
259     Candidate cand;
260     ParaWidth width = mCandidates.back().preBreak;
261     if (postBreak - width > currentLineWidth()) {
262         // Add desperate breaks.
263         // Note: these breaks are based on the shaping of the (non-broken) original text; they
264         // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
265         size_t i = mCandidates.back().offset;
266         width += mCharWidths[i++];
267         for (; i < offset; i++) {
268             float w = mCharWidths[i];
269             if (w > 0) {
270                 cand.offset = i;
271                 cand.preBreak = width;
272                 cand.postBreak = width;
273                 // postSpaceCount doesn't include trailing spaces
274                 cand.preSpaceCount = postSpaceCount;
275                 cand.postSpaceCount = postSpaceCount;
276                 cand.penalty = SCORE_DESPERATE;
277                 cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
278 #if VERBOSE_DEBUG
279                 ALOGD("desperate cand: %zd %g:%g",
280                         mCandidates.size(), cand.postBreak, cand.preBreak);
281 #endif
282                 addCandidate(cand);
283                 width += w;
284             }
285         }
286     }
287 
288     cand.offset = offset;
289     cand.preBreak = preBreak;
290     cand.postBreak = postBreak;
291     cand.penalty = penalty;
292     cand.preSpaceCount = preSpaceCount;
293     cand.postSpaceCount = postSpaceCount;
294     cand.hyphenType = hyph;
295 #if VERBOSE_DEBUG
296     ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
297 #endif
298     addCandidate(cand);
299 }
300 
301 // Helper method for addCandidate()
pushGreedyBreak()302 void LineBreaker::pushGreedyBreak() {
303     const Candidate& bestCandidate = mCandidates[mBestBreak];
304     pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
305             mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
306     mBestScore = SCORE_INFTY;
307 #if VERBOSE_DEBUG
308     ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
309 #endif
310     mLastBreak = mBestBreak;
311     mPreBreak = bestCandidate.preBreak;
312     mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
313 }
314 
315 // TODO performance: could avoid populating mCandidates if greedy only
addCandidate(Candidate cand)316 void LineBreaker::addCandidate(Candidate cand) {
317     const size_t candIndex = mCandidates.size();
318     mCandidates.push_back(cand);
319 
320     // mLastBreak is the index of the last line break we decided to do in mCandidates,
321     // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
322     // we have found since then, and mBestScore is its penalty.
323     if (cand.postBreak - mPreBreak > currentLineWidth()) {
324         // This break would create an overfull line, pick the best break and break there (greedy)
325         if (mBestBreak == mLastBreak) {
326             // No good break has been found since last break. Break here.
327             mBestBreak = candIndex;
328         }
329         pushGreedyBreak();
330     }
331 
332     while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
333         // We should rarely come here. But if we are here, we have broken the line, but the
334         // remaining part still doesn't fit. We now need to break at the second best place after the
335         // last break, but we have not kept that information, so we need to go back and find it.
336         //
337         // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
338         // current line width. We protect ourselves against an infinite loop in that case by
339         // checking that we have not broken the line at this candidate already.
340         for (size_t i = mLastBreak + 1; i < candIndex; i++) {
341             const float penalty = mCandidates[i].penalty;
342             if (penalty <= mBestScore) {
343                 mBestBreak = i;
344                 mBestScore = penalty;
345             }
346         }
347         if (mBestBreak == mLastBreak) {
348             // We didn't find anything good. Break here.
349             mBestBreak = candIndex;
350         }
351         pushGreedyBreak();
352     }
353 
354     if (cand.penalty <= mBestScore) {
355         mBestBreak = candIndex;
356         mBestScore = cand.penalty;
357     }
358 }
359 
pushBreak(int offset,float width,uint8_t hyphenEdit)360 void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
361     mBreaks.push_back(offset);
362     mWidths.push_back(width);
363     int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
364     flags |= hyphenEdit;
365     mFlags.push_back(flags);
366     mFirstTabIndex = INT_MAX;
367 }
368 
addReplacement(size_t start,size_t end,float width)369 void LineBreaker::addReplacement(size_t start, size_t end, float width) {
370     mCharWidths[start] = width;
371     std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
372     addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
373 }
374 
375 // Get the width of a space. May return 0 if there are no spaces.
376 // Note: if there are multiple different widths for spaces (for example, because of mixing of
377 // fonts), it's only guaranteed to pick one.
getSpaceWidth() const378 float LineBreaker::getSpaceWidth() const {
379     for (size_t i = 0; i < mTextBuf.size(); i++) {
380         if (isWordSpace(mTextBuf[i])) {
381             return mCharWidths[i];
382         }
383     }
384     return 0.0f;
385 }
386 
currentLineWidth() const387 float LineBreaker::currentLineWidth() const {
388     return mLineWidths.getLineWidth(mBreaks.size());
389 }
390 
computeBreaksGreedy()391 void LineBreaker::computeBreaksGreedy() {
392     // All breaks but the last have been added in addCandidate already.
393     size_t nCand = mCandidates.size();
394     if (nCand == 1 || mLastBreak != nCand - 1) {
395         pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
396                 mLastHyphenation);
397         // don't need to update mBestScore, because we're done
398 #if VERBOSE_DEBUG
399         ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
400 #endif
401     }
402 }
403 
404 // Follow "prev" links in mCandidates array, and copy to result arrays.
finishBreaksOptimal()405 void LineBreaker::finishBreaksOptimal() {
406     // clear existing greedy break result
407     mBreaks.clear();
408     mWidths.clear();
409     mFlags.clear();
410     size_t nCand = mCandidates.size();
411     size_t prev;
412     for (size_t i = nCand - 1; i > 0; i = prev) {
413         prev = mCandidates[i].prev;
414         mBreaks.push_back(mCandidates[i].offset);
415         mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
416         int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
417         if (prev > 0) {
418             flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
419         }
420         mFlags.push_back(flags);
421     }
422     std::reverse(mBreaks.begin(), mBreaks.end());
423     std::reverse(mWidths.begin(), mWidths.end());
424     std::reverse(mFlags.begin(), mFlags.end());
425 }
426 
computeBreaksOptimal(bool isRectangle)427 void LineBreaker::computeBreaksOptimal(bool isRectangle) {
428     size_t active = 0;
429     size_t nCand = mCandidates.size();
430     float width = mLineWidths.getLineWidth(0);
431     float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
432 
433     // "i" iterates through candidates for the end of the line.
434     for (size_t i = 1; i < nCand; i++) {
435         bool atEnd = i == nCand - 1;
436         float best = SCORE_INFTY;
437         size_t bestPrev = 0;
438         size_t lineNumberLast = 0;
439 
440         if (!isRectangle) {
441             size_t lineNumberLast = mCandidates[active].lineNumber;
442             width = mLineWidths.getLineWidth(lineNumberLast);
443         }
444         ParaWidth leftEdge = mCandidates[i].postBreak - width;
445         float bestHope = 0;
446 
447         // "j" iterates through candidates for the beginning of the line.
448         for (size_t j = active; j < i; j++) {
449             if (!isRectangle) {
450                 size_t lineNumber = mCandidates[j].lineNumber;
451                 if (lineNumber != lineNumberLast) {
452                     float widthNew = mLineWidths.getLineWidth(lineNumber);
453                     if (widthNew != width) {
454                         leftEdge = mCandidates[i].postBreak - width;
455                         bestHope = 0;
456                         width = widthNew;
457                     }
458                     lineNumberLast = lineNumber;
459                 }
460             }
461             float jScore = mCandidates[j].score;
462             if (jScore + bestHope >= best) continue;
463             float delta = mCandidates[j].preBreak - leftEdge;
464 
465             // compute width score for line
466 
467             // Note: the "bestHope" optimization makes the assumption that, when delta is
468             // non-negative, widthScore will increase monotonically as successive candidate
469             // breaks are considered.
470             float widthScore = 0.0f;
471             float additionalPenalty = 0.0f;
472             if ((atEnd || !mJustified) && delta < 0) {
473                 widthScore = SCORE_OVERFULL;
474             } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
475                 // increase penalty for hyphen on last line
476                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
477             } else {
478                 widthScore = delta * delta;
479                 if (delta < 0) {
480                     if (-delta < maxShrink *
481                             (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
482                         widthScore *= SHRINK_PENALTY_MULTIPLIER;
483                     } else {
484                         widthScore = SCORE_OVERFULL;
485                     }
486                 }
487             }
488 
489             if (delta < 0) {
490                 active = j + 1;
491             } else {
492                 bestHope = widthScore;
493             }
494 
495             float score = jScore + widthScore + additionalPenalty;
496             if (score <= best) {
497                 best = score;
498                 bestPrev = j;
499             }
500         }
501         mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
502         mCandidates[i].prev = bestPrev;
503         mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
504 #if VERBOSE_DEBUG
505         ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
506 #endif
507     }
508     finishBreaksOptimal();
509 }
510 
computeBreaks()511 size_t LineBreaker::computeBreaks() {
512     if (mStrategy == kBreakStrategy_Greedy) {
513         computeBreaksGreedy();
514     } else {
515         computeBreaksOptimal(mLineWidths.isConstant());
516     }
517     return mBreaks.size();
518 }
519 
finish()520 void LineBreaker::finish() {
521     mWordBreaker.finish();
522     mWidth = 0;
523     mLineWidths.clear();
524     mCandidates.clear();
525     mBreaks.clear();
526     mWidths.clear();
527     mFlags.clear();
528     if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
529         mTextBuf.clear();
530         mTextBuf.shrink_to_fit();
531         mCharWidths.clear();
532         mCharWidths.shrink_to_fit();
533         mHyphBuf.clear();
534         mHyphBuf.shrink_to_fit();
535         mCandidates.shrink_to_fit();
536         mBreaks.shrink_to_fit();
537         mWidths.shrink_to_fit();
538         mFlags.shrink_to_fit();
539     }
540     mStrategy = kBreakStrategy_Greedy;
541     mHyphenationFrequency = kHyphenationFrequency_Normal;
542     mLinePenalty = 0.0f;
543     mJustified = false;
544 }
545 
546 }  // namespace minikin
547