1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "GreedyLineBreak"
18
19 #include "minikin/Characters.h"
20 #include "minikin/LineBreaker.h"
21 #include "minikin/MeasuredText.h"
22 #include "minikin/Range.h"
23 #include "minikin/U16StringPiece.h"
24
25 #include "HyphenatorMap.h"
26 #include "LineBreakerUtil.h"
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "WordBreaker.h"
30
31 namespace minikin {
32
33 namespace {
34
35 constexpr uint32_t NOWHERE = 0xFFFFFFFF;
36
37 class GreedyLineBreaker {
38 public:
39 // User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
40 // destructed.
GreedyLineBreaker(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)41 GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
42 const LineWidth& lineWidthLimits, const TabStops& tabStops,
43 bool enableHyphenation)
44 : mLineWidthLimit(lineWidthLimits.getAt(0)),
45 mTextBuf(textBuf),
46 mMeasuredText(measured),
47 mLineWidthLimits(lineWidthLimits),
48 mTabStops(tabStops),
49 mEnableHyphenation(enableHyphenation) {}
50
51 void process();
52
53 LineBreakResult getResult() const;
54
55 private:
56 struct BreakPoint {
BreakPointminikin::__anon9c931a620111::GreedyLineBreaker::BreakPoint57 BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
58 EndHyphenEdit endHyphen)
59 : offset(offset),
60 lineWidth(lineWidth),
61 hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
62
63 uint32_t offset;
64 float lineWidth;
65 HyphenEdit hyphenEdit;
66 };
67
getPrevLineBreakOffset()68 inline uint32_t getPrevLineBreakOffset() {
69 return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
70 }
71
72 // Registers the break point and prepares for next line computation.
73 void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
74 float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
75 StartHyphenEdit nextLineStartHyphen);
76
77 // Update current line width.
78 void updateLineWidth(uint16_t c, float width);
79
80 // Break line if current line exceeds the line limit.
81 void processLineBreak(uint32_t offset, WordBreaker* breaker, bool doHyphenation);
82
83 // Try to break with previous word boundary.
84 // Returns false if unable to break by word boundary.
85 bool tryLineBreakWithWordBreak();
86
87 // Try to break with hyphenation.
88 // Returns false if unable to hyphenate.
89 //
90 // This method keeps hyphenation until the line width after line break meets the line width
91 // limit.
92 bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
93
94 // Do line break with each characters.
95 //
96 // This method only breaks at the first offset which has the longest width for the line width
97 // limit. This method don't keep line breaking even if the rest of the word exceeds the line
98 // width limit.
99 // This method return true if there is no characters to be processed.
100 bool doLineBreakWithGraphemeBounds(const Range& range);
101
102 // Info about the line currently processing.
103 uint32_t mLineNum = 0;
104 double mLineWidth = 0;
105 double mSumOfCharWidths = 0;
106 double mLineWidthLimit;
107 StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
108
109 // Previous word break point info.
110 uint32_t mPrevWordBoundsOffset = NOWHERE;
111 double mLineWidthAtPrevWordBoundary = 0;
112 double mSumOfCharWidthsAtPrevWordBoundary = 0;
113 bool mIsPrevWordBreakIsInEmailOrUrl = false;
114
115 // The hyphenator currently used.
116 const Hyphenator* mHyphenator = nullptr;
117
118 // Input parameters.
119 const U16StringPiece& mTextBuf;
120 const MeasuredText& mMeasuredText;
121 const LineWidth& mLineWidthLimits;
122 const TabStops& mTabStops;
123 bool mEnableHyphenation;
124
125 // The result of line breaking.
126 std::vector<BreakPoint> mBreakPoints;
127
128 MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
129 };
130
breakLineAt(uint32_t offset,float lineWidth,float remainingNextLineWidth,float remainingNextSumOfCharWidths,EndHyphenEdit thisLineEndHyphen,StartHyphenEdit nextLineStartHyphen)131 void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
132 float remainingNextSumOfCharWidths,
133 EndHyphenEdit thisLineEndHyphen,
134 StartHyphenEdit nextLineStartHyphen) {
135 // First, push the break to result.
136 mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
137
138 // Update the current line info.
139 mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
140 mLineWidth = remainingNextLineWidth;
141 mSumOfCharWidths = remainingNextSumOfCharWidths;
142 mStartHyphenEdit = nextLineStartHyphen;
143 mPrevWordBoundsOffset = NOWHERE;
144 mLineWidthAtPrevWordBoundary = 0;
145 mSumOfCharWidthsAtPrevWordBoundary = 0;
146 mIsPrevWordBreakIsInEmailOrUrl = false;
147 }
148
tryLineBreakWithWordBreak()149 bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
150 if (mPrevWordBoundsOffset == NOWHERE) {
151 return false; // No word break point before..
152 }
153
154 breakLineAt(mPrevWordBoundsOffset, // break offset
155 mLineWidthAtPrevWordBoundary, // line width
156 mLineWidth - mSumOfCharWidthsAtPrevWordBoundary, // remaining next line width
157 // remaining next sum of char widths.
158 mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
159 StartHyphenEdit::NO_EDIT); // No hyphen modification.
160 return true;
161 }
162
tryLineBreakWithHyphenation(const Range & range,WordBreaker * breaker)163 bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
164 if (!mEnableHyphenation || mHyphenator == nullptr) {
165 return false;
166 }
167
168 Run* targetRun = nullptr;
169 for (const auto& run : mMeasuredText.runs) {
170 if (run->getRange().contains(range)) {
171 targetRun = run.get();
172 }
173 }
174
175 if (targetRun == nullptr) {
176 return false; // The target range may lay on multiple run. Unable to hyphenate.
177 }
178
179 const Range targetRange = breaker->wordRange();
180 if (!range.contains(targetRange)) {
181 return false;
182 }
183
184 const std::vector<HyphenationType> hyphenResult =
185 hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
186 Range contextRange = range;
187 uint32_t prevOffset = NOWHERE;
188 float prevWidth = 0;
189
190 // Look up the hyphenation point from the begining.
191 for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
192 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
193 if (hyph == HyphenationType::DONT_BREAK) {
194 continue; // Not a hyphenation point.
195 }
196
197 const float width =
198 targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
199 mStartHyphenEdit, editForThisLine(hyph), nullptr);
200
201 if (width <= mLineWidthLimit) {
202 // There are still space, remember current offset and look up next hyphenation point.
203 prevOffset = i;
204 prevWidth = width;
205 continue;
206 }
207
208 if (prevOffset == NOWHERE) {
209 // Even with hyphenation, the piece is too long for line. Give up and break in
210 // character bounds.
211 doLineBreakWithGraphemeBounds(contextRange);
212 } else {
213 // Previous offset is the longest hyphenation piece. Break with it.
214 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
215 const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
216 const float remainingCharWidths = targetRun->measureHyphenPiece(
217 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
218 EndHyphenEdit::NO_EDIT, nullptr);
219 breakLineAt(prevOffset, prevWidth,
220 remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
221 editForThisLine(hyph), nextLineStartHyphenEdit);
222 }
223
224 if (mLineWidth <= mLineWidthLimit) {
225 // The remaining hyphenation piece is less than line width. No more hyphenation is
226 // needed. Go to next word.
227 return true;
228 }
229
230 // Even after line break, the remaining hyphenation piece is still too long for the limit.
231 // Keep hyphenating for the rest.
232 i = getPrevLineBreakOffset();
233 contextRange.setStart(i); // Update the hyphenation start point.
234 prevOffset = NOWHERE;
235 }
236
237 // Do the same line break at the end of text.
238 // TODO: Remove code duplication. This is the same as in the for loop but extracting function
239 // may not clear.
240 if (prevOffset == NOWHERE) {
241 doLineBreakWithGraphemeBounds(contextRange);
242 } else {
243 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
244 const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
245 const float remainingCharWidths = targetRun->measureHyphenPiece(
246 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
247 EndHyphenEdit::NO_EDIT, nullptr);
248
249 breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
250 remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
251 }
252
253 return true;
254 }
255
256 // TODO: Respect trailing line end spaces.
doLineBreakWithGraphemeBounds(const Range & range)257 bool GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
258 double width = mMeasuredText.widths[range.getStart()];
259
260 // Starting from + 1 since at least one character needs to be assigned to a line.
261 for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
262 const float w = mMeasuredText.widths[i];
263 if (w == 0) {
264 continue; // w == 0 means here is not a grapheme bounds. Don't break here.
265 }
266 if (width + w > mLineWidthLimit) {
267 // Okay, here is the longest position.
268 breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
269 EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
270
271 // This method only breaks at the first longest offset, since we may want to hyphenate
272 // the rest of the word.
273 return false;
274 } else {
275 width += w;
276 }
277 }
278
279 // Reaching here means even one character (or cluster) doesn't fit the line.
280 // Give up and break at the end of this range.
281 breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
282 return true;
283 }
284
updateLineWidth(uint16_t c,float width)285 void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
286 if (c == CHAR_TAB) {
287 mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
288 mLineWidth = mSumOfCharWidths;
289 } else {
290 mSumOfCharWidths += width;
291 if (!isLineEndSpace(c)) {
292 mLineWidth = mSumOfCharWidths;
293 }
294 }
295 }
296
processLineBreak(uint32_t offset,WordBreaker * breaker,bool doHyphenation)297 void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker,
298 bool doHyphenation) {
299 while (mLineWidth > mLineWidthLimit) {
300 const Range lineRange(getPrevLineBreakOffset(), offset); // The range we need to address.
301 if (tryLineBreakWithWordBreak()) {
302 continue; // The word in the new line may still be too long for the line limit.
303 } else if (doHyphenation && tryLineBreakWithHyphenation(lineRange, breaker)) {
304 continue; // TODO: we may be able to return here.
305 } else {
306 if (doLineBreakWithGraphemeBounds(lineRange)) {
307 return;
308 }
309 }
310 }
311
312 // There is still spaces, remember current word break point as a candidate and wait next word.
313 const bool isInEmailOrUrl = breaker->breakBadness() != 0;
314 if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
315 mPrevWordBoundsOffset = offset;
316 mLineWidthAtPrevWordBoundary = mLineWidth;
317 mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
318 mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
319 }
320 }
321
process()322 void GreedyLineBreaker::process() {
323 WordBreaker wordBreaker;
324 wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
325
326 // Following two will be initialized after the first iteration.
327 uint32_t localeListId = LocaleListCache::kInvalidListId;
328 uint32_t nextWordBoundaryOffset = 0;
329 for (const auto& run : mMeasuredText.runs) {
330 const Range range = run->getRange();
331
332 // Update locale if necessary.
333 uint32_t newLocaleListId = run->getLocaleListId();
334 if (localeListId != newLocaleListId) {
335 Locale locale = getEffectiveLocale(newLocaleListId);
336 nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
337 mHyphenator = HyphenatorMap::lookup(locale);
338 localeListId = newLocaleListId;
339 }
340
341 for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
342 updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
343
344 if ((i + 1) == nextWordBoundaryOffset) {
345 // Only process line break at word boundary and the run can break into some pieces.
346 if (run->canBreak() || nextWordBoundaryOffset == range.getEnd()) {
347 processLineBreak(i + 1, &wordBreaker, run->canBreak());
348 }
349 nextWordBoundaryOffset = wordBreaker.next();
350 }
351 }
352 }
353
354 if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
355 // The remaining words in the last line.
356 breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
357 StartHyphenEdit::NO_EDIT);
358 }
359 }
360
getResult() const361 LineBreakResult GreedyLineBreaker::getResult() const {
362 constexpr int TAB_BIT = 1 << 29; // Must be the same in StaticLayout.java
363
364 LineBreakResult out;
365 uint32_t prevBreakOffset = 0;
366 for (const auto& breakPoint : mBreakPoints) {
367 // TODO: compute these during line breaking if these takes longer time.
368 bool hasTabChar = false;
369 for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
370 hasTabChar |= mTextBuf[i] == CHAR_TAB;
371 }
372
373 MinikinExtent extent =
374 mMeasuredText.getExtent(mTextBuf, Range(prevBreakOffset, breakPoint.offset));
375 out.breakPoints.push_back(breakPoint.offset);
376 out.widths.push_back(breakPoint.lineWidth);
377 out.ascents.push_back(extent.ascent);
378 out.descents.push_back(extent.descent);
379 out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
380
381 prevBreakOffset = breakPoint.offset;
382 }
383 return out;
384 }
385
386 } // namespace
387
breakLineGreedy(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)388 LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
389 const LineWidth& lineWidthLimits, const TabStops& tabStops,
390 bool enableHyphenation) {
391 if (textBuf.size() == 0) {
392 return LineBreakResult();
393 }
394 GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
395 lineBreaker.process();
396 return lineBreaker.getResult();
397 }
398
399 } // namespace minikin
400