1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "text_breaker.h"
17
18 #include <cassert>
19
20 #include <unicode/uchar.h>
21
22 #include "measurer.h"
23 #include "texgine/any_span.h"
24 #include "texgine_exception.h"
25 #include "text_span.h"
26 #include "texgine/utils/exlog.h"
27 #ifdef LOGGER_ENABLE_SCOPE
28 #include "texgine/utils/trace.h"
29 #endif
30 #include "text_converter.h"
31 #include "word_breaker.h"
32
33 namespace OHOS {
34 namespace Rosen {
35 namespace TextEngine {
// UTF-16 code units that IsQuote() recognizes as quotation marks.
// U+201C LEFT DOUBLE QUOTATION MARK
#define CN_LEFT_QUOTE 0x201C
// U+201D RIGHT DOUBLE QUOTATION MARK
#define CN_RIGHT_QUOTE 0x201D
// U+0022 QUOTATION MARK (ASCII double quote)
#define EN_QUOTE 0x22
// Width limit value for which WordBreak() skips its ellipsis truncation step.
#define CUSTOM_MAX_WIDTH_LIMIT 1e9
40
SetWidthLimit(const double widthLimit)41 void TextBreaker::SetWidthLimit(const double widthLimit)
42 {
43 widthLimit_ = widthLimit;
44 }
45
SetIndents(const std::vector<float> & indents)46 void TextBreaker::SetIndents(const std::vector<float> &indents)
47 {
48 indents_ = indents;
49 }
50
// Returns the indent to use for the given index.
//   - empty list            -> 0.0
//   - index inside the list -> indents[index]
//   - index past the end    -> the last entry of the list
static double GetIndent(const int index, const std::vector<float> &indents)
{
    if (indents.empty()) {
        return 0.0;
    }

    const bool insideList = index < static_cast<int>(indents.size());
    if (insideList) {
        return indents[index];
    }
    return indents.back();
}
62
CreateNewBoundary(const CharGroups & cgs,std::vector<Boundary> & boundaries,const TypographyStyle & ys,const double & originWidthLimit,int & index)63 void TextBreaker::CreateNewBoundary(const CharGroups &cgs, std::vector<Boundary> &boundaries,
64 const TypographyStyle &ys, const double& originWidthLimit, int& index)
65 {
66 GenNewBoundryByHardBreak(cgs, boundaries);
67 if (ys.wordBreakType != WordBreakType::NORMAL) {
68 GenNewBoundryByWidth(cgs, boundaries, originWidthLimit, index);
69 }
70 GenNewBoundryByTypeface(cgs, boundaries);
71 GenNewBoundryByQuote(cgs, boundaries);
72 preBreak_ = 0;
73 postBreak_ = 0;
74 }
75
WordBreak(std::vector<VariantSpan> & spans,const TypographyStyle & ys,const std::shared_ptr<FontProviders> & fontProviders)76 int TextBreaker::WordBreak(std::vector<VariantSpan> &spans, const TypographyStyle &ys,
77 const std::shared_ptr<FontProviders> &fontProviders)
78 {
79 #ifdef LOGGER_ENABLE_SCOPE
80 ScopedTrace scope("TextBreaker::WordBreak");
81 #endif
82 std::vector<VariantSpan> visitingSpans;
83 std::swap(visitingSpans, spans);
84 int index = 0;
85 double widthLimit = widthLimit_;
86 for (const auto &vspan : visitingSpans) {
87 auto span = vspan.TryToTextSpan();
88 if (span == nullptr) {
89 widthLimit_ -= GetIndent(index, indents_);
90 spans.push_back(vspan);
91 currentWidth_ += vspan.GetWidth();
92 currentWidth_ = currentWidth_ >= widthLimit_ ? 0 : currentWidth_;
93 widthLimit_ = widthLimit;
94 continue;
95 }
96
97 std::vector<uint16_t> &u16vect = span->u16vect_;
98 if (!u16vect.size()) {
99 continue;
100 }
101 widthLimit_ -= GetIndent(index, indents_);
102 if (ys.ellipsis.length() && ys.maxLines == std::numeric_limits<size_t>::max() &&
103 widthLimit_ != CUSTOM_MAX_WIDTH_LIMIT && widthLimit_ && u16vect.size() > widthLimit_) {
104 u16vect.erase(u16vect.begin()+widthLimit_-1, u16vect.end()); // Textoverflow status
105 }
106 auto xs = vspan.GetTextStyle();
107 auto fontCollection = GenerateFontCollection(ys, xs, fontProviders);
108 if (fontCollection == nullptr) {
109 // WordBreak failed
110 return 1;
111 }
112
113 CharGroups cgs;
114 std::vector<Boundary> boundaries;
115 if (Measure(xs, u16vect, *fontCollection, cgs, boundaries)) {
116 return 1;
117 }
118
119 CreateNewBoundary(cgs, boundaries, ys, widthLimit, index);
120 for (auto &[start, end] : boundaries) {
121 const auto &wordcgs = cgs.GetSubFromU16RangeAll(start, end);
122 BreakWord(wordcgs, ys, xs, spans);
123 }
124 widthLimit_ = widthLimit;
125 index++;
126 }
127 // WordBreak successed
128 return 0;
129 }
130
GenerateFontCollection(const TypographyStyle & ys,const TextStyle & xs,const std::shared_ptr<FontProviders> & fontProviders)131 std::shared_ptr<FontCollection> TextBreaker::GenerateFontCollection(const TypographyStyle &ys,
132 const TextStyle &xs, const std::shared_ptr<FontProviders> &fontProviders) noexcept(false)
133 {
134 LOGSCOPED(sl, LOGEX_FUNC_LINE_DEBUG(), "TextBreaker::GenerateFontCollection");
135 auto families = xs.fontFamilies;
136 if (families.empty()) {
137 families = ys.fontFamilies;
138 }
139
140 if (fontProviders == nullptr) {
141 LOGEX_FUNC_LINE(ERROR) << "fontProviders is nullptr";
142 throw TEXGINE_EXCEPTION(INVALID_ARGUMENT);
143 }
144
145 return fontProviders->GenerateFontCollection(families);
146 }
147
Measure(const TextStyle & xs,const std::vector<uint16_t> & u16vect,const FontCollection & fontCollection,CharGroups & cgs,std::vector<Boundary> & boundaries)148 int TextBreaker::Measure(const TextStyle &xs, const std::vector<uint16_t> &u16vect,
149 const FontCollection &fontCollection, CharGroups &cgs, std::vector<Boundary> &boundaries) noexcept(false)
150 {
151 LOGSCOPED(sl, LOGEX_FUNC_LINE_DEBUG(), "TextBreaker::doMeasure");
152 auto measurer = Measurer::Create(u16vect, fontCollection);
153 if (measurer == nullptr) {
154 LOGEX_FUNC_LINE(ERROR) << "Measurer::Create return nullptr";
155 throw TEXGINE_EXCEPTION(API_FAILED);
156 }
157
158 measurer->SetLocale(xs.locale);
159 measurer->SetRTL(false);
160 measurer->SetSize(xs.fontSize);
161 FontStyles style(xs.fontWeight, xs.fontStyle);
162 measurer->SetFontStyle(style);
163 measurer->SetRange(0, u16vect.size());
164 measurer->SetSpacing(xs.letterSpacing, xs.wordSpacing);
165 auto ret = measurer->Measure(cgs);
166 if (ret != 0) {
167 LOGEX_FUNC_LINE(ERROR) << "Measure failed!";
168 return ret;
169 }
170 boundaries = measurer->GetWordBoundary();
171 if (boundaries.size() == 0) {
172 LOGEX_FUNC_LINE(ERROR) << "Measurer GetWordBoundary failed!";
173 return 1;
174 }
175 return 0;
176 }
177
GenNewBoundryByTypeface(CharGroups cgs,std::vector<Boundary> & boundaries)178 void TextBreaker::GenNewBoundryByTypeface(CharGroups cgs, std::vector<Boundary> &boundaries)
179 {
180 std::vector<Boundary> newBoundary;
181 for (auto &[start, end] : boundaries) {
182 size_t newStart = start;
183 size_t newEnd = start;
184 const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
185 auto typeface = wordCgs.Get(0).typeface;
186 for (auto cg = wordCgs.begin(); cg != wordCgs.end(); cg++) {
187 if (typeface == cg->typeface) {
188 newEnd++;
189 continue;
190 }
191
192 newBoundary.push_back({newStart, newEnd});
193 newStart = newEnd++;
194 typeface = cg->typeface;
195 }
196
197 newBoundary.push_back({newStart, end});
198 }
199
200 boundaries = newBoundary;
201 }
202
IsQuote(const uint16_t c)203 bool TextBreaker::IsQuote(const uint16_t c)
204 {
205 return ((c == EN_QUOTE) || (c == CN_LEFT_QUOTE) || (c == CN_RIGHT_QUOTE));
206 }
207
GenNewBoundryByQuote(CharGroups cgs,std::vector<Boundary> & boundaries)208 void TextBreaker::GenNewBoundryByQuote(CharGroups cgs, std::vector<Boundary> &boundaries)
209 {
210 std::vector<Boundary> newBoundary = {{0, 0}};
211 auto boundary = boundaries.begin();
212 bool isEndQuote = false;
213 for (; boundary < boundaries.end() - 1; boundary++) {
214 const auto &prevWordCgs = cgs.GetSubFromU16RangeAll(boundary->leftIndex, boundary->rightIndex);
215 bool isQuote = IsQuote(prevWordCgs.GetBack().chars[0]);
216 if (isQuote && newBoundary.back().rightIndex == boundary->rightIndex) {
217 isEndQuote = false;
218 }
219
220 if (isQuote && !isEndQuote) {
221 newBoundary.push_back({boundary->leftIndex, boundary->rightIndex - 1});
222 newBoundary.push_back({boundary->rightIndex -1, (boundary + 1)->rightIndex});
223 isEndQuote = true;
224 boundary++;
225 }
226
227 if (isQuote && isEndQuote) {
228 if (newBoundary.back().rightIndex == boundary->rightIndex) {
229 isEndQuote = false;
230 continue;
231 }
232 newBoundary.back().rightIndex += boundary->rightIndex;
233 isEndQuote = false;
234 continue;
235 } else {
236 newBoundary.push_back({boundary->leftIndex, boundary->rightIndex});
237 }
238 }
239
240 if (boundary != boundaries.end()) {
241 newBoundary.push_back({boundary->leftIndex, boundary->rightIndex});
242 }
243 newBoundary.erase(newBoundary.begin());
244 boundaries = newBoundary;
245 }
246
GenNewBoundryByWidth(CharGroups cgs,std::vector<Boundary> & boundaries,const double & originWidthLimit,int & index)247 void TextBreaker::GenNewBoundryByWidth(CharGroups cgs, std::vector<Boundary> &boundaries,
248 const double& originWidthLimit, int& index)
249 {
250 std::vector<Boundary> newBoundary;
251 for (auto &[start, end] : boundaries) {
252 size_t newStart = start;
253 size_t newEnd = start;
254 const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
255 double wordWidth = 0;
256 for (auto &cg : wordCgs) {
257 wordWidth += cg.GetWidth();
258 }
259
260 if (currentWidth_ && currentWidth_ + wordWidth > widthLimit_) {
261 currentWidth_ = 0;
262 ++index;
263 widthLimit_ = originWidthLimit - GetIndent(index, indents_);
264 }
265
266 currentWidth_ += wordCgs.begin()->GetWidth();
267 auto prevCg = wordCgs.begin();
268 for (auto cg = wordCgs.begin() + 1; cg != wordCgs.end(); cg++) {
269 if (currentWidth_ + cg->GetWidth() >= widthLimit_) {
270 newEnd += prevCg->chars.size();
271 newBoundary.push_back({newStart, newEnd});
272 currentWidth_ = cg->GetWidth();
273 newStart = newEnd;
274 ++index;
275 widthLimit_ = originWidthLimit - GetIndent(index, indents_);
276 } else {
277 newEnd += prevCg->chars.size();
278 currentWidth_ += cg->GetWidth();
279 }
280 prevCg = cg;
281 }
282
283 if (newEnd != end) {
284 newBoundary.push_back({newStart, end});
285 }
286 }
287
288 boundaries = newBoundary;
289 }
290
GenNewBoundryByHardBreak(CharGroups cgs,std::vector<Boundary> & boundaries)291 void TextBreaker::GenNewBoundryByHardBreak(CharGroups cgs, std::vector<Boundary> &boundaries)
292 {
293 std::vector<Boundary> newBoundary;
294 for (auto &[start, end] : boundaries) {
295 size_t newStart = start;
296 size_t newEnd = start;
297 const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
298 for (auto cg = wordCgs.begin(); cg != wordCgs.end(); cg++) {
299 if (cg->IsHardBreak() && newStart != newEnd) {
300 newBoundary.push_back({newStart, newEnd});
301 }
302
303 if (cg->IsHardBreak()) {
304 newBoundary.push_back({newEnd, newEnd + cg->chars.size()});
305 newStart = newEnd + cg->chars.size();
306 }
307
308 newEnd += cg->chars.size();
309 }
310
311 if (newStart == start) {
312 newBoundary.push_back({newStart, end});
313 }
314 }
315
316 boundaries = newBoundary;
317 }
318
BreakWord(const CharGroups & wordcgs,const TypographyStyle & ys,const TextStyle & xs,std::vector<VariantSpan> & spans)319 void TextBreaker::BreakWord(const CharGroups &wordcgs, const TypographyStyle &ys,
320 const TextStyle &xs, std::vector<VariantSpan> &spans)
321 {
322 size_t rangeOffset = 0;
323 for (size_t i = 0; i < wordcgs.GetNumberOfCharGroup(); i++) {
324 auto &cg = wordcgs.Get(i);
325 postBreak_ += cg.GetWidth();
326 if (u_isWhitespace(cg.chars[0]) == 0 || cg.IsHardBreak()) {
327 // not white space
328 preBreak_ = postBreak_;
329 }
330
331 const auto &breakType = ys.wordBreakType == WordBreakType::NORMAL ?
332 WordBreakType::BREAK_WORD : ys.wordBreakType;
333 bool isBreakAll = (breakType == WordBreakType::BREAK_ALL);
334 bool isBreakWord = (breakType == WordBreakType::BREAK_WORD);
335 bool isFinalCharGroup = (i == wordcgs.GetNumberOfCharGroup() - 1);
336 bool needGenerateSpan = isBreakAll;
337 needGenerateSpan = needGenerateSpan || (isBreakWord && isFinalCharGroup);
338 if (needGenerateSpan == false) {
339 continue;
340 }
341
342 auto currentCgs = wordcgs.GetSub(rangeOffset, i + 1);
343 GenerateSpan(currentCgs, ys, xs, spans);
344 rangeOffset = i + 1;
345 }
346 }
347
GenerateSpan(const CharGroups & currentCgs,const TypographyStyle & ys,const TextStyle & xs,std::vector<VariantSpan> & spans)348 void TextBreaker::GenerateSpan(const CharGroups ¤tCgs, const TypographyStyle &ys,
349 const TextStyle &xs, std::vector<VariantSpan> &spans)
350 {
351 if (!currentCgs.IsValid() || currentCgs.GetSize() == 0) {
352 throw TEXGINE_EXCEPTION(INVALID_ARGUMENT);
353 }
354
355 LOGEX_FUNC_LINE_DEBUG(Logger::SetToNoReturn) << "AddWord " << spans.size()
356 << " " << currentCgs.GetRange() << ": \033[40m'";
357 LOGCEX_DEBUG() << TextConverter::ToStr(currentCgs.ToUTF16()) << "'\033[0m";
358 auto newSpan = std::make_shared<TextSpan>();
359 newSpan->cgs_ = currentCgs;
360 newSpan->postBreak_ = postBreak_;
361 newSpan->preBreak_ = preBreak_;
362 newSpan->typeface_ = currentCgs.Get(0).typeface;
363 double spanWidth = 0.0;
364 for (const auto &cg : currentCgs) {
365 spanWidth += cg.GetWidth();
366 }
367 newSpan->width_ = spanWidth;
368 VariantSpan vs(newSpan);
369 vs.SetTextStyle(xs);
370 spans.push_back(vs);
371 }
372 } // namespace TextEngine
373 } // namespace Rosen
374 } // namespace OHOS
375