• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "text_breaker.h"
17 
18 #include <cassert>
19 
20 #include <unicode/uchar.h>
21 
22 #include "measurer.h"
23 #include "texgine/any_span.h"
24 #include "texgine_exception.h"
25 #include "text_span.h"
26 #include "texgine/utils/exlog.h"
27 #ifdef LOGGER_ENABLE_SCOPE
28 #include "texgine/utils/trace.h"
29 #endif
30 #include "text_converter.h"
31 #include "word_breaker.h"
32 
33 namespace OHOS {
34 namespace Rosen {
35 namespace TextEngine {
// Code points that TextBreaker::IsQuote recognises as quotation marks.
constexpr uint16_t CN_LEFT_QUOTE = 0x201C;  // U+201C LEFT DOUBLE QUOTATION MARK
constexpr uint16_t CN_RIGHT_QUOTE = 0x201D; // U+201D RIGHT DOUBLE QUOTATION MARK
constexpr uint16_t EN_QUOTE = 0x22;         // U+0022 QUOTATION MARK
// TextBreaker::WordBreak skips its overflow truncation when the width limit equals this value.
constexpr double CUSTOM_MAX_WIDTH_LIMIT = 1e9;
40 
SetWidthLimit(const double widthLimit)41 void TextBreaker::SetWidthLimit(const double widthLimit)
42 {
43     widthLimit_ = widthLimit;
44 }
45 
SetIndents(const std::vector<float> & indents)46 void TextBreaker::SetIndents(const std::vector<float> &indents)
47 {
48     indents_ = indents;
49 }
50 
// Returns the indent that applies to the entry at `index`.
//
// index:   position to look up; may lie outside the list.
// indents: configured indents, possibly empty.
//
// An empty list yields 0.0. An index past the end reuses the last indent, and a negative index
// reuses the first one instead of reading before the start of the vector.
static double GetIndent(const int index, const std::vector<float> &indents)
{
    if (indents.empty()) {
        return 0.0;
    }

    if (index < 0) {
        // Guard: indexing the vector with a negative value would be an out-of-bounds read.
        return indents.front();
    }

    const auto position = static_cast<size_t>(index);
    return position < indents.size() ? indents[position] : indents.back();
}
62 
CreateNewBoundary(const CharGroups & cgs,std::vector<Boundary> & boundaries,const TypographyStyle & ys,const double & originWidthLimit,int & index)63 void TextBreaker::CreateNewBoundary(const CharGroups &cgs, std::vector<Boundary> &boundaries,
64     const TypographyStyle &ys, const double& originWidthLimit, int& index)
65 {
66     GenNewBoundryByHardBreak(cgs, boundaries);
67     if (ys.wordBreakType != WordBreakType::NORMAL) {
68         GenNewBoundryByWidth(cgs, boundaries, originWidthLimit, index);
69     }
70     GenNewBoundryByTypeface(cgs, boundaries);
71     GenNewBoundryByQuote(cgs, boundaries);
72     preBreak_ = 0;
73     postBreak_ = 0;
74 }
75 
WordBreak(std::vector<VariantSpan> & spans,const TypographyStyle & ys,const std::shared_ptr<FontProviders> & fontProviders)76 int TextBreaker::WordBreak(std::vector<VariantSpan> &spans, const TypographyStyle &ys,
77     const std::shared_ptr<FontProviders> &fontProviders)
78 {
79 #ifdef LOGGER_ENABLE_SCOPE
80     ScopedTrace scope("TextBreaker::WordBreak");
81 #endif
82     std::vector<VariantSpan> visitingSpans;
83     std::swap(visitingSpans, spans);
84     int index = 0;
85     double widthLimit = widthLimit_;
86     for (const auto &vspan : visitingSpans) {
87         auto span = vspan.TryToTextSpan();
88         if (span == nullptr) {
89             widthLimit_ -= GetIndent(index, indents_);
90             spans.push_back(vspan);
91             currentWidth_ += vspan.GetWidth();
92             currentWidth_ = currentWidth_ >= widthLimit_ ? 0 : currentWidth_;
93             widthLimit_ = widthLimit;
94             continue;
95         }
96 
97         std::vector<uint16_t> &u16vect = span->u16vect_;
98         if (!u16vect.size()) {
99             continue;
100         }
101         widthLimit_ -= GetIndent(index, indents_);
102         if (ys.ellipsis.length() && ys.maxLines == std::numeric_limits<size_t>::max() &&
103             widthLimit_ != CUSTOM_MAX_WIDTH_LIMIT && widthLimit_ && u16vect.size() > widthLimit_) {
104             u16vect.erase(u16vect.begin()+widthLimit_-1, u16vect.end()); // Textoverflow status
105         }
106         auto xs = vspan.GetTextStyle();
107         auto fontCollection = GenerateFontCollection(ys, xs, fontProviders);
108         if (fontCollection == nullptr) {
109             // WordBreak failed
110             return 1;
111         }
112 
113         CharGroups cgs;
114         std::vector<Boundary> boundaries;
115         if (Measure(xs, u16vect, *fontCollection, cgs, boundaries)) {
116             return 1;
117         }
118 
119         CreateNewBoundary(cgs, boundaries, ys, widthLimit, index);
120         for (auto &[start, end] : boundaries) {
121             const auto &wordcgs = cgs.GetSubFromU16RangeAll(start, end);
122             BreakWord(wordcgs, ys, xs, spans);
123         }
124         widthLimit_ = widthLimit;
125         index++;
126     }
127     // WordBreak successed
128     return 0;
129 }
130 
GenerateFontCollection(const TypographyStyle & ys,const TextStyle & xs,const std::shared_ptr<FontProviders> & fontProviders)131 std::shared_ptr<FontCollection> TextBreaker::GenerateFontCollection(const TypographyStyle &ys,
132     const TextStyle &xs, const std::shared_ptr<FontProviders> &fontProviders) noexcept(false)
133 {
134     LOGSCOPED(sl, LOGEX_FUNC_LINE_DEBUG(), "TextBreaker::GenerateFontCollection");
135     auto families = xs.fontFamilies;
136     if (families.empty()) {
137         families = ys.fontFamilies;
138     }
139 
140     if (fontProviders == nullptr) {
141         LOGEX_FUNC_LINE(ERROR) << "fontProviders is nullptr";
142         throw TEXGINE_EXCEPTION(INVALID_ARGUMENT);
143     }
144 
145     return fontProviders->GenerateFontCollection(families);
146 }
147 
Measure(const TextStyle & xs,const std::vector<uint16_t> & u16vect,const FontCollection & fontCollection,CharGroups & cgs,std::vector<Boundary> & boundaries)148 int TextBreaker::Measure(const TextStyle &xs, const std::vector<uint16_t> &u16vect,
149     const FontCollection &fontCollection, CharGroups &cgs, std::vector<Boundary> &boundaries) noexcept(false)
150 {
151     LOGSCOPED(sl, LOGEX_FUNC_LINE_DEBUG(), "TextBreaker::doMeasure");
152     auto measurer = Measurer::Create(u16vect, fontCollection);
153     if (measurer == nullptr) {
154         LOGEX_FUNC_LINE(ERROR) << "Measurer::Create return nullptr";
155         throw TEXGINE_EXCEPTION(API_FAILED);
156     }
157 
158     measurer->SetLocale(xs.locale);
159     measurer->SetRTL(false);
160     measurer->SetSize(xs.fontSize);
161     FontStyles style(xs.fontWeight, xs.fontStyle);
162     measurer->SetFontStyle(style);
163     measurer->SetRange(0, u16vect.size());
164     measurer->SetSpacing(xs.letterSpacing, xs.wordSpacing);
165     auto ret = measurer->Measure(cgs);
166     if (ret != 0) {
167         LOGEX_FUNC_LINE(ERROR) << "Measure failed!";
168         return ret;
169     }
170     boundaries = measurer->GetWordBoundary();
171     if (boundaries.size() == 0) {
172         LOGEX_FUNC_LINE(ERROR) << "Measurer GetWordBoundary failed!";
173         return 1;
174     }
175     return 0;
176 }
177 
GenNewBoundryByTypeface(CharGroups cgs,std::vector<Boundary> & boundaries)178 void TextBreaker::GenNewBoundryByTypeface(CharGroups cgs, std::vector<Boundary> &boundaries)
179 {
180     std::vector<Boundary> newBoundary;
181     for (auto &[start, end] : boundaries) {
182         size_t newStart = start;
183         size_t newEnd = start;
184         const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
185         auto typeface = wordCgs.Get(0).typeface;
186         for (auto cg = wordCgs.begin(); cg != wordCgs.end(); cg++) {
187             if (typeface == cg->typeface) {
188                 newEnd++;
189                 continue;
190             }
191 
192             newBoundary.push_back({newStart, newEnd});
193             newStart = newEnd++;
194             typeface = cg->typeface;
195         }
196 
197         newBoundary.push_back({newStart, end});
198     }
199 
200     boundaries = newBoundary;
201 }
202 
IsQuote(const uint16_t c)203 bool TextBreaker::IsQuote(const uint16_t c)
204 {
205     return ((c == EN_QUOTE) || (c == CN_LEFT_QUOTE) || (c == CN_RIGHT_QUOTE));
206 }
207 
GenNewBoundryByQuote(CharGroups cgs,std::vector<Boundary> & boundaries)208 void TextBreaker::GenNewBoundryByQuote(CharGroups cgs, std::vector<Boundary> &boundaries)
209 {
210     std::vector<Boundary> newBoundary = {{0, 0}};
211     auto boundary = boundaries.begin();
212     bool isEndQuote = false;
213     for (; boundary < boundaries.end() - 1; boundary++) {
214         const auto &prevWordCgs = cgs.GetSubFromU16RangeAll(boundary->leftIndex, boundary->rightIndex);
215         bool isQuote = IsQuote(prevWordCgs.GetBack().chars[0]);
216         if (isQuote && newBoundary.back().rightIndex == boundary->rightIndex) {
217             isEndQuote = false;
218         }
219 
220         if (isQuote && !isEndQuote) {
221             newBoundary.push_back({boundary->leftIndex, boundary->rightIndex - 1});
222             newBoundary.push_back({boundary->rightIndex -1, (boundary + 1)->rightIndex});
223             isEndQuote = true;
224             boundary++;
225         }
226 
227         if (isQuote && isEndQuote) {
228             if (newBoundary.back().rightIndex == boundary->rightIndex) {
229                 isEndQuote = false;
230                 continue;
231             }
232             newBoundary.back().rightIndex += boundary->rightIndex;
233             isEndQuote = false;
234             continue;
235         } else {
236             newBoundary.push_back({boundary->leftIndex, boundary->rightIndex});
237         }
238     }
239 
240     if (boundary != boundaries.end()) {
241         newBoundary.push_back({boundary->leftIndex, boundary->rightIndex});
242     }
243     newBoundary.erase(newBoundary.begin());
244     boundaries = newBoundary;
245 }
246 
GenNewBoundryByWidth(CharGroups cgs,std::vector<Boundary> & boundaries,const double & originWidthLimit,int & index)247 void TextBreaker::GenNewBoundryByWidth(CharGroups cgs, std::vector<Boundary> &boundaries,
248     const double& originWidthLimit, int& index)
249 {
250     std::vector<Boundary> newBoundary;
251     for (auto &[start, end] : boundaries) {
252         size_t newStart = start;
253         size_t newEnd = start;
254         const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
255         double wordWidth = 0;
256         for (auto &cg : wordCgs) {
257             wordWidth += cg.GetWidth();
258         }
259 
260         if (currentWidth_ && currentWidth_ + wordWidth > widthLimit_) {
261             currentWidth_ = 0;
262             ++index;
263             widthLimit_ = originWidthLimit - GetIndent(index, indents_);
264         }
265 
266         currentWidth_ += wordCgs.begin()->GetWidth();
267         auto prevCg = wordCgs.begin();
268         for (auto cg = wordCgs.begin() + 1; cg != wordCgs.end(); cg++) {
269             if (currentWidth_ + cg->GetWidth() >= widthLimit_) {
270                 newEnd += prevCg->chars.size();
271                 newBoundary.push_back({newStart, newEnd});
272                 currentWidth_ = cg->GetWidth();
273                 newStart = newEnd;
274                 ++index;
275                 widthLimit_ = originWidthLimit - GetIndent(index, indents_);
276             } else {
277                 newEnd += prevCg->chars.size();
278                 currentWidth_ += cg->GetWidth();
279             }
280             prevCg = cg;
281         }
282 
283         if (newEnd != end) {
284             newBoundary.push_back({newStart, end});
285         }
286     }
287 
288     boundaries = newBoundary;
289 }
290 
GenNewBoundryByHardBreak(CharGroups cgs,std::vector<Boundary> & boundaries)291 void TextBreaker::GenNewBoundryByHardBreak(CharGroups cgs, std::vector<Boundary> &boundaries)
292 {
293     std::vector<Boundary> newBoundary;
294     for (auto &[start, end] : boundaries) {
295         size_t newStart = start;
296         size_t newEnd = start;
297         const auto &wordCgs = cgs.GetSubFromU16RangeAll(start, end);
298         for (auto cg = wordCgs.begin(); cg != wordCgs.end(); cg++) {
299             if (cg->IsHardBreak() && newStart != newEnd) {
300                 newBoundary.push_back({newStart, newEnd});
301             }
302 
303             if (cg->IsHardBreak()) {
304                 newBoundary.push_back({newEnd, newEnd + cg->chars.size()});
305                 newStart = newEnd + cg->chars.size();
306             }
307 
308             newEnd += cg->chars.size();
309         }
310 
311         if (newStart == start) {
312             newBoundary.push_back({newStart, end});
313         }
314     }
315 
316     boundaries = newBoundary;
317 }
318 
BreakWord(const CharGroups & wordcgs,const TypographyStyle & ys,const TextStyle & xs,std::vector<VariantSpan> & spans)319 void TextBreaker::BreakWord(const CharGroups &wordcgs, const TypographyStyle &ys,
320     const TextStyle &xs, std::vector<VariantSpan> &spans)
321 {
322     size_t rangeOffset = 0;
323     for (size_t i = 0; i < wordcgs.GetNumberOfCharGroup(); i++) {
324         auto &cg = wordcgs.Get(i);
325         postBreak_ += cg.GetWidth();
326         if (u_isWhitespace(cg.chars[0]) == 0 || cg.IsHardBreak()) {
327             // not white space
328             preBreak_ = postBreak_;
329         }
330 
331         const auto &breakType = ys.wordBreakType == WordBreakType::NORMAL ?
332             WordBreakType::BREAK_WORD : ys.wordBreakType;
333         bool isBreakAll = (breakType == WordBreakType::BREAK_ALL);
334         bool isBreakWord = (breakType == WordBreakType::BREAK_WORD);
335         bool isFinalCharGroup = (i == wordcgs.GetNumberOfCharGroup() - 1);
336         bool needGenerateSpan = isBreakAll;
337         needGenerateSpan = needGenerateSpan || (isBreakWord && isFinalCharGroup);
338         if (needGenerateSpan == false) {
339             continue;
340         }
341 
342         auto currentCgs = wordcgs.GetSub(rangeOffset, i + 1);
343         GenerateSpan(currentCgs, ys, xs, spans);
344         rangeOffset = i + 1;
345     }
346 }
347 
GenerateSpan(const CharGroups & currentCgs,const TypographyStyle & ys,const TextStyle & xs,std::vector<VariantSpan> & spans)348 void TextBreaker::GenerateSpan(const CharGroups &currentCgs, const TypographyStyle &ys,
349     const TextStyle &xs, std::vector<VariantSpan> &spans)
350 {
351     if (!currentCgs.IsValid() || currentCgs.GetSize() == 0) {
352         throw TEXGINE_EXCEPTION(INVALID_ARGUMENT);
353     }
354 
355     LOGEX_FUNC_LINE_DEBUG(Logger::SetToNoReturn) << "AddWord " << spans.size()
356         << " " << currentCgs.GetRange() << ": \033[40m'";
357     LOGCEX_DEBUG() << TextConverter::ToStr(currentCgs.ToUTF16()) << "'\033[0m";
358     auto newSpan = std::make_shared<TextSpan>();
359     newSpan->cgs_ = currentCgs;
360     newSpan->postBreak_ = postBreak_;
361     newSpan->preBreak_ = preBreak_;
362     newSpan->typeface_ = currentCgs.Get(0).typeface;
363     double spanWidth = 0.0;
364     for (const auto &cg : currentCgs) {
365         spanWidth += cg.GetWidth();
366     }
367     newSpan->width_ = spanWidth;
368     VariantSpan vs(newSpan);
369     vs.SetTextStyle(xs);
370     spans.push_back(vs);
371 }
372 } // namespace TextEngine
373 } // namespace Rosen
374 } // namespace OHOS
375