1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINIKIN_LINE_BREAKER_UTIL_H
18 #define MINIKIN_LINE_BREAKER_UTIL_H
19 
20 #include <vector>
21 
22 #include "minikin/Hyphenator.h"
23 #include "minikin/MeasuredText.h"
24 #include "minikin/U16StringPiece.h"
25 
26 #include "HyphenatorMap.h"
27 #include "LayoutUtils.h"
28 #include "Locale.h"
29 #include "LocaleListCache.h"
30 #include "MinikinInternal.h"
31 #include "WordBreaker.h"
32 
33 namespace minikin {
34 
// ParaWidth holds a cumulative width measured from the beginning of the paragraph.
// A 32-bit float can lose accuracy on very large paragraphs, but float is also what the Java
// side uses extensively for the same quantity. It is kept as an alias so the underlying type
// can be swapped in one place if the performance/accuracy tradeoff changes.
using ParaWidth = float;
40 
41 // Hyphenates a string potentially containing non-breaking spaces.
42 std::vector<HyphenationType> hyphenate(const U16StringPiece& string, const Hyphenator& hypenator);
43 
// Reports whether a UTF-16 code unit is a space that disappears when it ends a line.
// The accepted set is [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]] plus '\n'.
// Every member of that set lives in the BMP, so testing a single code unit is sufficient.
inline bool isLineEndSpace(uint16_t c) {
    switch (c) {
        case '\n':    // LINE FEED
        case ' ':     // SPACE
        case 0x1680:  // OGHAM SPACE MARK
        case 0x205F:  // MEDIUM MATHEMATICAL SPACE
        case 0x3000:  // IDEOGRAPHIC SPACE
            return true;
        case 0x2007:  // FIGURE SPACE sits inside the range below but is deliberately excluded.
            return false;
        default:
            // EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
            // SIX-PER-EM SPACE, PUNCTUATION SPACE, THIN SPACE, HAIR SPACE.
            return 0x2000 <= c && c <= 0x200A;
    }
}
57 
getEffectiveLocale(uint32_t localeListId)58 inline Locale getEffectiveLocale(uint32_t localeListId) {
59     const LocaleList& localeList = LocaleListCache::getById(localeListId);
60     return localeList.empty() ? Locale() : localeList[0];
61 }
62 
63 // Retrieves hyphenation break points from a word.
populateHyphenationPoints(const U16StringPiece & textBuf,const Run & run,const Hyphenator & hyphenator,const Range & contextRange,const Range & hyphenationTargetRange,const std::vector<float> & charWidths,bool ignoreKerning,std::vector<HyphenBreak> * out,LayoutPieces * pieces)64 inline void populateHyphenationPoints(
65         const U16StringPiece& textBuf,         // A text buffer.
66         const Run& run,                        // A run of this region.
67         const Hyphenator& hyphenator,          // A hyphenator to be used for hyphenation.
68         const Range& contextRange,             // A context range for measuring hyphenated piece.
69         const Range& hyphenationTargetRange,   // An actual range for the hyphenation target.
70         const std::vector<float>& charWidths,  // Char width used for hyphen piece estimation.
71         bool ignoreKerning,                    // True use full shaping for hyphenation piece.
72         std::vector<HyphenBreak>* out,         // An output to be appended.
73         LayoutPieces* pieces) {                // An output of layout pieces. Maybe null.
74     if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) {
75         return;
76     }
77 
78     const std::vector<HyphenationType> hyphenResult =
79             hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator);
80     for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) {
81         const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)];
82         if (hyph == HyphenationType::DONT_BREAK) {
83             continue;  // Not a hyphenation point.
84         }
85 
86         if (!ignoreKerning) {
87             auto hyphenPart = contextRange.split(i);
88             U16StringPiece firstText = textBuf.substr(hyphenPart.first);
89             U16StringPiece secondText = textBuf.substr(hyphenPart.second);
90             const float first =
91                     run.measureHyphenPiece(firstText, Range(0, firstText.size()),
92                                            StartHyphenEdit::NO_EDIT /* start hyphen edit */,
93                                            editForThisLine(hyph) /* end hyphen edit */, pieces);
94             const float second =
95                     run.measureHyphenPiece(secondText, Range(0, secondText.size()),
96                                            editForNextLine(hyph) /* start hyphen edit */,
97                                            EndHyphenEdit::NO_EDIT /* end hyphen edit */, pieces);
98 
99             out->emplace_back(i, hyph, first, second);
100         } else {
101             float first = 0;
102             float second = 0;
103             for (uint32_t j = contextRange.getStart(); j < i; ++j) {
104                 first += charWidths[j];
105             }
106             for (uint32_t j = i; j < contextRange.getEnd(); ++j) {
107                 second += charWidths[j];
108             }
109 
110             EndHyphenEdit endEdit = editForThisLine(hyph);
111             StartHyphenEdit startEdit = editForNextLine(hyph);
112 
113             if (endEdit != EndHyphenEdit::NO_EDIT) {
114                 auto [str, strSize] = getHyphenString(endEdit);
115                 first += run.measureText(U16StringPiece(str, strSize));
116             }
117 
118             if (startEdit != StartHyphenEdit::NO_EDIT) {
119                 auto [str, strSize] = getHyphenString(startEdit);
120                 second += run.measureText(U16StringPiece(str, strSize));
121             }
122 
123             out->emplace_back(i, hyph, first, second);
124         }
125     }
126 }
127 
128 // Processes and retrieve informations from characters in the paragraph.
129 struct CharProcessor {
130     // The number of spaces.
131     uint32_t rawSpaceCount = 0;
132 
133     // The number of spaces minus trailing spaces.
134     uint32_t effectiveSpaceCount = 0;
135 
136     // The sum of character width from the paragraph start.
137     ParaWidth sumOfCharWidths = 0.0;
138 
139     // The sum of character width from the paragraph start minus trailing line end spaces.
140     // This means that the line width from the paragraph start if we decided break now.
141     ParaWidth effectiveWidth = 0.0;
142 
143     // The total amount of character widths at the previous word break point.
144     ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0;
145 
146     // The next word break offset.
147     uint32_t nextWordBreak = 0;
148 
149     // The previous word break offset.
150     uint32_t prevWordBreak = 0;
151 
152     // The width of a space. May be 0 if there are no spaces.
153     // Note: if there are multiple different widths for spaces (for example, because of mixing of
154     // fonts), it's only guaranteed to pick one.
155     float spaceWidth = 0.0f;
156 
157     // The current hyphenator.
158     const Hyphenator* hyphenator = nullptr;
159 
160     // Retrieve the current word range.
wordRangeCharProcessor161     inline Range wordRange() const { return breaker.wordRange(); }
162 
163     // Retrieve the current context range.
contextRangeCharProcessor164     inline Range contextRange() const { return Range(prevWordBreak, nextWordBreak); }
165 
166     // Returns the width from the last word break point.
widthFromLastWordBreakCharProcessor167     inline ParaWidth widthFromLastWordBreak() const {
168         return effectiveWidth - sumOfCharWidthsAtPrevWordBreak;
169     }
170 
171     // Returns the break penalty for the current word break point.
wordBreakPenaltyCharProcessor172     inline int wordBreakPenalty() const { return breaker.breakBadness(); }
173 
CharProcessorCharProcessor174     CharProcessor(const U16StringPiece& text) { breaker.setText(text.data(), text.size()); }
175 
176     // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one
177     // time before feeding characters.
updateLocaleIfNecessaryCharProcessor178     void updateLocaleIfNecessary(const Run& run) {
179         uint32_t newLocaleListId = run.getLocaleListId();
180         if (localeListId != newLocaleListId) {
181             Locale locale = getEffectiveLocale(newLocaleListId);
182             nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(),
183                                                         run.lineBreakWordStyle(),
184                                                         run.getRange().getStart());
185             hyphenator = HyphenatorMap::lookup(locale);
186             localeListId = newLocaleListId;
187         }
188     }
189 
190     // Process one character.
feedCharCharProcessor191     void feedChar(uint32_t idx, uint16_t c, float w, bool canBreakHere) {
192         if (idx == nextWordBreak) {
193             if (canBreakHere) {
194                 prevWordBreak = nextWordBreak;
195                 sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths;
196             }
197             nextWordBreak = breaker.next();
198         }
199         if (isWordSpace(c)) {
200             rawSpaceCount += 1;
201             spaceWidth = w;
202         }
203         sumOfCharWidths += w;
204         if (isLineEndSpace(c)) {
205             // If we break a line on a line-ending space, that space goes away. So postBreak
206             // and postSpaceCount, which keep the width and number of spaces if we decide to
207             // break at this point, don't need to get adjusted.
208         } else {
209             effectiveSpaceCount = rawSpaceCount;
210             effectiveWidth = sumOfCharWidths;
211         }
212     }
213 
214 private:
215     // The current locale list id.
216     uint32_t localeListId = LocaleListCache::kInvalidListId;
217 
218     WordBreaker breaker;
219 };
220 }  // namespace minikin
221 
222 #endif  // MINIKIN_LINE_BREAKER_UTIL_H
223