• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A module for breaking paragraphs into lines, supporting high quality
19  * hyphenation and justification.
20  */
21 
22 #ifndef MINIKIN_LINE_BREAKER_H
23 #define MINIKIN_LINE_BREAKER_H
24 
25 #ifndef U_USING_ICU_NAMESPACE
26 #define U_USING_ICU_NAMESPACE 0
27 #endif  //  U_USING_ICU_NAMESPACE
28 
29 #include <cmath>
30 #include <vector>
31 #include "minikin/FontCollection.h"
32 #include "minikin/Hyphenator.h"
33 #include "minikin/MinikinFont.h"
34 #include "minikin/WordBreaker.h"
35 #include "unicode/brkiter.h"
36 #include "unicode/locid.h"
37 
38 namespace minikin {
39 
// Break strategy setting held by LineBreaker (see LineBreaker::setStrategy()
// and LineBreaker::getStrategy()). Every enumerator carries an explicit
// numeric value.
// NOTE(review): the values are presumably mirrored by callers outside this
// header - confirm before renumbering.
enum BreakStrategy {
  kBreakStrategy_Greedy = 0,
  kBreakStrategy_HighQuality = 1,
  kBreakStrategy_Balanced = 2,
};
45 
// Word-break setting held by LineBreaker (see
// LineBreaker::setWordBreakType()). Every enumerator carries an explicit
// numeric value.
// NOTE(review): the names suggest the CSS `word-break` keywords (normal /
// break-all / break-word) - confirm against the implementation.
enum WordBreakType {
  kWordBreakType_Normal = 0,
  kWordBreakType_BreakAll = 1,
  kWordBreakType_BreakWord = 2,
};
51 
// Hyphenation frequency setting held by LineBreaker (see
// LineBreaker::setHyphenationFrequency() and
// LineBreaker::getHyphenationFrequency()). Every enumerator carries an
// explicit numeric value.
// NOTE(review): the values are presumably mirrored by callers outside this
// header - confirm before renumbering.
enum HyphenationFrequency {
  kHyphenationFrequency_None = 0,
  kHyphenationFrequency_Normal = 1,
  kHyphenationFrequency_Full = 2,
};
57 
// Declared here; the definition lives outside this header.
// NOTE(review): judging by the name and the uint16_t parameter, this
// presumably reports whether the UTF-16 code unit `c` is a space that may
// trail at the end of a line - confirm against the definition.
58 bool isLineEndSpace(uint16_t c);
59 
// Describes the width available to each line of a paragraph: a "first" width
// used by the leading lines, a "rest" width used by every later line, and an
// optional per-line indent that is subtracted from that width.
//
// A default-constructed instance has all widths set to zero and no indents,
// so the query methods are well defined even before setWidths() is called.
//
// TODO: want to generalize to be able to handle array of line widths
class LineWidths {
 public:
  // Sets the base widths. Lines whose index is below firstWidthLineCount use
  // firstWidth; all other lines use restWidth.
  void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
    mFirstWidth = firstWidth;
    mFirstWidthLineCount = firstWidthLineCount;
    mRestWidth = restWidth;
  }

  // Replaces the per-line indents with a copy of `indents`. Lines past the end
  // of the list reuse its last entry (see getLineWidth()).
  void setIndents(const std::vector<float>& indents) { mIndents = indents; }

  // Returns true when the two base widths are equal and no indents are set.
  bool isConstant() const {
    // technically mFirstWidthLineCount == 0 would count too, but doesn't
    // actually happen
    return mRestWidth == mFirstWidth && mIndents.empty();
  }

  // Returns the width of line `line` (0-based): the applicable base width
  // minus that line's indent, if any indents are set.
  float getLineWidth(int line) const {
    float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
    if (!mIndents.empty()) {
      // A negative `line` wraps to a huge unsigned index and therefore falls
      // through to the last indent, exactly as an out-of-range line does.
      const size_t index = static_cast<size_t>(line);
      width -= (index < mIndents.size()) ? mIndents[index] : mIndents.back();
    }
    return width;
  }

  // Drops the indents only; the base widths keep their current values.
  void clear() { mIndents.clear(); }

 private:
  // In-class initializers keep isConstant()/getLineWidth() from reading
  // indeterminate values when setWidths() has not been called yet.
  float mFirstWidth = 0.0f;
  int mFirstWidthLineCount = 0;
  float mRestWidth = 0.0f;
  std::vector<float> mIndents;
};
93 
94 class LineBreaker {
95  public:
96   const static int kTab_Shift =
97       29;  // keep synchronized with TAB_MASK in StaticLayout.java
98 
99   // Note: Locale persists across multiple invocations (it is not cleaned up by
100   // finish()), explicitly to avoid the cost of creating ICU BreakIterator
101   // objects. It should always be set on the first invocation, but callers are
102   // encouraged not to call again unless locale has actually changed. That logic
103   // could be here but it's better for performance that it's upstream because of
104   // the cost of constructing and comparing the ICU Locale object.
105   // Note: caller is responsible for managing lifetime of hyphenator
106   //
107   // libtxt extension: always use the default locale so that a cached instance
108   // of the ICU break iterator can be reused.
109   void setLocale();
110 
resize(size_t size)111   void resize(size_t size) {
112     mTextBuf.resize(size);
113     mCharWidths.resize(size);
114   }
115 
size()116   size_t size() const { return mTextBuf.size(); }
117 
buffer()118   uint16_t* buffer() { return mTextBuf.data(); }
119 
charWidths()120   float* charWidths() { return mCharWidths.data(); }
121 
122   // set text to current contents of buffer
123   void setText();
124 
125   void setLineWidths(float firstWidth,
126                      int firstWidthLineCount,
127                      float restWidth);
128 
129   void setIndents(const std::vector<float>& indents);
130 
getStrategy()131   BreakStrategy getStrategy() const { return mStrategy; }
132 
setStrategy(BreakStrategy strategy)133   void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
134 
setWordBreakType(WordBreakType wordBreakType)135   void setWordBreakType(WordBreakType wordBreakType) {
136     mWordBreakType = wordBreakType;
137   }
138 
setJustified(bool justified)139   void setJustified(bool justified) { mJustified = justified; }
140 
getHyphenationFrequency()141   HyphenationFrequency getHyphenationFrequency() const {
142     return mHyphenationFrequency;
143   }
144 
setHyphenationFrequency(HyphenationFrequency frequency)145   void setHyphenationFrequency(HyphenationFrequency frequency) {
146     mHyphenationFrequency = frequency;
147   }
148 
149   // TODO: this class is actually fairly close to being general and not tied to
150   // using Minikin to do the shaping of the strings. The main thing that would
151   // need to be changed is having some kind of callback (or virtual class, or
152   // maybe even template), which could easily be instantiated with Minikin's
153   // Layout. Future work for when needed.
154   float addStyleRun(MinikinPaint* paint,
155                     const std::shared_ptr<FontCollection>& typeface,
156                     FontStyle style,
157                     size_t start,
158                     size_t end,
159                     bool isRtl);
160 
161   void addReplacement(size_t start, size_t end, float width);
162 
163   size_t computeBreaks();
164 
165   // libtxt: Add ability to set custom char widths. This allows manual
166   // definition of the widths of arbitrary glyphs. To linebreak properly, call
167   // addStyleRun with nullptr as the paint property, which will lead it to
168   // assume the width has already been calculated. Used for properly breaking
169   // inline placeholders.
170   void setCustomCharWidth(size_t offset, float width);
171 
getBreaks()172   const int* getBreaks() const { return mBreaks.data(); }
173 
getWidths()174   const float* getWidths() const { return mWidths.data(); }
175 
getFlags()176   const int* getFlags() const { return mFlags.data(); }
177 
178   void finish();
179 
180  private:
181   // ParaWidth is used to hold cumulative width from beginning of paragraph.
182   // Note that for very large paragraphs, accuracy could degrade using only
183   // 32-bit float. Note however that float is used extensively on the Java side
184   // for this. This is a typedef so that we can easily change it based on
185   // performance/accuracy tradeoff.
186   typedef double ParaWidth;
187 
188   // A single candidate break
189   struct Candidate {
190     size_t offset;        // offset to text buffer, in code units
191     size_t prev;          // index to previous break
192     ParaWidth preBreak;   // width of text until this point, if we decide to not
193                           // break here
194     ParaWidth postBreak;  // width of text until this point, if we decide to
195                           // break here
196     float penalty;        // penalty of this break (for example, hyphen penalty)
197     float score;          // best score found for this break
198     size_t lineNumber;    // only updated for non-constant line widths
199     size_t preSpaceCount;   // preceding space count before breaking
200     size_t postSpaceCount;  // preceding space count after breaking
201     HyphenationType hyphenType;
202   };
203 
204   float currentLineWidth() const;
205 
206   // Determine whether to split a character string.
207   bool IsSplittingCharacters(ParaWidth postBreak);
208 
209   void addWordBreak(size_t offset,
210                     ParaWidth preBreak,
211                     ParaWidth postBreak,
212                     size_t preSpaceCount,
213                     size_t postSpaceCount,
214                     float penalty,
215                     HyphenationType hyph);
216 
217   void addCandidate(Candidate cand);
218   void pushGreedyBreak();
219 
220   // push an actual break to the output. Takes care of setting flags for tab
221   void pushBreak(int offset, float width, uint8_t hyphenEdit);
222 
223   float getSpaceWidth() const;
224 
225   void computeBreaksGreedy();
226 
227   void computeBreaksOptimal(bool isRectangular);
228 
229   void finishBreaksOptimal();
230 
231   WordBreaker mWordBreaker;
232   icu::Locale mLocale;
233   std::vector<uint16_t> mTextBuf;
234   std::vector<float> mCharWidths;
235 
236   Hyphenator* mHyphenator;
237   std::vector<HyphenationType> mHyphBuf;
238 
239   // layout parameters
240   BreakStrategy mStrategy = kBreakStrategy_Greedy;
241   WordBreakType mWordBreakType = WordBreakType::kWordBreakType_BreakWord;
242   HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
243   bool mJustified;
244   LineWidths mLineWidths;
245 
246   // result of line breaking
247   std::vector<int> mBreaks;
248   std::vector<float> mWidths;
249   std::vector<int> mFlags;
250 
251   ParaWidth mWidth = 0;
252   std::vector<Candidate> mCandidates;
253   float mLinePenalty = 0.0f;
254 
255   // the following are state for greedy breaker (updated while adding style
256   // runs)
257   size_t mLastBreak;
258   size_t mBestBreak;
259   float mBestScore;
260   ParaWidth mPreBreak;        // prebreak of last break
261   uint32_t mLastHyphenation;  // hyphen edit of last break kept for next line
262   int mFirstTabIndex;
263   size_t mSpaceCount;
264 };
265 
266 }  // namespace minikin
267 
268 #endif  // MINIKIN_LINE_BREAKER_H
269