• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A module for breaking paragraphs into lines, supporting high quality
19  * hyphenation and justification.
20  */
21 
22 #ifndef MINIKIN_LINE_BREAKER_H
23 #define MINIKIN_LINE_BREAKER_H
24 
25 #ifndef U_USING_ICU_NAMESPACE
26 #define U_USING_ICU_NAMESPACE 0
27 #endif  //  U_USING_ICU_NAMESPACE
28 
29 #include <cmath>
30 #include <vector>
31 #include "minikin/FontCollection.h"
32 #include "minikin/Hyphenator.h"
33 #include "minikin/MinikinFont.h"
34 #include "minikin/WordBreaker.h"
35 #include "unicode/brkiter.h"
36 #include "unicode/locid.h"
37 
38 namespace minikin {
39 
// Line breaking strategy, as stored by LineBreaker::setStrategy() and read
// back through LineBreaker::getStrategy().
// NOTE(review): the numeric values are spelled out in the trailing comments
// because they look like they may be mirrored outside this header; confirm
// before reordering or renumbering.
enum BreakStrategy {
  kBreakStrategy_Greedy = 0,
  kBreakStrategy_HighQuality,  // == 1
  kBreakStrategy_Balanced,     // == 2
};
45 
// Hyphenation frequency setting, as stored by
// LineBreaker::setHyphenationFrequency() and read back through
// LineBreaker::getHyphenationFrequency().
// NOTE(review): the numeric values are spelled out in the trailing comments
// because they look like they may be mirrored outside this header; confirm
// before reordering or renumbering.
enum HyphenationFrequency {
  kHyphenationFrequency_None = 0,
  kHyphenationFrequency_Normal,  // == 1
  kHyphenationFrequency_Full,    // == 2
};
51 
52 bool isLineEndSpace(uint16_t c);
53 
54 // TODO: want to generalize to be able to handle array of line widths
// Describes how wide each line of a paragraph may be.
//
// The first `firstWidthLineCount` lines use `firstWidth`, every later line
// uses `restWidth`, and an optional per-line indent is subtracted from that
// base width.
//
// A default-constructed object has all widths set to zero and no indents, so
// it can be queried safely before setWidths() has been called.
class LineWidths {
 public:
  // Sets the base widths: lines with index < firstWidthLineCount get
  // firstWidth, all other lines get restWidth. Indents are left untouched.
  void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
    mFirstWidth = firstWidth;
    mFirstWidthLineCount = firstWidthLineCount;
    mRestWidth = restWidth;
  }

  // Replaces the per-line indents with a copy of `indents`. Entry i narrows
  // line i; lines past the end of the vector reuse the last entry.
  void setIndents(const std::vector<float>& indents) { mIndents = indents; }

  // Returns true when every line has the same width, i.e. both base widths
  // are equal and no indents are set.
  bool isConstant() const {
    // technically mFirstWidthLineCount == 0 would count too, but doesn't
    // actually happen
    return mIndents.empty() && mRestWidth == mFirstWidth;
  }

  // Returns the width available to the line with the given zero-based index:
  // the base width for that line minus its indent, if any indents are set.
  float getLineWidth(int line) const {
    const float baseWidth =
        (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
    if (mIndents.empty()) {
      return baseWidth;
    }
    // A negative `line` wraps to a huge index here and therefore falls
    // through to the last indent, same as any other out-of-range line.
    const size_t index = static_cast<size_t>(line);
    const float indent =
        (index < mIndents.size()) ? mIndents[index] : mIndents.back();
    return baseWidth - indent;
  }

  // Drops the indents only; the widths given to setWidths() are kept.
  void clear() { mIndents.clear(); }

 private:
  // Zero-initialized so that reading a fresh object is well defined.
  float mFirstWidth = 0.0f;
  int mFirstWidthLineCount = 0;
  float mRestWidth = 0.0f;
  std::vector<float> mIndents;
};
87 
88 class LineBreaker {
89  public:
90   const static int kTab_Shift =
91       29;  // keep synchronized with TAB_MASK in StaticLayout.java
92 
93   // Note: Locale persists across multiple invocations (it is not cleaned up by
94   // finish()), explicitly to avoid the cost of creating ICU BreakIterator
95   // objects. It should always be set on the first invocation, but callers are
96   // encouraged not to call again unless locale has actually changed. That logic
97   // could be here but it's better for performance that it's upstream because of
98   // the cost of constructing and comparing the ICU Locale object.
99   // Note: caller is responsible for managing lifetime of hyphenator
100   //
101   // libtxt extension: always use the default locale so that a cached instance
102   // of the ICU break iterator can be reused.
103   void setLocale();
104 
resize(size_t size)105   void resize(size_t size) {
106     mTextBuf.resize(size);
107     mCharWidths.resize(size);
108   }
109 
size()110   size_t size() const { return mTextBuf.size(); }
111 
buffer()112   uint16_t* buffer() { return mTextBuf.data(); }
113 
charWidths()114   float* charWidths() { return mCharWidths.data(); }
115 
116   // set text to current contents of buffer
117   void setText();
118 
119   void setLineWidths(float firstWidth,
120                      int firstWidthLineCount,
121                      float restWidth);
122 
123   void setIndents(const std::vector<float>& indents);
124 
getStrategy()125   BreakStrategy getStrategy() const { return mStrategy; }
126 
setStrategy(BreakStrategy strategy)127   void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
128 
setJustified(bool justified)129   void setJustified(bool justified) { mJustified = justified; }
130 
getHyphenationFrequency()131   HyphenationFrequency getHyphenationFrequency() const {
132     return mHyphenationFrequency;
133   }
134 
setHyphenationFrequency(HyphenationFrequency frequency)135   void setHyphenationFrequency(HyphenationFrequency frequency) {
136     mHyphenationFrequency = frequency;
137   }
138 
139   // TODO: this class is actually fairly close to being general and not tied to
140   // using Minikin to do the shaping of the strings. The main thing that would
141   // need to be changed is having some kind of callback (or virtual class, or
142   // maybe even template), which could easily be instantiated with Minikin's
143   // Layout. Future work for when needed.
144   float addStyleRun(MinikinPaint* paint,
145                     const std::shared_ptr<FontCollection>& typeface,
146                     FontStyle style,
147                     size_t start,
148                     size_t end,
149                     bool isRtl);
150 
151   void addReplacement(size_t start, size_t end, float width);
152 
153   size_t computeBreaks();
154 
155   // libtxt: Add ability to set custom char widths. This allows manual
156   // definition of the widths of arbitrary glyphs. To linebreak properly, call
157   // addStyleRun with nullptr as the paint property, which will lead it to
158   // assume the width has already been calculated. Used for properly breaking
159   // inline placeholders.
160   void setCustomCharWidth(size_t offset, float width);
161 
getBreaks()162   const int* getBreaks() const { return mBreaks.data(); }
163 
getWidths()164   const float* getWidths() const { return mWidths.data(); }
165 
getFlags()166   const int* getFlags() const { return mFlags.data(); }
167 
168   void finish();
169 
170  private:
171   // ParaWidth is used to hold cumulative width from beginning of paragraph.
172   // Note that for very large paragraphs, accuracy could degrade using only
173   // 32-bit float. Note however that float is used extensively on the Java side
174   // for this. This is a typedef so that we can easily change it based on
175   // performance/accuracy tradeoff.
176   typedef double ParaWidth;
177 
178   // A single candidate break
179   struct Candidate {
180     size_t offset;        // offset to text buffer, in code units
181     size_t prev;          // index to previous break
182     ParaWidth preBreak;   // width of text until this point, if we decide to not
183                           // break here
184     ParaWidth postBreak;  // width of text until this point, if we decide to
185                           // break here
186     float penalty;        // penalty of this break (for example, hyphen penalty)
187     float score;          // best score found for this break
188     size_t lineNumber;    // only updated for non-constant line widths
189     size_t preSpaceCount;   // preceding space count before breaking
190     size_t postSpaceCount;  // preceding space count after breaking
191     HyphenationType hyphenType;
192   };
193 
194   float currentLineWidth() const;
195 
196   void addWordBreak(size_t offset,
197                     ParaWidth preBreak,
198                     ParaWidth postBreak,
199                     size_t preSpaceCount,
200                     size_t postSpaceCount,
201                     float penalty,
202                     HyphenationType hyph);
203 
204   void addCandidate(Candidate cand);
205   void pushGreedyBreak();
206 
207   // push an actual break to the output. Takes care of setting flags for tab
208   void pushBreak(int offset, float width, uint8_t hyphenEdit);
209 
210   float getSpaceWidth() const;
211 
212   void computeBreaksGreedy();
213 
214   void computeBreaksOptimal(bool isRectangular);
215 
216   void finishBreaksOptimal();
217 
218   WordBreaker mWordBreaker;
219   icu::Locale mLocale;
220   std::vector<uint16_t> mTextBuf;
221   std::vector<float> mCharWidths;
222 
223   Hyphenator* mHyphenator;
224   std::vector<HyphenationType> mHyphBuf;
225 
226   // layout parameters
227   BreakStrategy mStrategy = kBreakStrategy_Greedy;
228   HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
229   bool mJustified;
230   LineWidths mLineWidths;
231 
232   // result of line breaking
233   std::vector<int> mBreaks;
234   std::vector<float> mWidths;
235   std::vector<int> mFlags;
236 
237   ParaWidth mWidth = 0;
238   std::vector<Candidate> mCandidates;
239   float mLinePenalty = 0.0f;
240 
241   // the following are state for greedy breaker (updated while adding style
242   // runs)
243   size_t mLastBreak;
244   size_t mBestBreak;
245   float mBestScore;
246   ParaWidth mPreBreak;        // prebreak of last break
247   uint32_t mLastHyphenation;  // hyphen edit of last break kept for next line
248   int mFirstTabIndex;
249   size_t mSpaceCount;
250 };
251 
252 }  // namespace minikin
253 
254 #endif  // MINIKIN_LINE_BREAKER_H
255