1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A module for breaking paragraphs into lines, supporting high quality 19 * hyphenation and justification. 20 */ 21 22 #ifndef MINIKIN_LINE_BREAKER_H 23 #define MINIKIN_LINE_BREAKER_H 24 25 #include <gtest/gtest_prod.h> 26 #include "unicode/brkiter.h" 27 #include "unicode/locid.h" 28 #include <cmath> 29 #include <vector> 30 #include "minikin/FontCollection.h" 31 #include "minikin/Hyphenator.h" 32 #include "minikin/MinikinFont.h" 33 #include "minikin/WordBreaker.h" 34 35 namespace minikin { 36 37 enum BreakStrategy { 38 kBreakStrategy_Greedy = 0, 39 kBreakStrategy_HighQuality = 1, 40 kBreakStrategy_Balanced = 2 41 }; 42 43 enum HyphenationFrequency { 44 kHyphenationFrequency_None = 0, 45 kHyphenationFrequency_Normal = 1, 46 kHyphenationFrequency_Full = 2 47 }; 48 49 // TODO: want to generalize to be able to handle array of line widths 50 class LineWidths { 51 public: setWidths(float firstWidth,int firstWidthLineCount,float restWidth)52 void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 53 mFirstWidth = firstWidth; 54 mFirstWidthLineCount = firstWidthLineCount; 55 mRestWidth = restWidth; 56 } setIndents(const std::vector<float> & indents)57 void setIndents(const std::vector<float>& indents) { 58 mIndents = indents; 59 } isConstant()60 bool isConstant() const { 61 // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen 62 return mRestWidth == mFirstWidth && mIndents.empty(); 63 } getLineWidth(int line)64 float getLineWidth(int line) const { 65 float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; 66 if (!mIndents.empty()) { 67 if ((size_t)line < mIndents.size()) { 68 width -= mIndents[line]; 69 } else { 70 width -= mIndents.back(); 71 } 72 } 73 return width; 74 } clear()75 void clear() { 76 mIndents.clear(); 77 } 78 private: 79 float mFirstWidth; 80 int mFirstWidthLineCount; 81 float mRestWidth; 82 std::vector<float> mIndents; 83 }; 84 85 class TabStops { 86 public: set(const int * stops,size_t nStops,int tabWidth)87 void set(const int* stops, size_t nStops, int tabWidth) { 88 if (stops != nullptr) { 89 mStops.assign(stops, stops + nStops); 90 } else { 91 mStops.clear(); 92 } 93 mTabWidth = tabWidth; 94 } nextTab(float widthSoFar)95 float nextTab(float widthSoFar) const { 96 for (size_t i = 0; i < mStops.size(); i++) { 97 if (mStops[i] > widthSoFar) { 98 return mStops[i]; 99 } 100 } 101 return floor(widthSoFar / mTabWidth + 1) * mTabWidth; 102 } 103 private: 104 std::vector<int> mStops; 105 int mTabWidth; 106 }; 107 108 class LineBreaker { 109 public: 110 const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java 111 112 // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), 113 // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always 114 // be set on the first invocation, but callers are encouraged not to call again unless 115 // locale has actually changed. 116 // That logic could be here but it's better for performance that it's upstream because of 117 // the cost of constructing and comparing the ICU Locale object. 118 // Note: caller is responsible for managing lifetime of hyphenator 119 void setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators); 120 resize(size_t size)121 void resize(size_t size) { 122 mTextBuf.resize(size); 123 mCharWidths.resize(size); 124 } 125 size()126 size_t size() const { 127 return mTextBuf.size(); 128 } 129 buffer()130 uint16_t* buffer() { 131 return mTextBuf.data(); 132 } 133 charWidths()134 float* charWidths() { 135 return mCharWidths.data(); 136 } 137 138 // set text to current contents of buffer 139 void setText(); 140 141 void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth); 142 143 void setIndents(const std::vector<float>& indents); 144 setTabStops(const int * stops,size_t nStops,int tabWidth)145 void setTabStops(const int* stops, size_t nStops, int tabWidth) { 146 mTabStops.set(stops, nStops, tabWidth); 147 } 148 getStrategy()149 BreakStrategy getStrategy() const { return mStrategy; } 150 setStrategy(BreakStrategy strategy)151 void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } 152 setJustified(bool justified)153 void setJustified(bool justified) { mJustified = justified; } 154 getHyphenationFrequency()155 HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; } 156 setHyphenationFrequency(HyphenationFrequency frequency)157 void setHyphenationFrequency(HyphenationFrequency frequency) { 158 mHyphenationFrequency = frequency; 159 } 160 161 // TODO: this class is actually fairly close to being general and not tied to using 162 // Minikin to do the shaping of the strings. The main thing that would need to be changed 163 // is having some kind of callback (or virtual class, or maybe even template), which could 164 // easily be instantiated with Minikin's Layout. Future work for when needed. 165 float addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface, 166 FontStyle style, size_t start, size_t end, bool isRtl); 167 168 void addReplacement(size_t start, size_t end, float width); 169 170 size_t computeBreaks(); 171 getBreaks()172 const int* getBreaks() const { 173 return mBreaks.data(); 174 } 175 getWidths()176 const float* getWidths() const { 177 return mWidths.data(); 178 } 179 getFlags()180 const int* getFlags() const { 181 return mFlags.data(); 182 } 183 184 void finish(); 185 186 private: 187 // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for 188 // very large paragraphs, accuracy could degrade using only 32-bit float. Note however 189 // that float is used extensively on the Java side for this. This is a typedef so that 190 // we can easily change it based on performance/accuracy tradeoff. 191 typedef double ParaWidth; 192 193 // A single candidate break 194 struct Candidate { 195 size_t offset; // offset to text buffer, in code units 196 size_t prev; // index to previous break 197 ParaWidth preBreak; // width of text until this point, if we decide to not break here 198 ParaWidth postBreak; // width of text until this point, if we decide to break here 199 float penalty; // penalty of this break (for example, hyphen penalty) 200 float score; // best score found for this break 201 size_t lineNumber; // only updated for non-constant line widths 202 size_t preSpaceCount; // preceding space count before breaking 203 size_t postSpaceCount; // preceding space count after breaking 204 HyphenationType hyphenType; 205 }; 206 207 float currentLineWidth() const; 208 209 void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, 210 size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph); 211 212 void addCandidate(Candidate cand); 213 void pushGreedyBreak(); 214 215 // push an actual break to the output. Takes care of setting flags for tab 216 void pushBreak(int offset, float width, uint8_t hyphenEdit); 217 218 float getSpaceWidth() const; 219 220 void computeBreaksGreedy(); 221 222 void computeBreaksOptimal(bool isRectangular); 223 224 void finishBreaksOptimal(); 225 226 WordBreaker mWordBreaker; 227 icu::Locale mLocale; 228 std::vector<uint16_t>mTextBuf; 229 std::vector<float>mCharWidths; 230 231 Hyphenator* mHyphenator; 232 std::vector<HyphenationType> mHyphBuf; 233 234 // layout parameters 235 BreakStrategy mStrategy = kBreakStrategy_Greedy; 236 HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; 237 bool mJustified; 238 LineWidths mLineWidths; 239 TabStops mTabStops; 240 241 // result of line breaking 242 std::vector<int> mBreaks; 243 std::vector<float> mWidths; 244 std::vector<int> mFlags; 245 246 ParaWidth mWidth = 0; 247 std::vector<Candidate> mCandidates; 248 float mLinePenalty = 0.0f; 249 250 // the following are state for greedy breaker (updated while adding style runs) 251 size_t mLastBreak; 252 size_t mBestBreak; 253 float mBestScore; 254 ParaWidth mPreBreak; // prebreak of last break 255 uint32_t mLastHyphenation; // hyphen edit of last break kept for next line 256 int mFirstTabIndex; 257 size_t mSpaceCount; 258 259 FRIEND_TEST(LineBreakerTest, setLocales); 260 }; 261 262 } // namespace minikin 263 264 #endif // MINIKIN_LINE_BREAKER_H 265