1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A module for breaking paragraphs into lines, supporting high quality 19 * hyphenation and justification. 20 */ 21 22 #ifndef MINIKIN_LINE_BREAKER_H 23 #define MINIKIN_LINE_BREAKER_H 24 25 #ifndef U_USING_ICU_NAMESPACE 26 #define U_USING_ICU_NAMESPACE 0 27 #endif // U_USING_ICU_NAMESPACE 28 29 #include <cmath> 30 #include <vector> 31 #include "minikin/FontCollection.h" 32 #include "minikin/Hyphenator.h" 33 #include "minikin/MinikinFont.h" 34 #include "minikin/WordBreaker.h" 35 #include "unicode/brkiter.h" 36 #include "unicode/locid.h" 37 38 namespace minikin { 39 40 enum BreakStrategy { 41 kBreakStrategy_Greedy = 0, 42 kBreakStrategy_HighQuality = 1, 43 kBreakStrategy_Balanced = 2 44 }; 45 46 enum WordBreakType { 47 kWordBreakType_Normal = 0, 48 kWordBreakType_BreakAll = 1, 49 kWordBreakType_BreakWord = 2 50 }; 51 52 enum HyphenationFrequency { 53 kHyphenationFrequency_None = 0, 54 kHyphenationFrequency_Normal = 1, 55 kHyphenationFrequency_Full = 2 56 }; 57 58 bool isLineEndSpace(uint16_t c); 59 60 // TODO: want to generalize to be able to handle array of line widths 61 class LineWidths { 62 public: setWidths(float firstWidth,int firstWidthLineCount,float restWidth)63 void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 64 mFirstWidth = firstWidth; 65 mFirstWidthLineCount = firstWidthLineCount; 66 mRestWidth = restWidth; 67 } setIndents(const std::vector<float> & indents)68 void setIndents(const std::vector<float>& indents) { mIndents = indents; } isConstant()69 bool isConstant() const { 70 // technically mFirstWidthLineCount == 0 would count too, but doesn't 71 // actually happen 72 return mRestWidth == mFirstWidth && mIndents.empty(); 73 } getLineWidth(int line)74 float getLineWidth(int line) const { 75 float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; 76 if (!mIndents.empty()) { 77 if ((size_t)line < mIndents.size()) { 78 width -= mIndents[line]; 79 } else { 80 width -= mIndents.back(); 81 } 82 } 83 return width; 84 } clear()85 void clear() { mIndents.clear(); } 86 87 private: 88 float mFirstWidth; 89 int mFirstWidthLineCount; 90 float mRestWidth; 91 std::vector<float> mIndents; 92 }; 93 94 class LineBreaker { 95 public: 96 const static int kTab_Shift = 97 29; // keep synchronized with TAB_MASK in StaticLayout.java 98 99 // Note: Locale persists across multiple invocations (it is not cleaned up by 100 // finish()), explicitly to avoid the cost of creating ICU BreakIterator 101 // objects. It should always be set on the first invocation, but callers are 102 // encouraged not to call again unless locale has actually changed. That logic 103 // could be here but it's better for performance that it's upstream because of 104 // the cost of constructing and comparing the ICU Locale object. 105 // Note: caller is responsible for managing lifetime of hyphenator 106 // 107 // libtxt extension: always use the default locale so that a cached instance 108 // of the ICU break iterator can be reused. 109 void setLocale(); 110 resize(size_t size)111 void resize(size_t size) { 112 mTextBuf.resize(size); 113 mCharWidths.resize(size); 114 } 115 size()116 size_t size() const { return mTextBuf.size(); } 117 buffer()118 uint16_t* buffer() { return mTextBuf.data(); } 119 charWidths()120 float* charWidths() { return mCharWidths.data(); } 121 122 // set text to current contents of buffer 123 void setText(); 124 125 void setLineWidths(float firstWidth, 126 int firstWidthLineCount, 127 float restWidth); 128 129 void setIndents(const std::vector<float>& indents); 130 getStrategy()131 BreakStrategy getStrategy() const { return mStrategy; } 132 setStrategy(BreakStrategy strategy)133 void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } 134 setWordBreakType(WordBreakType wordBreakType)135 void setWordBreakType(WordBreakType wordBreakType) { 136 mWordBreakType = wordBreakType; 137 } 138 setJustified(bool justified)139 void setJustified(bool justified) { mJustified = justified; } 140 getHyphenationFrequency()141 HyphenationFrequency getHyphenationFrequency() const { 142 return mHyphenationFrequency; 143 } 144 setHyphenationFrequency(HyphenationFrequency frequency)145 void setHyphenationFrequency(HyphenationFrequency frequency) { 146 mHyphenationFrequency = frequency; 147 } 148 149 // TODO: this class is actually fairly close to being general and not tied to 150 // using Minikin to do the shaping of the strings. The main thing that would 151 // need to be changed is having some kind of callback (or virtual class, or 152 // maybe even template), which could easily be instantiated with Minikin's 153 // Layout. Future work for when needed. 154 float addStyleRun(MinikinPaint* paint, 155 const std::shared_ptr<FontCollection>& typeface, 156 FontStyle style, 157 size_t start, 158 size_t end, 159 bool isRtl); 160 161 void addReplacement(size_t start, size_t end, float width); 162 163 size_t computeBreaks(); 164 165 // libtxt: Add ability to set custom char widths. This allows manual 166 // definition of the widths of arbitrary glyphs. To linebreak properly, call 167 // addStyleRun with nullptr as the paint property, which will lead it to 168 // assume the width has already been calculated. Used for properly breaking 169 // inline placeholders. 170 void setCustomCharWidth(size_t offset, float width); 171 getBreaks()172 const int* getBreaks() const { return mBreaks.data(); } 173 getWidths()174 const float* getWidths() const { return mWidths.data(); } 175 getFlags()176 const int* getFlags() const { return mFlags.data(); } 177 178 void finish(); 179 180 private: 181 // ParaWidth is used to hold cumulative width from beginning of paragraph. 182 // Note that for very large paragraphs, accuracy could degrade using only 183 // 32-bit float. Note however that float is used extensively on the Java side 184 // for this. This is a typedef so that we can easily change it based on 185 // performance/accuracy tradeoff. 186 typedef double ParaWidth; 187 188 // A single candidate break 189 struct Candidate { 190 size_t offset; // offset to text buffer, in code units 191 size_t prev; // index to previous break 192 ParaWidth preBreak; // width of text until this point, if we decide to not 193 // break here 194 ParaWidth postBreak; // width of text until this point, if we decide to 195 // break here 196 float penalty; // penalty of this break (for example, hyphen penalty) 197 float score; // best score found for this break 198 size_t lineNumber; // only updated for non-constant line widths 199 size_t preSpaceCount; // preceding space count before breaking 200 size_t postSpaceCount; // preceding space count after breaking 201 HyphenationType hyphenType; 202 }; 203 204 float currentLineWidth() const; 205 206 // Determine whether to split a character string. 207 bool IsSplittingCharacters(ParaWidth postBreak); 208 209 void addWordBreak(size_t offset, 210 ParaWidth preBreak, 211 ParaWidth postBreak, 212 size_t preSpaceCount, 213 size_t postSpaceCount, 214 float penalty, 215 HyphenationType hyph); 216 217 void addCandidate(Candidate cand); 218 void pushGreedyBreak(); 219 220 // push an actual break to the output. Takes care of setting flags for tab 221 void pushBreak(int offset, float width, uint8_t hyphenEdit); 222 223 float getSpaceWidth() const; 224 225 void computeBreaksGreedy(); 226 227 void computeBreaksOptimal(bool isRectangular); 228 229 void finishBreaksOptimal(); 230 231 WordBreaker mWordBreaker; 232 icu::Locale mLocale; 233 std::vector<uint16_t> mTextBuf; 234 std::vector<float> mCharWidths; 235 236 Hyphenator* mHyphenator; 237 std::vector<HyphenationType> mHyphBuf; 238 239 // layout parameters 240 BreakStrategy mStrategy = kBreakStrategy_Greedy; 241 WordBreakType mWordBreakType = WordBreakType::kWordBreakType_BreakWord; 242 HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; 243 bool mJustified; 244 LineWidths mLineWidths; 245 246 // result of line breaking 247 std::vector<int> mBreaks; 248 std::vector<float> mWidths; 249 std::vector<int> mFlags; 250 251 ParaWidth mWidth = 0; 252 std::vector<Candidate> mCandidates; 253 float mLinePenalty = 0.0f; 254 255 // the following are state for greedy breaker (updated while adding style 256 // runs) 257 size_t mLastBreak; 258 size_t mBestBreak; 259 float mBestScore; 260 ParaWidth mPreBreak; // prebreak of last break 261 uint32_t mLastHyphenation; // hyphen edit of last break kept for next line 262 int mFirstTabIndex; 263 size_t mSpaceCount; 264 }; 265 266 } // namespace minikin 267 268 #endif // MINIKIN_LINE_BREAKER_H 269