1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A module for breaking paragraphs into lines, supporting high quality 19 * hyphenation and justification. 20 */ 21 22 #ifndef MINIKIN_LINE_BREAKER_H 23 #define MINIKIN_LINE_BREAKER_H 24 25 #ifndef U_USING_ICU_NAMESPACE 26 #define U_USING_ICU_NAMESPACE 0 27 #endif // U_USING_ICU_NAMESPACE 28 29 #include <cmath> 30 #include <vector> 31 #include "minikin/FontCollection.h" 32 #include "minikin/Hyphenator.h" 33 #include "minikin/MinikinFont.h" 34 #include "minikin/WordBreaker.h" 35 #include "unicode/brkiter.h" 36 #include "unicode/locid.h" 37 38 namespace minikin { 39 40 enum BreakStrategy { 41 kBreakStrategy_Greedy = 0, 42 kBreakStrategy_HighQuality = 1, 43 kBreakStrategy_Balanced = 2 44 }; 45 46 enum HyphenationFrequency { 47 kHyphenationFrequency_None = 0, 48 kHyphenationFrequency_Normal = 1, 49 kHyphenationFrequency_Full = 2 50 }; 51 52 bool isLineEndSpace(uint16_t c); 53 54 // TODO: want to generalize to be able to handle array of line widths 55 class LineWidths { 56 public: setWidths(float firstWidth,int firstWidthLineCount,float restWidth)57 void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { 58 mFirstWidth = firstWidth; 59 mFirstWidthLineCount = firstWidthLineCount; 60 mRestWidth = restWidth; 61 } setIndents(const std::vector<float> & indents)62 void setIndents(const std::vector<float>& indents) { mIndents = indents; } isConstant()63 bool isConstant() const { 64 // technically mFirstWidthLineCount == 0 would count too, but doesn't 65 // actually happen 66 return mRestWidth == mFirstWidth && mIndents.empty(); 67 } getLineWidth(int line)68 float getLineWidth(int line) const { 69 float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; 70 if (!mIndents.empty()) { 71 if ((size_t)line < mIndents.size()) { 72 width -= mIndents[line]; 73 } else { 74 width -= mIndents.back(); 75 } 76 } 77 return width; 78 } clear()79 void clear() { mIndents.clear(); } 80 81 private: 82 float mFirstWidth; 83 int mFirstWidthLineCount; 84 float mRestWidth; 85 std::vector<float> mIndents; 86 }; 87 88 class LineBreaker { 89 public: 90 const static int kTab_Shift = 91 29; // keep synchronized with TAB_MASK in StaticLayout.java 92 93 // Note: Locale persists across multiple invocations (it is not cleaned up by 94 // finish()), explicitly to avoid the cost of creating ICU BreakIterator 95 // objects. It should always be set on the first invocation, but callers are 96 // encouraged not to call again unless locale has actually changed. That logic 97 // could be here but it's better for performance that it's upstream because of 98 // the cost of constructing and comparing the ICU Locale object. 99 // Note: caller is responsible for managing lifetime of hyphenator 100 // 101 // libtxt extension: always use the default locale so that a cached instance 102 // of the ICU break iterator can be reused. 103 void setLocale(); 104 resize(size_t size)105 void resize(size_t size) { 106 mTextBuf.resize(size); 107 mCharWidths.resize(size); 108 } 109 size()110 size_t size() const { return mTextBuf.size(); } 111 buffer()112 uint16_t* buffer() { return mTextBuf.data(); } 113 charWidths()114 float* charWidths() { return mCharWidths.data(); } 115 116 // set text to current contents of buffer 117 void setText(); 118 119 void setLineWidths(float firstWidth, 120 int firstWidthLineCount, 121 float restWidth); 122 123 void setIndents(const std::vector<float>& indents); 124 getStrategy()125 BreakStrategy getStrategy() const { return mStrategy; } 126 setStrategy(BreakStrategy strategy)127 void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } 128 setJustified(bool justified)129 void setJustified(bool justified) { mJustified = justified; } 130 getHyphenationFrequency()131 HyphenationFrequency getHyphenationFrequency() const { 132 return mHyphenationFrequency; 133 } 134 setHyphenationFrequency(HyphenationFrequency frequency)135 void setHyphenationFrequency(HyphenationFrequency frequency) { 136 mHyphenationFrequency = frequency; 137 } 138 139 // TODO: this class is actually fairly close to being general and not tied to 140 // using Minikin to do the shaping of the strings. The main thing that would 141 // need to be changed is having some kind of callback (or virtual class, or 142 // maybe even template), which could easily be instantiated with Minikin's 143 // Layout. Future work for when needed. 144 float addStyleRun(MinikinPaint* paint, 145 const std::shared_ptr<FontCollection>& typeface, 146 FontStyle style, 147 size_t start, 148 size_t end, 149 bool isRtl); 150 151 void addReplacement(size_t start, size_t end, float width); 152 153 size_t computeBreaks(); 154 155 // libtxt: Add ability to set custom char widths. This allows manual 156 // definition of the widths of arbitrary glyphs. To linebreak properly, call 157 // addStyleRun with nullptr as the paint property, which will lead it to 158 // assume the width has already been calculated. Used for properly breaking 159 // inline placeholders. 160 void setCustomCharWidth(size_t offset, float width); 161 getBreaks()162 const int* getBreaks() const { return mBreaks.data(); } 163 getWidths()164 const float* getWidths() const { return mWidths.data(); } 165 getFlags()166 const int* getFlags() const { return mFlags.data(); } 167 168 void finish(); 169 170 private: 171 // ParaWidth is used to hold cumulative width from beginning of paragraph. 172 // Note that for very large paragraphs, accuracy could degrade using only 173 // 32-bit float. Note however that float is used extensively on the Java side 174 // for this. This is a typedef so that we can easily change it based on 175 // performance/accuracy tradeoff. 176 typedef double ParaWidth; 177 178 // A single candidate break 179 struct Candidate { 180 size_t offset; // offset to text buffer, in code units 181 size_t prev; // index to previous break 182 ParaWidth preBreak; // width of text until this point, if we decide to not 183 // break here 184 ParaWidth postBreak; // width of text until this point, if we decide to 185 // break here 186 float penalty; // penalty of this break (for example, hyphen penalty) 187 float score; // best score found for this break 188 size_t lineNumber; // only updated for non-constant line widths 189 size_t preSpaceCount; // preceding space count before breaking 190 size_t postSpaceCount; // preceding space count after breaking 191 HyphenationType hyphenType; 192 }; 193 194 float currentLineWidth() const; 195 196 void addWordBreak(size_t offset, 197 ParaWidth preBreak, 198 ParaWidth postBreak, 199 size_t preSpaceCount, 200 size_t postSpaceCount, 201 float penalty, 202 HyphenationType hyph); 203 204 void addCandidate(Candidate cand); 205 void pushGreedyBreak(); 206 207 // push an actual break to the output. Takes care of setting flags for tab 208 void pushBreak(int offset, float width, uint8_t hyphenEdit); 209 210 float getSpaceWidth() const; 211 212 void computeBreaksGreedy(); 213 214 void computeBreaksOptimal(bool isRectangular); 215 216 void finishBreaksOptimal(); 217 218 WordBreaker mWordBreaker; 219 icu::Locale mLocale; 220 std::vector<uint16_t> mTextBuf; 221 std::vector<float> mCharWidths; 222 223 Hyphenator* mHyphenator; 224 std::vector<HyphenationType> mHyphBuf; 225 226 // layout parameters 227 BreakStrategy mStrategy = kBreakStrategy_Greedy; 228 HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; 229 bool mJustified; 230 LineWidths mLineWidths; 231 232 // result of line breaking 233 std::vector<int> mBreaks; 234 std::vector<float> mWidths; 235 std::vector<int> mFlags; 236 237 ParaWidth mWidth = 0; 238 std::vector<Candidate> mCandidates; 239 float mLinePenalty = 0.0f; 240 241 // the following are state for greedy breaker (updated while adding style 242 // runs) 243 size_t mLastBreak; 244 size_t mBestBreak; 245 float mBestScore; 246 ParaWidth mPreBreak; // prebreak of last break 247 uint32_t mLastHyphenation; // hyphen edit of last break kept for next line 248 int mFirstTabIndex; 249 size_t mSpaceCount; 250 }; 251 252 } // namespace minikin 253 254 #endif // MINIKIN_LINE_BREAKER_H 255