/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MINIKIN_LINE_BREAKER_UTIL_H #define MINIKIN_LINE_BREAKER_UTIL_H #include #include "minikin/Hyphenator.h" #include "minikin/MeasuredText.h" #include "minikin/U16StringPiece.h" #include "HyphenatorMap.h" #include "LayoutUtils.h" #include "Locale.h" #include "LocaleListCache.h" #include "MinikinInternal.h" #include "WordBreaker.h" namespace minikin { // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for very large // paragraphs, accuracy could degrade using only 32-bit float. Note however that float is used // extensively on the Java side for this. This is a typedef so that we can easily change it based // on performance/accuracy tradeoff. typedef float ParaWidth; // Hyphenates a string potentially containing non-breaking spaces. std::vector hyphenate(const U16StringPiece& string, const Hyphenator& hypenator); // This function determines whether a character is a space that disappears at end of line. // It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'. // Note: all such characters are in the BMP, so it's ok to use code units for this. inline bool isLineEndSpace(uint16_t c) { return c == '\n' || c == ' ' // SPACE || c == 0x1680 // OGHAM SPACE MARK || (0x2000 <= c && c <= 0x200A && c != 0x2007) // EN QUAD, EM QUAD, EN SPACE, EM SPACE, // THREE-PER-EM SPACE, FOUR-PER-EM SPACE, // SIX-PER-EM SPACE, PUNCTUATION SPACE, // THIN SPACE, HAIR SPACE || c == 0x205F // MEDIUM MATHEMATICAL SPACE || c == 0x3000; } inline Locale getEffectiveLocale(uint32_t localeListId) { const LocaleList& localeList = LocaleListCache::getById(localeListId); return localeList.empty() ? Locale() : localeList[0]; } // Retrieves hyphenation break points from a word. inline void populateHyphenationPoints( const U16StringPiece& textBuf, // A text buffer. const Run& run, // A run of this region. const Hyphenator& hyphenator, // A hyphenator to be used for hyphenation. const Range& contextRange, // A context range for measuring hyphenated piece. const Range& hyphenationTargetRange, // An actual range for the hyphenation target. const std::vector& charWidths, // Char width used for hyphen piece estimation. bool ignoreKerning, // True use full shaping for hyphenation piece. std::vector* out, // An output to be appended. LayoutPieces* pieces) { // An output of layout pieces. Maybe null. if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) { return; } const std::vector hyphenResult = hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator); for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) { const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)]; if (hyph == HyphenationType::DONT_BREAK) { continue; // Not a hyphenation point. } if (!ignoreKerning) { auto hyphenPart = contextRange.split(i); U16StringPiece firstText = textBuf.substr(hyphenPart.first); U16StringPiece secondText = textBuf.substr(hyphenPart.second); const float first = run.measureHyphenPiece(firstText, Range(0, firstText.size()), StartHyphenEdit::NO_EDIT /* start hyphen edit */, editForThisLine(hyph) /* end hyphen edit */, pieces); const float second = run.measureHyphenPiece(secondText, Range(0, secondText.size()), editForNextLine(hyph) /* start hyphen edit */, EndHyphenEdit::NO_EDIT /* end hyphen edit */, pieces); out->emplace_back(i, hyph, first, second); } else { float first = 0; float second = 0; for (uint32_t j = contextRange.getStart(); j < i; ++j) { first += charWidths[j]; } for (uint32_t j = i; j < contextRange.getEnd(); ++j) { second += charWidths[j]; } EndHyphenEdit endEdit = editForThisLine(hyph); StartHyphenEdit startEdit = editForNextLine(hyph); if (endEdit != EndHyphenEdit::NO_EDIT) { auto [str, strSize] = getHyphenString(endEdit); first += run.measureText(U16StringPiece(str, strSize)); } if (startEdit != StartHyphenEdit::NO_EDIT) { auto [str, strSize] = getHyphenString(startEdit); second += run.measureText(U16StringPiece(str, strSize)); } out->emplace_back(i, hyph, first, second); } } } // Processes and retrieve informations from characters in the paragraph. struct CharProcessor { // The number of spaces. uint32_t rawSpaceCount = 0; // The number of spaces minus trailing spaces. uint32_t effectiveSpaceCount = 0; // The sum of character width from the paragraph start. ParaWidth sumOfCharWidths = 0.0; // The sum of character width from the paragraph start minus trailing line end spaces. // This means that the line width from the paragraph start if we decided break now. ParaWidth effectiveWidth = 0.0; // The total amount of character widths at the previous word break point. ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0; // The next word break offset. uint32_t nextWordBreak = 0; // The previous word break offset. uint32_t prevWordBreak = 0; // The width of a space. May be 0 if there are no spaces. // Note: if there are multiple different widths for spaces (for example, because of mixing of // fonts), it's only guaranteed to pick one. float spaceWidth = 0.0f; // The current hyphenator. const Hyphenator* hyphenator = nullptr; // Retrieve the current word range. inline Range wordRange() const { return breaker.wordRange(); } // Retrieve the current context range. inline Range contextRange() const { return Range(prevWordBreak, nextWordBreak); } // Returns the width from the last word break point. inline ParaWidth widthFromLastWordBreak() const { return effectiveWidth - sumOfCharWidthsAtPrevWordBreak; } // Returns the break penalty for the current word break point. inline int wordBreakPenalty() const { return breaker.breakBadness(); } CharProcessor(const U16StringPiece& text) { breaker.setText(text.data(), text.size()); } // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one // time before feeding characters. void updateLocaleIfNecessary(const Run& run) { uint32_t newLocaleListId = run.getLocaleListId(); if (localeListId != newLocaleListId) { Locale locale = getEffectiveLocale(newLocaleListId); nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(), run.lineBreakWordStyle(), run.getRange().getStart()); hyphenator = HyphenatorMap::lookup(locale); localeListId = newLocaleListId; } } // Process one character. void feedChar(uint32_t idx, uint16_t c, float w, bool canBreakHere) { if (idx == nextWordBreak) { if (canBreakHere) { prevWordBreak = nextWordBreak; sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths; } nextWordBreak = breaker.next(); } if (isWordSpace(c)) { rawSpaceCount += 1; spaceWidth = w; } sumOfCharWidths += w; if (isLineEndSpace(c)) { // If we break a line on a line-ending space, that space goes away. So postBreak // and postSpaceCount, which keep the width and number of spaces if we decide to // break at this point, don't need to get adjusted. } else { effectiveSpaceCount = rawSpaceCount; effectiveWidth = sumOfCharWidths; } } private: // The current locale list id. uint32_t localeListId = LocaleListCache::kInvalidListId; WordBreaker breaker; }; } // namespace minikin #endif // MINIKIN_LINE_BREAKER_UTIL_H