1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <unicode/ubrk.h> 27 28 #include <list> 29 #include <memory> 30 #include <mutex> 31 32 #include "Locale.h" 33 #include "minikin/IcuUtils.h" 34 #include "minikin/LineBreakStyle.h" 35 #include "minikin/Macros.h" 36 #include "minikin/Range.h" 37 38 namespace minikin { 39 40 class BreakIterator { 41 public: BreakIterator()42 BreakIterator() {} ~BreakIterator()43 virtual ~BreakIterator() {} 44 virtual void setText(UText* text, size_t size) = 0; 45 virtual bool isBoundary(int32_t i) = 0; 46 virtual int32_t following(size_t i) = 0; 47 virtual int32_t next() = 0; 48 }; 49 50 // A class interface for providing pooling implementation of ICU's line breaker. 51 // The implementation can be customized for testing purposes. 52 class ICULineBreakerPool { 53 public: 54 struct Slot { SlotSlot55 Slot() : localeId(0), breaker(nullptr) {} SlotSlot56 Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle, 57 std::unique_ptr<BreakIterator>&& breaker) 58 : localeId(localeId), 59 lbStyle(lbStyle), 60 lbWordStyle(lbWordStyle), 61 breaker(std::move(breaker)) {} 62 63 Slot(Slot&& other) = default; 64 Slot& operator=(Slot&& other) = default; 65 66 // Forbid copy and assignment. 67 Slot(const Slot&) = delete; 68 Slot& operator=(const Slot&) = delete; 69 70 uint64_t localeId; 71 LineBreakStyle lbStyle; 72 LineBreakWordStyle lbWordStyle; 73 std::unique_ptr<BreakIterator> breaker; 74 }; ~ICULineBreakerPool()75 virtual ~ICULineBreakerPool() {} 76 virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 77 LineBreakWordStyle lbWordStyle) = 0; 78 virtual void release(Slot&& slot) = 0; 79 }; 80 81 // An singleton implementation of the ICU line breaker pool. 82 // Since creating ICU line breaker instance takes some time. Pool it for later use. 83 class ICULineBreakerPoolImpl : public ICULineBreakerPool { 84 public: 85 Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 86 LineBreakWordStyle lbWordStyle) override; 87 void release(Slot&& slot) override; 88 getInstance()89 static ICULineBreakerPoolImpl& getInstance() { 90 static ICULineBreakerPoolImpl pool; 91 return pool; 92 } 93 94 protected: 95 // protected for testing purposes. 96 static constexpr size_t MAX_POOL_SIZE = 4; ICULineBreakerPoolImpl()97 ICULineBreakerPoolImpl(){}; // singleton. getPoolSize()98 size_t getPoolSize() const { 99 std::lock_guard<std::mutex> lock(mMutex); 100 return mPool.size(); 101 } 102 103 private: 104 std::list<Slot> mPool GUARDED_BY(mMutex); 105 mutable std::mutex mMutex; 106 }; 107 108 class ICUBreakIterator : public BreakIterator { 109 public: ICUBreakIterator(IcuUbrkUniquePtr && breaker)110 ICUBreakIterator(IcuUbrkUniquePtr&& breaker) : mBreaker(std::move(breaker)) {} ~ICUBreakIterator()111 virtual ~ICUBreakIterator() {} 112 virtual void setText(UText* text, size_t size); 113 virtual bool isBoundary(int32_t i); 114 virtual int32_t following(size_t i); 115 virtual int32_t next(); 116 117 private: 118 IcuUbrkUniquePtr mBreaker; 119 }; 120 121 class NoBreakBreakIterator : public BreakIterator { 122 public: NoBreakBreakIterator()123 NoBreakBreakIterator() {} ~NoBreakBreakIterator()124 virtual ~NoBreakBreakIterator() {} 125 setText(UText *,size_t size)126 virtual void setText(UText*, size_t size) { mSize = size; } isBoundary(int32_t i)127 virtual bool isBoundary(int32_t i) { return i == 0 || i == static_cast<int32_t>(mSize); } following(size_t)128 virtual int32_t following(size_t) { return mSize; } next()129 virtual int32_t next() { return mSize; } 130 131 private: 132 size_t mSize = 0; 133 }; 134 135 class WordBreaker { 136 public: ~WordBreaker()137 virtual ~WordBreaker() { finish(); } 138 139 WordBreaker(); 140 141 void setText(const uint16_t* data, size_t size); 142 143 // Advance iterator to next word break with current locale. Return offset, or -1 if EOT 144 ssize_t next(); 145 146 // Advance iterator to the break just after "from" with using the new provided locale. 147 // Return offset, or -1 if EOT 148 ssize_t followingWithLocale(const Locale& locale, LineBreakStyle lbStyle, 149 LineBreakWordStyle lbWordStyle, size_t from); 150 151 // Current offset of iterator, equal to 0 at BOT or last return from next() 152 ssize_t current() const; 153 154 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 155 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 156 ssize_t wordStart() const; 157 158 ssize_t wordEnd() const; 159 160 // Returns the range from wordStart() to wordEnd(). 161 // If wordEnd() <= wordStart(), returns empty range. wordRange()162 inline Range wordRange() const { 163 const uint32_t start = wordStart(); 164 const uint32_t end = wordEnd(); 165 return start < end ? Range(start, end) : Range(end, end); 166 } 167 168 int breakBadness() const; 169 170 void finish(); 171 172 protected: 173 // protected virtual for testing purpose. 174 // Caller must release the pool. 175 WordBreaker(ICULineBreakerPool* pool); 176 177 private: 178 int32_t iteratorNext(); 179 void detectEmailOrUrl(); 180 ssize_t findNextBreakInEmailOrUrl(); 181 182 // Doesn't take ownership. Must not be nullptr. Must be set in constructor. 183 ICULineBreakerPool* mPool; 184 185 ICULineBreakerPool::Slot mIcuBreaker; 186 187 std::unique_ptr<UText, decltype(&utext_close)> mUText; 188 const uint16_t* mText = nullptr; 189 size_t mTextSize; 190 ssize_t mLast; 191 ssize_t mCurrent; 192 193 // state for the email address / url detector 194 ssize_t mScanOffset; 195 bool mInEmailOrUrl; 196 }; 197 198 } // namespace minikin 199 200 #endif // MINIKIN_WORD_BREAKER_H 201