1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <unicode/ubrk.h> 27 28 #include <list> 29 #include <memory> 30 #include <mutex> 31 32 #include "Locale.h" 33 #include "minikin/IcuUtils.h" 34 #include "minikin/LineBreakStyle.h" 35 #include "minikin/Macros.h" 36 #include "minikin/Range.h" 37 38 namespace minikin { 39 40 // A class interface for providing pooling implementation of ICU's line breaker. 41 // The implementation can be customized for testing purposes. 42 class ICULineBreakerPool { 43 public: 44 struct Slot { SlotSlot45 Slot() : localeId(0), breaker(nullptr) {} SlotSlot46 Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle, 47 IcuUbrkUniquePtr&& breaker) 48 : localeId(localeId), 49 lbStyle(lbStyle), 50 lbWordStyle(lbWordStyle), 51 breaker(std::move(breaker)) {} 52 53 Slot(Slot&& other) = default; 54 Slot& operator=(Slot&& other) = default; 55 56 // Forbid copy and assignment. 57 Slot(const Slot&) = delete; 58 Slot& operator=(const Slot&) = delete; 59 60 uint64_t localeId; 61 LineBreakStyle lbStyle; 62 LineBreakWordStyle lbWordStyle; 63 IcuUbrkUniquePtr breaker; 64 }; ~ICULineBreakerPool()65 virtual ~ICULineBreakerPool() {} 66 virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 67 LineBreakWordStyle lbWordStyle) = 0; 68 virtual void release(Slot&& slot) = 0; 69 }; 70 71 // An singleton implementation of the ICU line breaker pool. 72 // Since creating ICU line breaker instance takes some time. Pool it for later use. 73 class ICULineBreakerPoolImpl : public ICULineBreakerPool { 74 public: 75 Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 76 LineBreakWordStyle lbWordStyle) override; 77 void release(Slot&& slot) override; 78 getInstance()79 static ICULineBreakerPoolImpl& getInstance() { 80 static ICULineBreakerPoolImpl pool; 81 return pool; 82 } 83 84 protected: 85 // protected for testing purposes. 86 static constexpr size_t MAX_POOL_SIZE = 4; ICULineBreakerPoolImpl()87 ICULineBreakerPoolImpl(){}; // singleton. getPoolSize()88 size_t getPoolSize() const { 89 std::lock_guard<std::mutex> lock(mMutex); 90 return mPool.size(); 91 } 92 93 private: 94 std::list<Slot> mPool GUARDED_BY(mMutex); 95 mutable std::mutex mMutex; 96 }; 97 98 class WordBreaker { 99 public: ~WordBreaker()100 virtual ~WordBreaker() { finish(); } 101 102 WordBreaker(); 103 104 void setText(const uint16_t* data, size_t size); 105 106 // Advance iterator to next word break with current locale. Return offset, or -1 if EOT 107 ssize_t next(); 108 109 // Advance iterator to the break just after "from" with using the new provided locale. 110 // Return offset, or -1 if EOT 111 ssize_t followingWithLocale(const Locale& locale, LineBreakStyle lbStyle, 112 LineBreakWordStyle lbWordStyle, size_t from); 113 114 // Current offset of iterator, equal to 0 at BOT or last return from next() 115 ssize_t current() const; 116 117 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 118 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 119 ssize_t wordStart() const; 120 121 ssize_t wordEnd() const; 122 123 // Returns the range from wordStart() to wordEnd(). 124 // If wordEnd() <= wordStart(), returns empty range. wordRange()125 inline Range wordRange() const { 126 const uint32_t start = wordStart(); 127 const uint32_t end = wordEnd(); 128 return start < end ? Range(start, end) : Range(end, end); 129 } 130 131 int breakBadness() const; 132 133 void finish(); 134 135 protected: 136 // protected virtual for testing purpose. 137 // Caller must release the pool. 138 WordBreaker(ICULineBreakerPool* pool); 139 140 private: 141 int32_t iteratorNext(); 142 void detectEmailOrUrl(); 143 ssize_t findNextBreakInEmailOrUrl(); 144 145 // Doesn't take ownership. Must not be nullptr. Must be set in constructor. 146 ICULineBreakerPool* mPool; 147 148 ICULineBreakerPool::Slot mIcuBreaker; 149 150 std::unique_ptr<UText, decltype(&utext_close)> mUText; 151 const uint16_t* mText = nullptr; 152 size_t mTextSize; 153 ssize_t mLast; 154 ssize_t mCurrent; 155 156 // state for the email address / url detector 157 ssize_t mScanOffset; 158 bool mInEmailOrUrl; 159 }; 160 161 } // namespace minikin 162 163 #endif // MINIKIN_WORD_BREAKER_H 164