1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <unicode/ubrk.h> 27 28 #include <list> 29 #include <mutex> 30 31 #include "Locale.h" 32 #include "minikin/IcuUtils.h" 33 #include "minikin/LineBreakStyle.h" 34 #include "minikin/Macros.h" 35 #include "minikin/Range.h" 36 37 namespace minikin { 38 39 // A class interface for providing pooling implementation of ICU's line breaker. 40 // The implementation can be customized for testing purposes. 41 class ICULineBreakerPool { 42 public: 43 struct Slot { SlotSlot44 Slot() : localeId(0), breaker(nullptr) {} SlotSlot45 Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle, 46 IcuUbrkUniquePtr&& breaker) 47 : localeId(localeId), 48 lbStyle(lbStyle), 49 lbWordStyle(lbWordStyle), 50 breaker(std::move(breaker)) {} 51 52 Slot(Slot&& other) = default; 53 Slot& operator=(Slot&& other) = default; 54 55 // Forbid copy and assignment. 56 Slot(const Slot&) = delete; 57 Slot& operator=(const Slot&) = delete; 58 59 uint64_t localeId; 60 LineBreakStyle lbStyle; 61 LineBreakWordStyle lbWordStyle; 62 IcuUbrkUniquePtr breaker; 63 }; ~ICULineBreakerPool()64 virtual ~ICULineBreakerPool() {} 65 virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 66 LineBreakWordStyle lbWordStyle) = 0; 67 virtual void release(Slot&& slot) = 0; 68 }; 69 70 // An singleton implementation of the ICU line breaker pool. 71 // Since creating ICU line breaker instance takes some time. Pool it for later use. 72 class ICULineBreakerPoolImpl : public ICULineBreakerPool { 73 public: 74 Slot acquire(const Locale& locale, LineBreakStyle lbStyle, 75 LineBreakWordStyle lbWordStyle) override; 76 void release(Slot&& slot) override; 77 getInstance()78 static ICULineBreakerPoolImpl& getInstance() { 79 static ICULineBreakerPoolImpl pool; 80 return pool; 81 } 82 83 protected: 84 // protected for testing purposes. 85 static constexpr size_t MAX_POOL_SIZE = 4; ICULineBreakerPoolImpl()86 ICULineBreakerPoolImpl(){}; // singleton. getPoolSize()87 size_t getPoolSize() const { 88 std::lock_guard<std::mutex> lock(mMutex); 89 return mPool.size(); 90 } 91 92 private: 93 std::list<Slot> mPool GUARDED_BY(mMutex); 94 mutable std::mutex mMutex; 95 }; 96 97 class WordBreaker { 98 public: ~WordBreaker()99 virtual ~WordBreaker() { finish(); } 100 101 WordBreaker(); 102 103 void setText(const uint16_t* data, size_t size); 104 105 // Advance iterator to next word break with current locale. Return offset, or -1 if EOT 106 ssize_t next(); 107 108 // Advance iterator to the break just after "from" with using the new provided locale. 109 // Return offset, or -1 if EOT 110 ssize_t followingWithLocale(const Locale& locale, LineBreakStyle lbStyle, 111 LineBreakWordStyle lbWordStyle, size_t from); 112 113 // Current offset of iterator, equal to 0 at BOT or last return from next() 114 ssize_t current() const; 115 116 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 117 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 118 ssize_t wordStart() const; 119 120 ssize_t wordEnd() const; 121 122 // Returns the range from wordStart() to wordEnd(). 123 // If wordEnd() <= wordStart(), returns empty range. wordRange()124 inline Range wordRange() const { 125 const uint32_t start = wordStart(); 126 const uint32_t end = wordEnd(); 127 return start < end ? Range(start, end) : Range(end, end); 128 } 129 130 int breakBadness() const; 131 132 void finish(); 133 134 protected: 135 // protected virtual for testing purpose. 136 // Caller must release the pool. 137 WordBreaker(ICULineBreakerPool* pool); 138 139 private: 140 int32_t iteratorNext(); 141 void detectEmailOrUrl(); 142 ssize_t findNextBreakInEmailOrUrl(); 143 144 // Doesn't take ownership. Must not be nullptr. Must be set in constructor. 145 ICULineBreakerPool* mPool; 146 147 ICULineBreakerPool::Slot mIcuBreaker; 148 149 UText mUText = UTEXT_INITIALIZER; 150 const uint16_t* mText = nullptr; 151 size_t mTextSize; 152 ssize_t mLast; 153 ssize_t mCurrent; 154 155 // state for the email address / url detector 156 ssize_t mScanOffset; 157 bool mInEmailOrUrl; 158 }; 159 160 } // namespace minikin 161 162 #endif // MINIKIN_WORD_BREAKER_H 163