1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <list> 27 #include <mutex> 28 29 #include <unicode/ubrk.h> 30 31 #include "minikin/IcuUtils.h" 32 #include "minikin/Macros.h" 33 #include "minikin/Range.h" 34 35 #include "Locale.h" 36 37 namespace minikin { 38 39 // A class interface for providing pooling implementation of ICU's line breaker. 40 // The implementation can be customized for testing purposes. 41 class ICULineBreakerPool { 42 public: 43 struct Slot { SlotSlot44 Slot() : localeId(0), breaker(nullptr) {} SlotSlot45 Slot(uint64_t localeId, IcuUbrkUniquePtr&& breaker) 46 : localeId(localeId), breaker(std::move(breaker)) {} 47 48 Slot(Slot&& other) = default; 49 Slot& operator=(Slot&& other) = default; 50 51 // Forbid copy and assignment. 52 Slot(const Slot&) = delete; 53 Slot& operator=(const Slot&) = delete; 54 55 uint64_t localeId; 56 IcuUbrkUniquePtr breaker; 57 }; ~ICULineBreakerPool()58 virtual ~ICULineBreakerPool() {} 59 virtual Slot acquire(const Locale& locale) = 0; 60 virtual void release(Slot&& slot) = 0; 61 }; 62 63 // An singleton implementation of the ICU line breaker pool. 64 // Since creating ICU line breaker instance takes some time. Pool it for later use. 65 class ICULineBreakerPoolImpl : public ICULineBreakerPool { 66 public: 67 Slot acquire(const Locale& locale) override; 68 void release(Slot&& slot) override; 69 getInstance()70 static ICULineBreakerPoolImpl& getInstance() { 71 static ICULineBreakerPoolImpl pool; 72 return pool; 73 } 74 75 protected: 76 // protected for testing purposes. 77 static constexpr size_t MAX_POOL_SIZE = 4; ICULineBreakerPoolImpl()78 ICULineBreakerPoolImpl(){}; // singleton. getPoolSize()79 size_t getPoolSize() const { 80 std::lock_guard<std::mutex> lock(mMutex); 81 return mPool.size(); 82 } 83 84 private: 85 std::list<Slot> mPool GUARDED_BY(mMutex); 86 mutable std::mutex mMutex; 87 }; 88 89 class WordBreaker { 90 public: ~WordBreaker()91 virtual ~WordBreaker() { finish(); } 92 93 WordBreaker(); 94 95 void setText(const uint16_t* data, size_t size); 96 97 // Advance iterator to next word break with current locale. Return offset, or -1 if EOT 98 ssize_t next(); 99 100 // Advance iterator to the break just after "from" with using the new provided locale. 101 // Return offset, or -1 if EOT 102 ssize_t followingWithLocale(const Locale& locale, size_t from); 103 104 // Current offset of iterator, equal to 0 at BOT or last return from next() 105 ssize_t current() const; 106 107 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 108 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 109 ssize_t wordStart() const; 110 111 ssize_t wordEnd() const; 112 113 // Returns the range from wordStart() to wordEnd(). 114 // If wordEnd() <= wordStart(), returns empty range. wordRange()115 inline Range wordRange() const { 116 const uint32_t start = wordStart(); 117 const uint32_t end = wordEnd(); 118 return start < end ? Range(start, end) : Range(end, end); 119 } 120 121 int breakBadness() const; 122 123 void finish(); 124 125 protected: 126 // protected virtual for testing purpose. 127 // Caller must release the pool. 128 WordBreaker(ICULineBreakerPool* pool); 129 130 private: 131 int32_t iteratorNext(); 132 void detectEmailOrUrl(); 133 ssize_t findNextBreakInEmailOrUrl(); 134 135 // Doesn't take ownership. Must not be nullptr. Must be set in constructor. 136 ICULineBreakerPool* mPool; 137 138 ICULineBreakerPool::Slot mIcuBreaker; 139 140 UText mUText = UTEXT_INITIALIZER; 141 const uint16_t* mText = nullptr; 142 size_t mTextSize; 143 ssize_t mLast; 144 ssize_t mCurrent; 145 146 // state for the email address / url detector 147 ssize_t mScanOffset; 148 bool mInEmailOrUrl; 149 }; 150 151 } // namespace minikin 152 153 #endif // MINIKIN_WORD_BREAKER_H 154