/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
/**
 * A wrapper around ICU's line break iterator that provides customized line
 * break opportunities and identifies words for the purpose of hyphenation.
 */
22 
23 #ifndef MINIKIN_WORD_BREAKER_H
24 #define MINIKIN_WORD_BREAKER_H
25 
26 #include <unicode/ubrk.h>
27 
28 #include <list>
29 #include <memory>
30 #include <mutex>
31 
32 #include "Locale.h"
33 #include "minikin/IcuUtils.h"
34 #include "minikin/LineBreakStyle.h"
35 #include "minikin/Macros.h"
36 #include "minikin/Range.h"
37 
38 namespace minikin {
39 
40 // A class interface for providing pooling implementation of ICU's line breaker.
41 // The implementation can be customized for testing purposes.
42 class ICULineBreakerPool {
43 public:
44     struct Slot {
SlotSlot45         Slot() : localeId(0), breaker(nullptr) {}
SlotSlot46         Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle,
47              IcuUbrkUniquePtr&& breaker)
48                 : localeId(localeId),
49                   lbStyle(lbStyle),
50                   lbWordStyle(lbWordStyle),
51                   breaker(std::move(breaker)) {}
52 
53         Slot(Slot&& other) = default;
54         Slot& operator=(Slot&& other) = default;
55 
56         // Forbid copy and assignment.
57         Slot(const Slot&) = delete;
58         Slot& operator=(const Slot&) = delete;
59 
60         uint64_t localeId;
61         LineBreakStyle lbStyle;
62         LineBreakWordStyle lbWordStyle;
63         IcuUbrkUniquePtr breaker;
64     };
~ICULineBreakerPool()65     virtual ~ICULineBreakerPool() {}
66     virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle,
67                          LineBreakWordStyle lbWordStyle) = 0;
68     virtual void release(Slot&& slot) = 0;
69 };
70 
71 // An singleton implementation of the ICU line breaker pool.
72 // Since creating ICU line breaker instance takes some time. Pool it for later use.
73 class ICULineBreakerPoolImpl : public ICULineBreakerPool {
74 public:
75     Slot acquire(const Locale& locale, LineBreakStyle lbStyle,
76                  LineBreakWordStyle lbWordStyle) override;
77     void release(Slot&& slot) override;
78 
getInstance()79     static ICULineBreakerPoolImpl& getInstance() {
80         static ICULineBreakerPoolImpl pool;
81         return pool;
82     }
83 
84 protected:
85     // protected for testing purposes.
86     static constexpr size_t MAX_POOL_SIZE = 4;
ICULineBreakerPoolImpl()87     ICULineBreakerPoolImpl(){};  // singleton.
getPoolSize()88     size_t getPoolSize() const {
89         std::lock_guard<std::mutex> lock(mMutex);
90         return mPool.size();
91     }
92 
93 private:
94     std::list<Slot> mPool GUARDED_BY(mMutex);
95     mutable std::mutex mMutex;
96 };
97 
98 class WordBreaker {
99 public:
~WordBreaker()100     virtual ~WordBreaker() { finish(); }
101 
102     WordBreaker();
103 
104     void setText(const uint16_t* data, size_t size);
105 
106     // Advance iterator to next word break with current locale. Return offset, or -1 if EOT
107     ssize_t next();
108 
109     // Advance iterator to the break just after "from" with using the new provided locale.
110     // Return offset, or -1 if EOT
111     ssize_t followingWithLocale(const Locale& locale, LineBreakStyle lbStyle,
112                                 LineBreakWordStyle lbWordStyle, size_t from);
113 
114     // Current offset of iterator, equal to 0 at BOT or last return from next()
115     ssize_t current() const;
116 
117     // After calling next(), wordStart() and wordEnd() are offsets defining the previous
118     // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
119     ssize_t wordStart() const;
120 
121     ssize_t wordEnd() const;
122 
123     // Returns the range from wordStart() to wordEnd().
124     // If wordEnd() <= wordStart(), returns empty range.
wordRange()125     inline Range wordRange() const {
126         const uint32_t start = wordStart();
127         const uint32_t end = wordEnd();
128         return start < end ? Range(start, end) : Range(end, end);
129     }
130 
131     int breakBadness() const;
132 
133     void finish();
134 
135 protected:
136     // protected virtual for testing purpose.
137     // Caller must release the pool.
138     WordBreaker(ICULineBreakerPool* pool);
139 
140 private:
141     int32_t iteratorNext();
142     void detectEmailOrUrl();
143     ssize_t findNextBreakInEmailOrUrl();
144 
145     // Doesn't take ownership. Must not be nullptr. Must be set in constructor.
146     ICULineBreakerPool* mPool;
147 
148     ICULineBreakerPool::Slot mIcuBreaker;
149 
150     std::unique_ptr<UText, decltype(&utext_close)> mUText;
151     const uint16_t* mText = nullptr;
152     size_t mTextSize;
153     ssize_t mLast;
154     ssize_t mCurrent;
155 
156     // state for the email address / url detector
157     ssize_t mScanOffset;
158     bool mInEmailOrUrl;
159 };
160 
161 }  // namespace minikin
162 
163 #endif  // MINIKIN_WORD_BREAKER_H
164