• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A wrapper around ICU's line break iterator, that gives customized line
19  * break opportunities, as well as identifying words for the purpose of
20  * hyphenation.
21  */
22 
23 #ifndef MINIKIN_WORD_BREAKER_H
24 #define MINIKIN_WORD_BREAKER_H
25 
26 #include <unicode/ubrk.h>
27 
28 #include <list>
29 #include <mutex>
30 
31 #include "Locale.h"
32 #include "minikin/IcuUtils.h"
33 #include "minikin/LineBreakStyle.h"
34 #include "minikin/Macros.h"
35 #include "minikin/Range.h"
36 
37 namespace minikin {
38 
// An interface for pooling ICU line breaker instances.
// The implementation can be replaced for testing purposes.
41 class ICULineBreakerPool {
42 public:
43     struct Slot {
SlotSlot44         Slot() : localeId(0), breaker(nullptr) {}
SlotSlot45         Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle,
46              IcuUbrkUniquePtr&& breaker)
47                 : localeId(localeId),
48                   lbStyle(lbStyle),
49                   lbWordStyle(lbWordStyle),
50                   breaker(std::move(breaker)) {}
51 
52         Slot(Slot&& other) = default;
53         Slot& operator=(Slot&& other) = default;
54 
55         // Forbid copy and assignment.
56         Slot(const Slot&) = delete;
57         Slot& operator=(const Slot&) = delete;
58 
59         uint64_t localeId;
60         LineBreakStyle lbStyle;
61         LineBreakWordStyle lbWordStyle;
62         IcuUbrkUniquePtr breaker;
63     };
~ICULineBreakerPool()64     virtual ~ICULineBreakerPool() {}
65     virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle,
66                          LineBreakWordStyle lbWordStyle) = 0;
67     virtual void release(Slot&& slot) = 0;
68 };
69 
// A singleton implementation of the ICU line breaker pool.
// Creating an ICU line breaker instance takes some time, so instances are pooled for later use.
72 class ICULineBreakerPoolImpl : public ICULineBreakerPool {
73 public:
74     Slot acquire(const Locale& locale, LineBreakStyle lbStyle,
75                  LineBreakWordStyle lbWordStyle) override;
76     void release(Slot&& slot) override;
77 
getInstance()78     static ICULineBreakerPoolImpl& getInstance() {
79         static ICULineBreakerPoolImpl pool;
80         return pool;
81     }
82 
83 protected:
84     // protected for testing purposes.
85     static constexpr size_t MAX_POOL_SIZE = 4;
ICULineBreakerPoolImpl()86     ICULineBreakerPoolImpl(){};  // singleton.
getPoolSize()87     size_t getPoolSize() const {
88         std::lock_guard<std::mutex> lock(mMutex);
89         return mPool.size();
90     }
91 
92 private:
93     std::list<Slot> mPool GUARDED_BY(mMutex);
94     mutable std::mutex mMutex;
95 };
96 
97 class WordBreaker {
98 public:
~WordBreaker()99     virtual ~WordBreaker() { finish(); }
100 
101     WordBreaker();
102 
103     void setText(const uint16_t* data, size_t size);
104 
105     // Advance iterator to next word break with current locale. Return offset, or -1 if EOT
106     ssize_t next();
107 
108     // Advance iterator to the break just after "from" with using the new provided locale.
109     // Return offset, or -1 if EOT
110     ssize_t followingWithLocale(const Locale& locale, LineBreakStyle lbStyle,
111                                 LineBreakWordStyle lbWordStyle, size_t from);
112 
113     // Current offset of iterator, equal to 0 at BOT or last return from next()
114     ssize_t current() const;
115 
116     // After calling next(), wordStart() and wordEnd() are offsets defining the previous
117     // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
118     ssize_t wordStart() const;
119 
120     ssize_t wordEnd() const;
121 
122     // Returns the range from wordStart() to wordEnd().
123     // If wordEnd() <= wordStart(), returns empty range.
wordRange()124     inline Range wordRange() const {
125         const uint32_t start = wordStart();
126         const uint32_t end = wordEnd();
127         return start < end ? Range(start, end) : Range(end, end);
128     }
129 
130     int breakBadness() const;
131 
132     void finish();
133 
134 protected:
135     // protected virtual for testing purpose.
136     // Caller must release the pool.
137     WordBreaker(ICULineBreakerPool* pool);
138 
139 private:
140     int32_t iteratorNext();
141     void detectEmailOrUrl();
142     ssize_t findNextBreakInEmailOrUrl();
143 
144     // Doesn't take ownership. Must not be nullptr. Must be set in constructor.
145     ICULineBreakerPool* mPool;
146 
147     ICULineBreakerPool::Slot mIcuBreaker;
148 
149     UText mUText = UTEXT_INITIALIZER;
150     const uint16_t* mText = nullptr;
151     size_t mTextSize;
152     ssize_t mLast;
153     ssize_t mCurrent;
154 
155     // state for the email address / url detector
156     ssize_t mScanOffset;
157     bool mInEmailOrUrl;
158 };
159 
160 }  // namespace minikin
161 
162 #endif  // MINIKIN_WORD_BREAKER_H
163