1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <memory> 27 #include "unicode/brkiter.h" 28 #include "utils/WindowsUtils.h" 29 30 namespace minikin { 31 32 class WordBreaker { 33 public: ~WordBreaker()34 ~WordBreaker() { finish(); } 35 36 // libtxt extension: always use the default locale so that a cached instance 37 // of the ICU break iterator can be reused. 38 void setLocale(); 39 40 void setText(const uint16_t* data, size_t size); 41 42 // Advance iterator to next word break. Return offset, or -1 if EOT 43 ssize_t next(); 44 45 // Current offset of iterator, equal to 0 at BOT or last return from next() 46 ssize_t current() const; 47 48 // After calling next(), wordStart() and wordEnd() are offsets defining the 49 // previous word. If wordEnd <= wordStart, it's not a word for the purpose of 50 // hyphenation. 51 ssize_t wordStart() const; 52 53 ssize_t wordEnd() const; 54 55 int breakBadness() const; 56 57 void finish(); 58 59 private: 60 int32_t iteratorNext(); 61 void detectEmailOrUrl(); 62 ssize_t findNextBreakInEmailOrUrl(); 63 64 std::unique_ptr<icu::BreakIterator> mBreakIterator; 65 UText mUText = UTEXT_INITIALIZER; 66 const uint16_t* mText = nullptr; 67 size_t mTextSize; 68 ssize_t mLast; 69 ssize_t mCurrent; 70 bool mIteratorWasReset; 71 72 // state for the email address / url detector 73 ssize_t mScanOffset; 74 bool mInEmailOrUrl; 75 }; 76 77 } // namespace minikin 78 79 #endif // MINIKIN_WORD_BREAKER_H 80