• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
/**
 * A wrapper around ICU's line break iterator that provides customized line
 * break opportunities and identifies words for the purpose of hyphenation.
 */
22 
23 #ifndef MINIKIN_WORD_BREAKER_H
24 #define MINIKIN_WORD_BREAKER_H
25 
26 #include <memory>
27 #include "unicode/brkiter.h"
28 #include "utils/WindowsUtils.h"
29 
30 namespace minikin {
31 
32 class WordBreaker {
33  public:
~WordBreaker()34   ~WordBreaker() { finish(); }
35 
36   // libtxt extension: always use the default locale so that a cached instance
37   // of the ICU break iterator can be reused.
38   void setLocale();
39 
40   void setText(const uint16_t* data, size_t size);
41 
42   // Advance iterator to next word break. Return offset, or -1 if EOT
43   ssize_t next();
44 
45   // Current offset of iterator, equal to 0 at BOT or last return from next()
46   ssize_t current() const;
47 
48   // After calling next(), wordStart() and wordEnd() are offsets defining the
49   // previous word. If wordEnd <= wordStart, it's not a word for the purpose of
50   // hyphenation.
51   ssize_t wordStart() const;
52 
53   ssize_t wordEnd() const;
54 
55   int breakBadness() const;
56 
57   void finish();
58 
59  private:
60   int32_t iteratorNext();
61   void detectEmailOrUrl();
62   ssize_t findNextBreakInEmailOrUrl();
63 
64   std::unique_ptr<icu::BreakIterator> mBreakIterator;
65   UText mUText = UTEXT_INITIALIZER;
66   const uint16_t* mText = nullptr;
67   size_t mTextSize;
68   ssize_t mLast;
69   ssize_t mCurrent;
70   bool mIteratorWasReset;
71 
72   // state for the email address / url detector
73   ssize_t mScanOffset;
74   bool mInEmailOrUrl;
75 };
76 
77 }  // namespace minikin
78 
79 #endif  // MINIKIN_WORD_BREAKER_H
80