• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 #ifndef SkUnicode_DEFINED
8 #define SkUnicode_DEFINED
9 #include "include/core/SkSpan.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep
13 #include "include/private/base/SkTArray.h"
14 #include "src/base/SkUTF.h"
15 
16 #include <cstddef>
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <vector>
21 
// SKUNICODE_IMPLEMENTATION defaults to 0 unless the build defines it; it selects
// between exporting and importing symbols in the MSVC DLL configuration below.
#ifndef SKUNICODE_IMPLEMENTATION
    #define SKUNICODE_IMPLEMENTATION 0
#endif

// SKUNICODE_API decorates the classes declared in this header. A definition
// supplied by the build wins; otherwise it is derived as follows:
//   - SKUNICODE_DLL not defined         -> expands to nothing
//   - SKUNICODE_DLL, compiler not MSVC  -> default symbol visibility
//   - SKUNICODE_DLL, MSVC               -> dllexport when SKUNICODE_IMPLEMENTATION
//                                          is non-zero, dllimport otherwise
#ifndef SKUNICODE_API
    #if !defined(SKUNICODE_DLL)
        #define SKUNICODE_API
    #elif !defined(_MSC_VER)
        #define SKUNICODE_API __attribute__((visibility("default")))
    #elif SKUNICODE_IMPLEMENTATION
        #define SKUNICODE_API __declspec(dllexport)
    #else
        #define SKUNICODE_API __declspec(dllimport)
    #endif
#endif
41 
42 class SKUNICODE_API SkBidiIterator {
43 public:
44     typedef int32_t Position;
45     typedef uint8_t Level;
46     struct Region {
RegionRegion47         Region(Position start, Position end, Level level)
48             : start(start), end(end), level(level) { }
49         Position start;
50         Position end;
51         Level level;
52     };
53     enum Direction {
54         kLTR,
55         kRTL,
56     };
57     virtual ~SkBidiIterator() = default;
58     virtual Position getLength() = 0;
59     virtual Level getLevelAt(Position) = 0;
60 };
61 
62 class SKUNICODE_API SkBreakIterator {
63 public:
64     typedef int32_t Position;
65     typedef int32_t Status;
66     virtual ~SkBreakIterator() = default;
67     virtual Position first() = 0;
68     virtual Position current() = 0;
69     virtual Position next() = 0;
70     virtual Status status() = 0;
71     virtual bool isDone() = 0;
72     virtual bool setText(const char utftext8[], int utf8Units) = 0;
73     virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
74 };
75 
76 class SKUNICODE_API SkUnicode {
77     public:
78         enum CodeUnitFlags {
79             kNoCodeUnitFlag = 0x00,
80             kPartOfWhiteSpaceBreak = 0x01,
81             kGraphemeStart = 0x02,
82             kSoftLineBreakBefore = 0x04,
83             kHardLineBreakBefore = 0x08,
84             kPartOfIntraWordBreak = 0x10,
85             kControl = 0x20,
86             kTabulation = 0x40,
87             kGlyphClusterStart = 0x80,
88         };
89         enum class TextDirection {
90             kLTR,
91             kRTL,
92         };
93         typedef size_t Position;
94         typedef uint8_t BidiLevel;
95         struct BidiRegion {
BidiRegionBidiRegion96             BidiRegion(Position start, Position end, BidiLevel level)
97               : start(start), end(end), level(level) { }
98             Position start;
99             Position end;
100             BidiLevel level;
101         };
102         enum class LineBreakType {
103             kSoftLineBreak = 0,
104             kHardLineBreak = 100,
105         };
106 
107         enum class BreakType {
108             kWords,
109             kGraphemes,
110             kLines
111         };
112         struct LineBreakBefore {
LineBreakBeforeLineBreakBefore113             LineBreakBefore(Position pos, LineBreakType breakType)
114               : pos(pos), breakType(breakType) { }
115             Position pos;
116             LineBreakType breakType;
117         };
118 
119         virtual ~SkUnicode() = default;
120 
121         virtual SkString toUpper(const SkString&) = 0;
122 
123         // Methods used in SkShaper and SkText
124         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
125             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
126         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
127             (const char text[], int count, SkBidiIterator::Direction) = 0;
128         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
129             (const char locale[], BreakType breakType) = 0;
130         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
131 
132         // Methods used in SkParagraph
133         static bool isTabulation(SkUnicode::CodeUnitFlags flags);
134         static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags);
135         static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags);
136         static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags);
137         static bool isControl(SkUnicode::CodeUnitFlags flags);
138         static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags);
139         static bool extractBidi(const char utf8[],
140                                 int utf8Units,
141                                 TextDirection dir,
142                                 std::vector<BidiRegion>* bidiRegions);
143         virtual bool getBidiRegions(const char utf8[],
144                                     int utf8Units,
145                                     TextDirection dir,
146                                     std::vector<BidiRegion>* results) = 0;
147         virtual bool getWords(const char utf8[], int utf8Units, const char* locale,
148                               std::vector<Position>* results) = 0;
149         virtual bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
150                                       SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
151         virtual bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
152                                       SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
153 
154         static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units);
155         static SkString convertUtf16ToUtf8(const std::u16string& utf16);
156         static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units);
157         static std::u16string convertUtf8ToUtf16(const SkString& utf8);
158 
159         template <typename Appender8, typename Appender16>
extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)160         static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) {
161             size_t size8 = 0;
162             size_t size16 = 0;
163             auto ptr = utf8.begin();
164             auto end = utf8.end();
165             while (ptr < end) {
166 
167                 size_t index = ptr - utf8.begin();
168                 SkUnichar u = SkUTF::NextUTF8(&ptr, end);
169 
170                 // All UTF8 code units refer to the same codepoint
171                 size_t next = ptr - utf8.begin();
172                 for (auto i = index; i < next; ++i) {
173                     //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
174                     appender16(size8);
175                     ++size16;
176                 }
177                 //SkASSERT(fUTF16IndexForUTF8Index.size() == next);
178                 SkASSERT(size16 == next);
179                 if (size16 != next) {
180                     return false;
181                 }
182 
183                 // One or two UTF16 code units refer to the same codepoint
184                 uint16_t buffer[2];
185                 size_t count = SkUTF::ToUTF16(u, buffer);
186                 //fUTF8IndexForUTF16Index.emplace_back(index);
187                 appender8(index);
188                 ++size8;
189                 if (count > 1) {
190                     //fUTF8IndexForUTF16Index.emplace_back(index);
191                     appender8(index);
192                     ++size8;
193                 }
194             }
195             //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
196             appender16(size8);
197             ++size16;
198             //fUTF8IndexForUTF16Index.emplace_back(fText.size());
199             appender8(utf8.size());
200             ++size8;
201 
202             return true;
203         }
204 
205         template <typename Callback>
forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)206         void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
207             const char* current = utf8;
208             const char* end = utf8 + utf8Units;
209             while (current < end) {
210                 auto before = current - utf8;
211                 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
212                 if (unichar < 0) unichar = 0xFFFD;
213                 auto after = current - utf8;
214                 uint16_t buffer[2];
215                 size_t count = SkUTF::ToUTF16(unichar, buffer);
216                 callback(unichar, before, after, count);
217             }
218         }
219 
220         template <typename Callback>
forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)221         void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
222             const char16_t* current = utf16;
223             const char16_t* end = utf16 + utf16Units;
224             while (current < end) {
225                 auto before = current - utf16;
226                 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end);
227                 auto after = current - utf16;
228                 callback(unichar, before, after);
229             }
230         }
231 
232         template <typename Callback>
forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)233         void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
234             auto iter = makeBidiIterator(utf16, utf16Units, dir);
235             const uint16_t* start16 = utf16;
236             const uint16_t* end16 = utf16 + utf16Units;
237             SkBidiIterator::Level currentLevel = 0;
238 
239             SkBidiIterator::Position pos16 = 0;
240             while (pos16 <= iter->getLength()) {
241                 auto level = iter->getLevelAt(pos16);
242                 if (pos16 == 0) {
243                     currentLevel = level;
244                 } else if (level != currentLevel) {
245                     callback(pos16, start16 - utf16, currentLevel);
246                     currentLevel = level;
247                 }
248                 if (start16 == end16) {
249                     break;
250                 }
251                 SkUnichar u = SkUTF::NextUTF16(&start16, end16);
252                 pos16 += SkUTF::ToUTF16(u);
253             }
254         }
255 
256         template <typename Callback>
forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,Callback && callback)257         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
258             auto iter = makeBreakIterator(type);
259             iter->setText(utf16, utf16Units);
260             auto pos = iter->first();
261             do {
262                 callback(pos, iter->status());
263                 pos = iter->next();
264             } while (!iter->isDone());
265         }
266 
267         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
268 
269         virtual std::unique_ptr<SkUnicode> copy() = 0;
270 
271         static std::unique_ptr<SkUnicode> Make();
272 
273         static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode();
274 
275         static std::unique_ptr<SkUnicode> MakeClientBasedUnicode(
276                 SkSpan<char> text,
277                 std::vector<SkUnicode::Position> words,
278                 std::vector<SkUnicode::Position> graphemeBreaks,
279                 std::vector<SkUnicode::LineBreakBefore> lineBreaks);
280 };
281 
282 namespace sknonstd {
283     template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {};
284 }  // namespace sknonstd
285 #endif // SkUnicode_DEFINED
286