/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#ifndef SkUnicode_DEFINED
#define SkUnicode_DEFINED
#include "include/core/SkRefCnt.h"
#include "include/core/SkSpan.h"
#include "include/core/SkString.h"
#include "include/core/SkTypes.h"
#include "include/private/base/SkTArray.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkUTF.h"

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
// Forward declaration of the bitmask-enum trait. It is only declared here so
// that this header can specialize it for SkUnicode::CodeUnitFlags further down
// without pulling in the header that defines the primary template.
namespace sknonstd { template <typename T> struct is_bitmask_enum; }

// SKUNICODE_IMPLEMENTATION is expected to be defined to a non-zero value by the
// build of the library itself; every other translation unit gets the default 0.
#if !defined(SKUNICODE_IMPLEMENTATION)
    #define SKUNICODE_IMPLEMENTATION 0
#endif

// SKUNICODE_API decorates the classes exported from this header.
//  - A pre-existing definition of SKUNICODE_API is respected as-is.
//  - With SKUNICODE_DLL and MSVC: dllexport while building the library
//    (SKUNICODE_IMPLEMENTATION != 0), dllimport otherwise.
//  - With SKUNICODE_DLL and any other compiler: default symbol visibility.
//  - Without SKUNICODE_DLL: expands to nothing.
#if !defined(SKUNICODE_API)
    #if defined(SKUNICODE_DLL)
        #if defined(_MSC_VER)
            #if SKUNICODE_IMPLEMENTATION
                #define SKUNICODE_API __declspec(dllexport)
            #else
                #define SKUNICODE_API __declspec(dllimport)
            #endif
        #else
            #define SKUNICODE_API __attribute__((visibility("default")))
        #endif
    #else
        #define SKUNICODE_API
    #endif
#endif

43 class SKUNICODE_API SkBidiIterator {
44 public:
45     typedef int32_t Position;
46     typedef uint8_t Level;
47     struct Region {
RegionRegion48         Region(Position start, Position end, Level level)
49             : start(start), end(end), level(level) { }
50         Position start;
51         Position end;
52         Level level;
53     };
54     enum Direction {
55         kLTR,
56         kRTL,
57     };
58     virtual ~SkBidiIterator() = default;
59     virtual Position getLength() = 0;
60     virtual Level getLevelAt(Position) = 0;
61 };
62 
63 class SKUNICODE_API SkBreakIterator {
64 public:
65     typedef int32_t Position;
66     typedef int32_t Status;
67     virtual ~SkBreakIterator() = default;
68     virtual Position first() = 0;
69     virtual Position current() = 0;
70     virtual Position next() = 0;
71     virtual Status status() = 0;
72     virtual bool isDone() = 0;
73     virtual bool setText(const char utftext8[], int utf8Units) = 0;
74     virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
75 };
76 
77 class SKUNICODE_API SkUnicode : public SkRefCnt {
78     public:
79         enum CodeUnitFlags {
80             kNoCodeUnitFlag = 0x00,
81             kPartOfWhiteSpaceBreak = 0x01,
82             kGraphemeStart = 0x02,
83             kSoftLineBreakBefore = 0x04,
84             kHardLineBreakBefore = 0x08,
85             kPartOfIntraWordBreak = 0x10,
86             kControl = 0x20,
87             kTabulation = 0x40,
88             kGlyphClusterStart = 0x80,
89             kIdeographic = 0x100,
90             kEmoji = 0x200,
91             kWordBreak = 0x400,
92             kSentenceBreak = 0x800,
93 #ifdef ENABLE_TEXT_ENHANCE
94             kCombine = 0x1000,
95             kPunctuation = 0x2000,
96             kEllipsis = 0x4000,
97 #endif
98         };
99         enum class TextDirection {
100             kLTR,
101             kRTL,
102         };
103         typedef size_t Position;
104         typedef uint8_t BidiLevel;
105         struct BidiRegion {
BidiRegionBidiRegion106             BidiRegion(Position start, Position end, BidiLevel level)
107               : start(start), end(end), level(level) { }
108             Position start;
109             Position end;
110             BidiLevel level;
111         };
112         enum class LineBreakType {
113             kSoftLineBreak = 0,
114             kHardLineBreak = 100,
115         };
116 
117         enum class BreakType { kWords, kGraphemes, kLines, kSentences };
118         struct LineBreakBefore {
LineBreakBeforeLineBreakBefore119             LineBreakBefore(Position pos, LineBreakType breakType)
120               : pos(pos), breakType(breakType) { }
121             Position pos;
122             LineBreakType breakType;
123         };
124 
125         ~SkUnicode() override = default;
126 
127         // deprecated
128         virtual SkString toUpper(const SkString&) = 0;
129         virtual SkString toUpper(const SkString&, const char* locale) = 0;
130 
131         virtual bool isControl(SkUnichar utf8) = 0;
132         virtual bool isWhitespace(SkUnichar utf8) = 0;
133         virtual bool isSpace(SkUnichar utf8) = 0;
134         virtual bool isTabulation(SkUnichar utf8) = 0;
135         virtual bool isHardBreak(SkUnichar utf8) = 0;
136         /**
137          * Returns if a code point may start an emoji sequence.
138          * Returns true for '#', '*', and '0'-'9' since they may start an emoji sequence.
139          * To determine if a list of code points begins with an emoji sequence, use
140          * getEmojiSequence.
141          **/
142         virtual bool isEmoji(SkUnichar utf8) = 0;
143         virtual bool isEmojiComponent(SkUnichar utf8) = 0;
144         virtual bool isEmojiModifierBase(SkUnichar utf8) = 0;
145         virtual bool isEmojiModifier(SkUnichar utf8) = 0;
146         virtual bool isRegionalIndicator(SkUnichar utf8) = 0;
147         virtual bool isIdeographic(SkUnichar utf8) = 0;
148 
149         // Methods used in SkShaper and SkText
150         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
151             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
152         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
153             (const char text[], int count, SkBidiIterator::Direction) = 0;
154         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
155             (const char locale[], BreakType breakType) = 0;
156         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
157 
158         // Methods used in SkParagraph
159         static bool hasTabulationFlag(SkUnicode::CodeUnitFlags flags);
160         static bool hasHardLineBreakFlag(SkUnicode::CodeUnitFlags flags);
161         static bool hasSoftLineBreakFlag(SkUnicode::CodeUnitFlags flags);
162         static bool hasGraphemeStartFlag(SkUnicode::CodeUnitFlags flags);
163         static bool hasControlFlag(SkUnicode::CodeUnitFlags flags);
164         static bool hasPartOfWhiteSpaceBreakFlag(SkUnicode::CodeUnitFlags flags);
165 #ifdef ENABLE_TEXT_ENHANCE
166         static bool isPunctuation(SkUnichar utf8);
167         static bool isEllipsis(SkUnichar utf8);
168 #endif
169 
170         static bool extractBidi(const char utf8[],
171                                 int utf8Units,
172                                 TextDirection dir,
173                                 std::vector<BidiRegion>* bidiRegions);
174         virtual bool getBidiRegions(const char utf8[],
175                                     int utf8Units,
176                                     TextDirection dir,
177                                     std::vector<BidiRegion>* results) = 0;
178         // Returns results in utf16
179         virtual bool getWords(const char utf8[], int utf8Units, const char* locale,
180                               std::vector<Position>* results) = 0;
181         virtual bool getUtf8Words(const char utf8[],
182                                   int utf8Units,
183                                   const char* locale,
184                                   std::vector<Position>* results) = 0;
185         virtual bool getSentences(const char utf8[],
186                                   int utf8Units,
187                                   const char* locale,
188                                   std::vector<Position>* results) = 0;
189 #ifdef ENABLE_TEXT_ENHANCE
190         virtual bool computeCodeUnitFlags(
191                 char utf8[], int utf8Units, bool replaceTabs, const char locale[],
192                 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
193         virtual bool computeCodeUnitFlags(
194                 char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[],
195                 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
196 #else
197         virtual bool computeCodeUnitFlags(
198                 char utf8[], int utf8Units, bool replaceTabs,
199                 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
200         virtual bool computeCodeUnitFlags(
201                 char16_t utf16[], int utf16Units, bool replaceTabs,
202                 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
203 #endif
204 
205         static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units);
206         static SkString convertUtf16ToUtf8(const std::u16string& utf16);
207         static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units);
208         static std::u16string convertUtf8ToUtf16(const SkString& utf8);
209 
210         template <typename Appender8, typename Appender16>
extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)211         static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) {
212             size_t size8 = 0;
213             size_t size16 = 0;
214             auto ptr = utf8.begin();
215             auto end = utf8.end();
216             while (ptr < end) {
217 
218                 size_t index = SkToSizeT(ptr - utf8.begin());
219                 SkUnichar u = SkUTF::NextUTF8(&ptr, end);
220 
221                 // All UTF8 code units refer to the same codepoint
222                 size_t next = SkToSizeT(ptr - utf8.begin());
223                 for (auto i = index; i < next; ++i) {
224                     //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
225                     appender16(size8);
226                     ++size16;
227                 }
228                 //SkASSERT(fUTF16IndexForUTF8Index.size() == next);
229                 SkASSERT(size16 == next);
230                 if (size16 != next) {
231                     return false;
232                 }
233 
234                 // One or two UTF16 code units refer to the same codepoint
235                 uint16_t buffer[2];
236                 size_t count = SkUTF::ToUTF16(u, buffer);
237                 //fUTF8IndexForUTF16Index.emplace_back(index);
238                 appender8(index);
239                 ++size8;
240                 if (count > 1) {
241                     //fUTF8IndexForUTF16Index.emplace_back(index);
242                     appender8(index);
243                     ++size8;
244                 }
245             }
246             //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
247             appender16(size8);
248             ++size16;
249             //fUTF8IndexForUTF16Index.emplace_back(fText.size());
250             appender8(utf8.size());
251             ++size8;
252 
253             return true;
254         }
255 
256         template <typename Callback>
forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)257         void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
258             const char* current = utf8;
259             const char* end = utf8 + utf8Units;
260             while (current < end) {
261                 auto before = current - utf8;
262                 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
263                 if (unichar < 0) unichar = 0xFFFD;
264                 auto after = current - utf8;
265                 uint16_t buffer[2];
266                 size_t count = SkUTF::ToUTF16(unichar, buffer);
267                 callback(unichar, before, after, count);
268             }
269         }
270 
271         template <typename Callback>
forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)272         void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
273             const char16_t* current = utf16;
274             const char16_t* end = utf16 + utf16Units;
275             while (current < end) {
276                 auto before = current - utf16;
277                 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end);
278                 auto after = current - utf16;
279                 callback(unichar, before, after);
280             }
281         }
282 
283         template <typename Callback>
forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)284         void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
285             auto iter = makeBidiIterator(utf16, utf16Units, dir);
286             const uint16_t* start16 = utf16;
287             const uint16_t* end16 = utf16 + utf16Units;
288             SkBidiIterator::Level currentLevel = 0;
289 
290             SkBidiIterator::Position pos16 = 0;
291             while (pos16 <= iter->getLength()) {
292                 auto level = iter->getLevelAt(pos16);
293                 if (pos16 == 0) {
294                     currentLevel = level;
295                 } else if (level != currentLevel) {
296                     callback(pos16, start16 - utf16, currentLevel);
297                     currentLevel = level;
298                 }
299                 if (start16 == end16) {
300                     break;
301                 }
302                 SkUnichar u = SkUTF::NextUTF16(&start16, end16);
303                 pos16 += SkUTF::ToUTF16(u);
304             }
305         }
306 
307         template <typename Callback>
308 #ifdef ENABLE_TEXT_ENHANCE
forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,const char locale[],Callback && callback)309         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type,
310             const char locale[], Callback&& callback) {
311             auto iter = makeBreakIterator(type);
312 #else
313         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
314             auto iter = makeBreakIterator(type);
315 #endif
316             iter->setText(utf16, utf16Units);
317             auto pos = iter->first();
318             do {
319                 callback(pos, iter->status());
320                 pos = iter->next();
321             } while (!iter->isDone());
322         }
323 
324         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
325 };
326 
327 namespace sknonstd {
328 template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {};
329 }  // namespace sknonstd
330 
#endif // SkUnicode_DEFINED