• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2020 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 #ifndef SkUnicode_DEFINED
8 #define SkUnicode_DEFINED
9 #include "include/core/SkSpan.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep
13 #include "include/private/SkTArray.h"
14 #include "include/private/SkTo.h"
15 #include "src/utils/SkUTF.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
// SKUNICODE_IMPLEMENTATION defaults to 0 unless the build has already set it;
// it selects between exporting and importing symbols in MSVC DLL builds.
#ifndef SKUNICODE_IMPLEMENTATION
    #define SKUNICODE_IMPLEMENTATION 0
#endif

// SKUNICODE_API decorates the public classes of this header. A definition
// supplied by the build wins; otherwise it expands to:
//   - nothing                                   when SKUNICODE_DLL is not defined,
//   - __attribute__((visibility("default")))    for DLL builds on non-MSVC compilers,
//   - __declspec(dllexport) / dllimport         for DLL builds on MSVC, depending on
//                                               SKUNICODE_IMPLEMENTATION.
#ifndef SKUNICODE_API
    #ifndef SKUNICODE_DLL
        #define SKUNICODE_API
    #elif !defined(_MSC_VER)
        #define SKUNICODE_API __attribute__((visibility("default")))
    #elif SKUNICODE_IMPLEMENTATION
        #define SKUNICODE_API __declspec(dllexport)
    #else
        #define SKUNICODE_API __declspec(dllimport)
    #endif
#endif
42 
43 class SKUNICODE_API SkBidiIterator {
44 public:
45     typedef int32_t Position;
46     typedef uint8_t Level;
47     struct Region {
RegionRegion48         Region(Position start, Position end, Level level)
49             : start(start), end(end), level(level) { }
50         Position start;
51         Position end;
52         Level level;
53     };
54     enum Direction {
55         kLTR,
56         kRTL,
57     };
58     virtual ~SkBidiIterator() = default;
59     virtual Position getLength() = 0;
60     virtual Level getLevelAt(Position) = 0;
61 };
62 
63 class SKUNICODE_API SkBreakIterator {
64 public:
65     typedef int32_t Position;
66     typedef int32_t Status;
67     virtual ~SkBreakIterator() = default;
68     virtual Position first() = 0;
69     virtual Position current() = 0;
70     virtual Position next() = 0;
71     virtual Status status() = 0;
72     virtual bool isDone() = 0;
73     virtual bool setText(const char utftext8[], int utf8Units) = 0;
74     virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
75 };
76 
77 class SKUNICODE_API SkUnicode {
78     public:
79         enum CodeUnitFlags {
80             kNoCodeUnitFlag = 0x00,
81             kPartOfWhiteSpaceBreak = 0x01,
82             kGraphemeStart = 0x02,
83             kSoftLineBreakBefore = 0x04,
84             kHardLineBreakBefore = 0x08,
85             kPartOfIntraWordBreak = 0x10,
86             kControl = 0x20,
87             kTabulation = 0x40,
88             kGlyphClusterStart = 0x80,
89             kIdeographic = 0x100,
90 #ifdef OHOS_SUPPORT
91             kCombine = 0x200,
92             kPunctuation = 0x400,
93             kEllipsis = 0x800,
94 #endif
95         };
96         enum class TextDirection {
97             kLTR,
98             kRTL,
99         };
100         typedef size_t Position;
101         typedef uint8_t BidiLevel;
102         struct BidiRegion {
BidiRegionBidiRegion103             BidiRegion(Position start, Position end, BidiLevel level)
104               : start(start), end(end), level(level) { }
105             Position start;
106             Position end;
107             BidiLevel level;
108         };
109         enum class LineBreakType {
110             kSoftLineBreak = 0,
111             kHardLineBreak = 100,
112         };
113 
114         enum class BreakType {
115             kWords,
116             kGraphemes,
117             kLines
118         };
119         struct LineBreakBefore {
LineBreakBeforeLineBreakBefore120             LineBreakBefore(Position pos, LineBreakType breakType)
121               : pos(pos), breakType(breakType) { }
122             Position pos;
123             LineBreakType breakType;
124         };
125 
126         virtual ~SkUnicode() = default;
127 
128         virtual SkString toUpper(const SkString&) = 0;
129 
130         // Methods used in SkShaper and SkText
131         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
132             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
133         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
134             (const char text[], int count, SkBidiIterator::Direction) = 0;
135         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
136             (const char locale[], BreakType breakType) = 0;
137         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
138 
139         // Methods used in SkParagraph
140         static bool isTabulation(SkUnicode::CodeUnitFlags flags);
141         static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags);
142         static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags);
143         static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags);
144         static bool isControl(SkUnicode::CodeUnitFlags flags);
145         static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags);
146         static bool isIdeographic(SkUnichar utf8);
147 #ifdef OHOS_SUPPORT
148         static bool isPunctuation(SkUnichar utf8);
149         static bool isEllipsis(SkUnichar utf8);
150 #endif
151         static bool extractBidi(const char utf8[],
152                                 int utf8Units,
153                                 TextDirection dir,
154                                 std::vector<BidiRegion>* bidiRegions);
155         virtual bool getBidiRegions(const char utf8[],
156                                     int utf8Units,
157                                     TextDirection dir,
158                                     std::vector<BidiRegion>* results) = 0;
159         virtual bool getWords(const char utf8[], int utf8Units, const char* locale,
160                               std::vector<Position>* results) = 0;
161 #ifdef OHOS_SUPPORT
162         virtual bool computeCodeUnitFlags(
163                 char utf8[], int utf8Units, bool replaceTabs, const char locale[],
164                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
165         virtual bool computeCodeUnitFlags(
166                 char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[],
167                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
168 #else
169         virtual bool computeCodeUnitFlags(
170                 char utf8[], int utf8Units, bool replaceTabs,
171                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
172         virtual bool computeCodeUnitFlags(
173                 char16_t utf16[], int utf16Units, bool replaceTabs,
174                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
175 #endif
176 
177         static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units);
178         static SkString convertUtf16ToUtf8(const std::u16string& utf16);
179         static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units);
180         static std::u16string convertUtf8ToUtf16(const SkString& utf8);
181 
182         template <typename Appender8, typename Appender16>
extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)183         static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) {
184             size_t size8 = 0;
185             size_t size16 = 0;
186             auto ptr = utf8.begin();
187             auto end = utf8.end();
188             while (ptr < end) {
189 
190                 size_t index = SkToSizeT(ptr - utf8.begin());
191                 SkUnichar u = SkUTF::NextUTF8(&ptr, end);
192 
193                 // All UTF8 code units refer to the same codepoint
194                 size_t next = SkToSizeT(ptr - utf8.begin());
195                 for (auto i = index; i < next; ++i) {
196                     //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
197                     appender16(size8);
198                     ++size16;
199                 }
200                 //SkASSERT(fUTF16IndexForUTF8Index.size() == next);
201                 SkASSERT(size16 == next);
202                 if (size16 != next) {
203                     return false;
204                 }
205 
206                 // One or two UTF16 code units refer to the same codepoint
207                 uint16_t buffer[2];
208                 size_t count = SkUTF::ToUTF16(u, buffer);
209                 //fUTF8IndexForUTF16Index.emplace_back(index);
210                 appender8(index);
211                 ++size8;
212                 if (count > 1) {
213                     //fUTF8IndexForUTF16Index.emplace_back(index);
214                     appender8(index);
215                     ++size8;
216                 }
217             }
218             //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
219             appender16(size8);
220             ++size16;
221             //fUTF8IndexForUTF16Index.emplace_back(fText.size());
222             appender8(utf8.size());
223             ++size8;
224 
225             return true;
226         }
227 
228         template <typename Callback>
forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)229         void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
230             const char* current = utf8;
231             const char* end = utf8 + utf8Units;
232             while (current < end) {
233                 auto before = current - utf8;
234                 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
235                 if (unichar < 0) unichar = 0xFFFD;
236                 auto after = current - utf8;
237                 uint16_t buffer[2];
238                 size_t count = SkUTF::ToUTF16(unichar, buffer);
239                 callback(unichar, before, after, count);
240             }
241         }
242 
243         template <typename Callback>
forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)244         void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
245             const char16_t* current = utf16;
246             const char16_t* end = utf16 + utf16Units;
247             while (current < end) {
248                 auto before = current - utf16;
249                 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end);
250                 auto after = current - utf16;
251                 callback(unichar, before, after);
252             }
253         }
254 
255         template <typename Callback>
forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)256         void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
257             auto iter = makeBidiIterator(utf16, utf16Units, dir);
258             const uint16_t* start16 = utf16;
259             const uint16_t* end16 = utf16 + utf16Units;
260             SkBidiIterator::Level currentLevel = 0;
261 
262             SkBidiIterator::Position pos16 = 0;
263             while (pos16 <= iter->getLength()) {
264                 auto level = iter->getLevelAt(pos16);
265                 if (pos16 == 0) {
266                     currentLevel = level;
267                 } else if (level != currentLevel) {
268                     callback(pos16, start16 - utf16, currentLevel);
269                     currentLevel = level;
270                 }
271                 if (start16 == end16) {
272                     break;
273                 }
274                 SkUnichar u = SkUTF::NextUTF16(&start16, end16);
275                 pos16 += SkUTF::ToUTF16(u);
276             }
277         }
278 
279         template <typename Callback>
280 #ifdef OHOS_SUPPORT
forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,const char locale[],Callback && callback)281         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type,
282             const char locale[], Callback&& callback) {
283             auto iter = makeBreakIterator(type);
284 #else
285         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
286             auto iter = makeBreakIterator(type);
287 #endif
288             iter->setText(utf16, utf16Units);
289             auto pos = iter->first();
290             do {
291                 callback(pos, iter->status());
292                 pos = iter->next();
293             } while (!iter->isDone());
294         }
295 
296         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
297 
298         virtual std::unique_ptr<SkUnicode> copy() = 0;
299 
300         static std::unique_ptr<SkUnicode> Make();
301 
302         static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode();
303 
304         static std::unique_ptr<SkUnicode> MakeClientBasedUnicode(
305                 SkSpan<char> text,
306                 std::vector<SkUnicode::Position> words,
307                 std::vector<SkUnicode::Position> graphemeBreaks,
308                 std::vector<SkUnicode::LineBreakBefore> lineBreaks);
309 };
310 
311 namespace sknonstd {
312     template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {};
313 }  // namespace sknonstd
314 #endif // SkUnicode_DEFINED
315