1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "include/core/SkSpan.h"
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkTArray.h"
12 #include "include/private/SkTo.h"
13 #include "modules/skunicode/include/SkUnicode.h"
14 #include "modules/skunicode/src/SkUnicode_client.h"
15 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
16 #include "src/utils/SkUTF.h"
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include <array>
25 #include <unicode/ubidi.h>
26 #include <unicode/ubrk.h>
27 #include <unicode/uchar.h>
28 #include <unicode/uloc.h>
29 #include <unicode/uscript.h>
30 #include <unicode/ustring.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33
34
35 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
36
errorName(UErrorCode status)37 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
38 return u_errorName_skia(status);
39 }
bidi_close(UBiDi * bidi)40 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
41 ubidi_close_skia(bidi);
42 }
bidi_getDirection(const UBiDi * bidi)43 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
44 return ubidi_getDirection_skia(bidi);
45 }
bidi_getLength(const UBiDi * bidi)46 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
47 return ubidi_getLength_skia(bidi);
48 }
bidi_getLevelAt(const UBiDi * bidi,int pos)49 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
50 return ubidi_getLevelAt_skia(bidi, pos);
51 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)52 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
53 return ubidi_openSized_skia(maxLength, maxRunCount, pErrorCode);
54 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)55 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
56 const UChar* text,
57 int32_t length,
58 UBiDiLevel paraLevel,
59 UBiDiLevel* embeddingLevels,
60 UErrorCode* status) {
61 return ubidi_setPara_skia(bidi, text, length, paraLevel, embeddingLevels, status);
62 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])63 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
64 int levelsCount,
65 int32_t logicalFromVisual[]) {
66 ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual);
67 }
68 #endif
69
70 class SkUnicode_client : public SkUnicode {
71 public:
72 struct Data {
73 SkSpan<const char> fText8;
74 SkSpan<const char16_t> fText16;
75 std::vector<Position> fWords;
76 std::vector<SkUnicode::Position> fGraphemeBreaks;
77 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data78 Data(SkSpan<char> text,
79 std::vector<SkUnicode::Position> words,
80 std::vector<SkUnicode::Position> graphemeBreaks,
81 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
82 : fText8(text)
83 , fText16(SkSpan<const char16_t>(nullptr, 0))
84 , fWords(std::move(words))
85 , fGraphemeBreaks(std::move(graphemeBreaks))
86 , fLineBreaks(std::move(lineBreaks)) {
87 }
88
resetSkUnicode_client::Data89 void reset() {
90 fText8 = SkSpan<const char>(nullptr, 0);
91 fText16 = SkSpan<const char16_t>(nullptr, 0);
92 fGraphemeBreaks.clear();
93 fLineBreaks.clear();
94 }
95 };
96 SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)97 SkUnicode_client(SkSpan<char> text,
98 std::vector<SkUnicode::Position> words,
99 std::vector<SkUnicode::Position> graphemeBreaks,
100 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
101 : fData(std::make_shared<Data>(text,
102 std::move(words),
103 std::move(graphemeBreaks),
104 std::move(lineBreaks))) { }
SkUnicode_client(const SkUnicode_client * origin)105 SkUnicode_client(const SkUnicode_client* origin)
106 : fData(origin->fData) {}
107
108
copy()109 std::unique_ptr<SkUnicode> copy() override {
110 return std::make_unique<SkUnicode_client>(this);
111 }
112
113 ~SkUnicode_client() override = default;
114
reset()115 void reset() { fData->reset(); }
116 // For SkShaper
117 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
118 SkBidiIterator::Direction dir) override;
119 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
120 int count,
121 SkBidiIterator::Direction dir) override;
122 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
123 BreakType breakType) override;
124 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
125 // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)126 bool getBidiRegions(const char utf8[],
127 int utf8Units,
128 TextDirection dir,
129 std::vector<BidiRegion>* results) override {
130 return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
131 }
132
133 // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)134 static bool isControl(SkUnichar utf8) {
135 return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
136 (utf8 >= 0x200D && utf8 <= 0x200F) ||
137 (utf8 >= 0x202A && utf8 <= 0x202E);
138 }
139
isWhitespace(SkUnichar unichar)140 static bool isWhitespace(SkUnichar unichar) {
141 static constexpr std::array<SkUnichar, 21> whitespaces {
142 0x0009, // character tabulation
143 0x000A, // line feed
144 0x000B, // line tabulation
145 0x000C, // form feed
146 0x000D, // carriage return
147 0x0020, // space
148 //0x0085, // next line
149 //0x00A0, // no-break space
150 0x1680, // ogham space mark
151 0x2000, // en quad
152 0x2001, // em quad
153 0x2002, // en space
154 0x2003, // em space
155 0x2004, // three-per-em space
156 0x2005, // four-per-em space
157 0x2006, // six-per-em space
158 //0x2007, // figure space
159 0x2008, // punctuation space
160 0x2009, // thin space
161 0x200A, // hair space
162 0x2028, // line separator
163 0x2029, // paragraph separator
164 //0x202F, // narrow no-break space
165 0x205F, // medium mathematical space
166 0x3000};// ideographic space
167 return std::find(whitespaces.begin(), whitespaces.end(), unichar) != whitespaces.end();
168 }
169
isSpace(SkUnichar unichar)170 static bool isSpace(SkUnichar unichar) {
171 static constexpr std::array<SkUnichar, 25> spaces {
172 0x0009, // character tabulation
173 0x000A, // line feed
174 0x000B, // line tabulation
175 0x000C, // form feed
176 0x000D, // carriage return
177 0x0020, // space
178 0x0085, // next line
179 0x00A0, // no-break space
180 0x1680, // ogham space mark
181 0x2000, // en quad
182 0x2001, // em quad
183 0x2002, // en space
184 0x2003, // em space
185 0x2004, // three-per-em space
186 0x2005, // four-per-em space
187 0x2006, // six-per-em space
188 0x2007, // figure space
189 0x2008, // punctuation space
190 0x2009, // thin space
191 0x200A, // hair space
192 0x2028, // line separator
193 0x2029, // paragraph separator
194 0x202F, // narrow no-break space
195 0x205F, // medium mathematical space
196 0x3000}; // ideographic space
197 return std::find(spaces.begin(), spaces.end(), unichar) != spaces.end();
198 }
199
isTabulation(SkUnichar utf8)200 static bool isTabulation(SkUnichar utf8) {
201 return utf8 == '\t';
202 }
203
isHardBreak(SkUnichar utf8)204 static bool isHardBreak(SkUnichar utf8) {
205 return utf8 == '\n';
206 }
207
isIdeographic(SkUnichar unichar)208 static bool isIdeographic(SkUnichar unichar) {
209 static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 8> ranges {{
210 {4352, 4607}, // Hangul Jamo
211 {11904, 42191}, // CJK_Radicals
212 {43072, 43135}, // Phags_Pa
213 {44032, 55215}, // Hangul_Syllables
214 {63744, 64255}, // CJK_Compatibility_Ideographs
215 {65072, 65103}, // CJK_Compatibility_Forms
216 {65381, 65500}, // Katakana_Hangul_Halfwidth
217 {131072, 196607} // Supplementary_Ideographic_Plane
218 }};
219 for (auto range : ranges) {
220 if (range.first <= unichar && range.second > unichar) {
221 return true;
222 }
223 }
224 return false;
225 }
226
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)227 bool computeCodeUnitFlags(char utf8[],
228 int utf8Units,
229 bool replaceTabs,
230 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
231 results->clear();
232 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
233 for (auto& lineBreak : fData->fLineBreaks) {
234 (*results)[lineBreak.pos] |=
235 lineBreak.breakType == LineBreakType::kHardLineBreak
236 ? CodeUnitFlags::kHardLineBreakBefore
237 : CodeUnitFlags::kSoftLineBreakBefore;
238 }
239 for (auto& grapheme : fData->fGraphemeBreaks) {
240 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
241 }
242 const char* current = utf8;
243 const char* end = utf8 + utf8Units;
244 while (current < end) {
245 auto before = current - utf8;
246 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
247 if (unichar < 0) unichar = 0xFFFD;
248 auto after = current - utf8;
249 if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
250 results->at(before) |= SkUnicode::kTabulation;
251 if (replaceTabs) {
252 unichar = ' ';
253 utf8[before] = ' ';
254 }
255 }
256 for (auto i = before; i < after; ++i) {
257 if (SkUnicode_client::isSpace(unichar)) {
258 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
259 }
260 if (SkUnicode_client::isWhitespace(unichar)) {
261 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
262 }
263 if (SkUnicode_client::isControl(unichar)) {
264 results->at(i) |= SkUnicode::kControl;
265 }
266 if (SkUnicode_client::isIdeographic(unichar)) {
267 results->at(i) |= SkUnicode::kIdeographic;
268 }
269 }
270 }
271 return true;
272 }
273
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)274 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
275 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
276 results->clear();
277 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
278 for (auto& lineBreak : fData->fLineBreaks) {
279 (*results)[lineBreak.pos] |=
280 lineBreak.breakType == LineBreakType::kHardLineBreak
281 ? CodeUnitFlags::kHardLineBreakBefore
282 : CodeUnitFlags::kSoftLineBreakBefore;
283 }
284 for (auto& grapheme : fData->fGraphemeBreaks) {
285 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
286 }
287 return true;
288 }
289
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)290 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
291 *results = fData->fWords;
292 return true;
293 }
294
toUpper(const SkString & str)295 SkString toUpper(const SkString& str) override {
296 SkASSERT(false);
297 return SkString(fData->fText8.data(), fData->fText8.size());
298 }
299
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])300 void reorderVisual(const BidiLevel runLevels[],
301 int levelsCount,
302 int32_t logicalFromVisual[]) override {
303 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
304 }
305 private:
306 friend class SkBreakIterator_client;
307
308 std::shared_ptr<Data> fData;
309 };
310
311 class SkBreakIterator_client: public SkBreakIterator {
312 std::shared_ptr<SkUnicode_client::Data> fData;
313 Position fLastResult;
314 Position fStart;
315 Position fEnd;
316 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)317 explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()318 Position first() override
319 { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()320 Position current() override
321 { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()322 Position next() override
323 { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()324 Status status() override {
325 return fData->fLineBreaks[fStart + fLastResult].breakType ==
326 SkUnicode::LineBreakType::kHardLineBreak
327 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
328 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
329 }
isDone()330 bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)331 bool setText(const char utftext8[], int utf8Units) override {
332 SkASSERT(utftext8 >= fData->fText8.data() &&
333 utf8Units <= SkToS16(fData->fText8.size()));
334 fStart = utftext8 - fData->fText8.data();
335 fEnd = fStart + utf8Units;
336 fLastResult = 0;
337 return true;
338 }
setText(const char16_t utftext16[],int utf16Units)339 bool setText(const char16_t utftext16[], int utf16Units) override {
340 SkASSERT(utftext16 >= fData->fText16.data() &&
341 utf16Units <= SkToS16(fData->fText16.size()));
342 fStart = utftext16 - fData->fText16.data();
343 fEnd = fStart + utf16Units;
344 fLastResult = 0;
345 return true;
346 }
347 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)348 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
349 SkBidiIterator::Direction dir) {
350 return SkUnicode::makeBidiIterator(text, count, dir);
351 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)352 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
353 int count,
354 SkBidiIterator::Direction dir) {
355 return SkUnicode::makeBidiIterator(text, count, dir);
356 }
makeBreakIterator(const char locale[],BreakType breakType)357 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
358 BreakType breakType) {
359 return std::make_unique<SkBreakIterator_client>(fData);
360 }
makeBreakIterator(BreakType breakType)361 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
362 return std::make_unique<SkBreakIterator_client>(fData);
363 }
364
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)365 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
366 SkSpan<char> text,
367 std::vector<SkUnicode::Position> words,
368 std::vector<SkUnicode::Position> graphemeBreaks,
369 std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
370 return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
371 }
372
373