1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkMutex.h"
12 #include "include/private/SkOnce.h"
13 #include "include/private/SkTArray.h"
14 #include "include/private/SkTemplates.h"
15 #include "include/private/SkTo.h"
16 #include "modules/skunicode/include/SkUnicode.h"
17 #include "modules/skunicode/src/SkUnicode_icu.h"
18 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
19 #include "src/utils/SkUTF.h"
20 #include "include/private/SkTHash.h"
21 #include <unicode/umachine.h>
22 #include <functional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #if defined(SK_USING_THIRD_PARTY_ICU)
28 #include "SkLoadICU.h"
29 #endif
30
ICULib()31 static const SkICULib* ICULib() {
32 static const auto gICU = SkLoadICULib();
33
34 return gICU.get();
35 }
36
37 // sk_* wrappers for ICU funcs
38 #define SKICU_FUNC(funcname) \
39 template <typename... Args> \
40 auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
41 return ICULib()->f_##funcname(std::forward<Args>(args)...); \
42 } \
43
44 SKICU_EMIT_FUNCS
45 #undef SKICU_FUNC
46
errorName(UErrorCode status)47 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
48 return sk_u_errorName(status);
49 }
50
bidi_close(UBiDi * bidi)51 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
52 sk_ubidi_close(bidi);
53 }
bidi_getDirection(const UBiDi * bidi)54 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
55 return sk_ubidi_getDirection(bidi);
56 }
bidi_getLength(const UBiDi * bidi)57 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
58 return sk_ubidi_getLength(bidi);
59 }
bidi_getLevelAt(const UBiDi * bidi,int pos)60 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
61 return sk_ubidi_getLevelAt(bidi, pos);
62 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)63 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
64 return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
65 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)66 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
67 const UChar* text,
68 int32_t length,
69 UBiDiLevel paraLevel,
70 UBiDiLevel* embeddingLevels,
71 UErrorCode* status) {
72 return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
73 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])74 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
75 int levelsCount,
76 int32_t logicalFromVisual[]) {
77 sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
78 }
79
/**
 * Clones a break iterator using whichever clone entry point the loaded ICU table provides.
 *
 * f_ubrk_clone_ is used when it is non-null; otherwise f_ubrk_safeClone_ is called with
 * null buffer arguments. At least one of the two must be present (asserted in debug builds).
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const auto* lib = ICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
87
utext_close_wrapper(UText * ut)88 static UText* utext_close_wrapper(UText* ut) {
89 return sk_utext_close(ut);
90 }
ubrk_close_wrapper(UBreakIterator * bi)91 static void ubrk_close_wrapper(UBreakIterator* bi) {
92 sk_ubrk_close(bi);
93 }
94
95 using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close),
96 utext_close_wrapper>>;
97 using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close),
98 ubrk_close_wrapper>>;
99 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)100 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
101 SkUnichar val = SkUTF::NextUTF8(ptr, end);
102 return val < 0 ? 0xFFFD : val;
103 }
104
convertType(SkUnicode::BreakType type)105 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
106 switch (type) {
107 case SkUnicode::BreakType::kLines: return UBRK_LINE;
108 case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
109 case SkUnicode::BreakType::kWords: return UBRK_WORD;
110 default:
111 return UBRK_CHARACTER;
112 }
113 }
114
115 class SkBreakIterator_icu : public SkBreakIterator {
116 ICUBreakIterator fBreakIterator;
117 Position fLastResult;
118 public:
SkBreakIterator_icu(ICUBreakIterator iter)119 explicit SkBreakIterator_icu(ICUBreakIterator iter)
120 : fBreakIterator(std::move(iter))
121 , fLastResult(0) {}
first()122 Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()123 Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()124 Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()125 Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()126 bool isDone() override { return fLastResult == UBRK_DONE; }
127
setText(const char utftext8[],int utf8Units)128 bool setText(const char utftext8[], int utf8Units) override {
129 UErrorCode status = U_ZERO_ERROR;
130 ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
131
132 if (U_FAILURE(status)) {
133 SkDEBUGF("Break error: %s", sk_u_errorName(status));
134 return false;
135 }
136 SkASSERT(text);
137 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
138 if (U_FAILURE(status)) {
139 SkDEBUGF("Break error: %s", sk_u_errorName(status));
140 return false;
141 }
142 fLastResult = 0;
143 return true;
144 }
setText(const char16_t utftext16[],int utf16Units)145 bool setText(const char16_t utftext16[], int utf16Units) override {
146 UErrorCode status = U_ZERO_ERROR;
147 ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
148 utf16Units, &status));
149
150 if (U_FAILURE(status)) {
151 SkDEBUGF("Break error: %s", sk_u_errorName(status));
152 return false;
153 }
154 SkASSERT(text);
155 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
156 if (U_FAILURE(status)) {
157 SkDEBUGF("Break error: %s", sk_u_errorName(status));
158 return false;
159 }
160 fLastResult = 0;
161 return true;
162 }
163 };
164
165 class SkIcuBreakIteratorCache {
166 SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache;
167 SkMutex fBreakCacheMutex;
168
169 public:
get()170 static SkIcuBreakIteratorCache& get() {
171 static SkIcuBreakIteratorCache instance;
172 return instance;
173 }
174
makeBreakIterator(SkUnicode::BreakType type)175 ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) {
176 UErrorCode status = U_ZERO_ERROR;
177 ICUBreakIterator* cachedIterator;
178 {
179 SkAutoMutexExclusive lock(fBreakCacheMutex);
180 cachedIterator = fBreakCache.find(type);
181 if (!cachedIterator) {
182 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(),
183 nullptr, 0, &status));
184 if (U_FAILURE(status)) {
185 SkDEBUGF("Break error: %s", sk_u_errorName(status));
186 } else {
187 cachedIterator = fBreakCache.set(type, std::move(newIterator));
188 }
189 }
190 }
191 ICUBreakIterator iterator;
192 if (cachedIterator) {
193 iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
194 if (U_FAILURE(status)) {
195 SkDEBUGF("Break error: %s", sk_u_errorName(status));
196 }
197 }
198 return iterator;
199 }
200 };
201
202 class SkUnicode_icu : public SkUnicode {
203
copy()204 std::unique_ptr<SkUnicode> copy() override {
205 return std::make_unique<SkUnicode_icu>();
206 }
207
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)208 static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale, std::vector<Position>* words) {
209
210 UErrorCode status = U_ZERO_ERROR;
211
212 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords);
213 if (!iterator) {
214 SkDEBUGF("Break error: %s", sk_u_errorName(status));
215 return false;
216 }
217 SkASSERT(iterator);
218
219 ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
220 if (U_FAILURE(status)) {
221 SkDEBUGF("Break error: %s", sk_u_errorName(status));
222 return false;
223 }
224
225 sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
226 if (U_FAILURE(status)) {
227 SkDEBUGF("Break error: %s", sk_u_errorName(status));
228 return false;
229 }
230
231 // Get the words
232 int32_t pos = sk_ubrk_first(iterator.get());
233 while (pos != UBRK_DONE) {
234 words->emplace_back(pos);
235 pos = sk_ubrk_next(iterator.get());
236 }
237
238 return true;
239 }
240
extractPositions(const char utf8[],int utf8Units,BreakType type,std::function<void (int,int)> setBreak)241 static bool extractPositions
242 (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) {
243
244 UErrorCode status = U_ZERO_ERROR;
245 ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
246
247 if (U_FAILURE(status)) {
248 SkDEBUGF("Break error: %s", sk_u_errorName(status));
249 return false;
250 }
251 SkASSERT(text);
252
253 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type);
254 if (!iterator) {
255 return false;
256 }
257
258 sk_ubrk_setUText(iterator.get(), text.get(), &status);
259 if (U_FAILURE(status)) {
260 SkDEBUGF("Break error: %s", sk_u_errorName(status));
261 return false;
262 }
263
264 auto iter = iterator.get();
265 int32_t pos = sk_ubrk_first(iter);
266 while (pos != UBRK_DONE) {
267 int s = type == SkUnicode::BreakType::kLines
268 ? UBRK_LINE_SOFT
269 : sk_ubrk_getRuleStatus(iter);
270 setBreak(pos, s);
271 pos = sk_ubrk_next(iter);
272 }
273
274 if (type == SkUnicode::BreakType::kLines) {
275 // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
276 // (ICU line break iterator does not work correctly on Thai text with new lines)
277 // So, we only use the iterator to collect soft line breaks and
278 // scan the text for all hard line breaks ourselves
279 const char* end = utf8 + utf8Units;
280 const char* ch = utf8;
281 while (ch < end) {
282 auto unichar = utf8_next(&ch, end);
283 if (isHardLineBreak(unichar)) {
284 setBreak(ch - utf8, UBRK_LINE_HARD);
285 }
286 }
287 }
288 return true;
289 }
290
isControl(SkUnichar utf8)291 static bool isControl(SkUnichar utf8) {
292 return sk_u_iscntrl(utf8);
293 }
294
isWhitespace(SkUnichar utf8)295 static bool isWhitespace(SkUnichar utf8) {
296 return sk_u_isWhitespace(utf8);
297 }
298
isSpace(SkUnichar utf8)299 static bool isSpace(SkUnichar utf8) {
300 return sk_u_isspace(utf8);
301 }
302
isTabulation(SkUnichar utf8)303 static bool isTabulation(SkUnichar utf8) {
304 return utf8 == '\t';
305 }
306
isHardBreak(SkUnichar utf8)307 static bool isHardBreak(SkUnichar utf8) {
308 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
309 return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
310 }
311
isIdeographic(SkUnichar unichar)312 static bool isIdeographic(SkUnichar unichar) {
313 return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
314 }
315 #ifdef OHOS_SUPPORT
isPunctuation(SkUnichar unichar)316 static bool isPunctuation(SkUnichar unichar)
317 {
318 if (sk_u_ispunct(unichar)) {
319 return true;
320 }
321 static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 13> ranges{{
322 {0x0021, 0x002F}, // ASCII punctuation (e.g., ! " # $ % & ' ( ) * + , - . /)
323 {0x003A, 0x0040}, // ASCII punctuation (e.g., : ; < = > ? @)
324 {0x005B, 0x0060}, // ASCII punctuation (e.g., [ \ ] ^ _ `)
325 {0x007B, 0x007E}, // ASCII punctuation (e.g., { | } ~)
326 {0x2000, 0x206F}, // Common punctuation (Chinese & English)
327 {0xFF00, 0xFFEF}, // Full-width characters and symbols
328 {0x2E00, 0x2E7F}, // Supplemental punctuation (e.g., ancient)
329 {0x3001, 0x3003}, // CJK punctuation (e.g., Chinese comma)
330 {0xFF01, 0xFF0F}, // Full-width ASCII punctuation (0x21-0x2F)
331 {0xFF1A, 0xFF20}, // Full-width ASCII punctuation (0x3A-0x40)
332 {0xFF3B, 0xFF40}, // Full-width ASCII punctuation (0x5B-0x60)
333 {0xFF5B, 0xFF65}, // Other full-width punctuation (e.g., quotes)
334 }};
335 for (auto range : ranges) {
336 if (range.first <= unichar && unichar <= range.second) {
337 return true;
338 }
339 }
340 return false;
341 }
isEllipsis(SkUnichar unichar)342 static bool isEllipsis(SkUnichar unichar) { return (unichar == 0x2026 || unichar == 0x002E); }
isGraphemeExtend(SkUnichar unichar)343 static bool isGraphemeExtend(SkUnichar unichar) {
344 return sk_u_hasBinaryProperty(unichar, UCHAR_GRAPHEME_EXTEND);
345 }
346 #endif
347
348 public:
~SkUnicode_icu()349 ~SkUnicode_icu() override { }
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)350 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
351 SkBidiIterator::Direction dir) override {
352 return SkUnicode::makeBidiIterator(text, count, dir);
353 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)354 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
355 int count,
356 SkBidiIterator::Direction dir) override {
357 return SkUnicode::makeBidiIterator(text, count, dir);
358 }
makeBreakIterator(const char locale[],BreakType breakType)359 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
360 BreakType breakType) override {
361 UErrorCode status = U_ZERO_ERROR;
362 ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0,
363 &status));
364 if (U_FAILURE(status)) {
365 SkDEBUGF("Break error: %s", sk_u_errorName(status));
366 return nullptr;
367 }
368 return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
369 }
makeBreakIterator(BreakType breakType)370 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
371 return makeBreakIterator(sk_uloc_getDefault(), breakType);
372 }
373
isHardLineBreak(SkUnichar utf8)374 static bool isHardLineBreak(SkUnichar utf8) {
375 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
376 return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
377 }
378
toUpper(const SkString & str)379 SkString toUpper(const SkString& str) override {
380 // Convert to UTF16 since that's what ICU wants.
381 auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
382
383 UErrorCode icu_err = U_ZERO_ERROR;
384 const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
385 nullptr, &icu_err);
386 if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
387 return SkString();
388 }
389
390 SkAutoSTArray<128, uint16_t> upper16(upper16len);
391 icu_err = U_ZERO_ERROR;
392 sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
393 (UChar*)(str16.c_str()), str16.size(),
394 nullptr, &icu_err);
395 SkASSERT(!U_FAILURE(icu_err));
396
397 // ... and back to utf8 'cause that's what we want.
398 return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
399 }
400
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)401 bool getBidiRegions(const char utf8[],
402 int utf8Units,
403 TextDirection dir,
404 std::vector<BidiRegion>* results) override {
405 return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
406 }
407
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)408 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
409
410 // Convert to UTF16 since we want the results in utf16
411 auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
412 return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
413 }
414
415 #ifdef OHOS_SUPPORT
processPunctuationAndEllipsis(SkTArray<SkUnicode::CodeUnitFlags,true> * results,int i,SkUnichar unichar)416 void processPunctuationAndEllipsis(SkTArray<SkUnicode::CodeUnitFlags, true>* results, int i, SkUnichar unichar)
417 {
418 if (SkUnicode_icu::isPunctuation(unichar)) {
419 results->at(i) |= SkUnicode::kPunctuation;
420 }
421 if (SkUnicode_icu::isEllipsis(unichar)) {
422 results->at(i) |= SkUnicode::kEllipsis;
423 }
424 }
425 #endif
426
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)427 bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
428 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
429 results->reset();
430 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
431
432 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos, int status) {
433 (*results)[pos] |= status == UBRK_LINE_HARD
434 ? CodeUnitFlags::kHardLineBreakBefore
435 : CodeUnitFlags::kSoftLineBreakBefore;
436 });
437
438 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos, int status) {
439 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
440 });
441
442 const char* current = utf8;
443 const char* end = utf8 + utf8Units;
444 while (current < end) {
445 auto before = current - utf8;
446 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
447 if (unichar < 0) unichar = 0xFFFD;
448 auto after = current - utf8;
449 if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
450 results->at(before) |= SkUnicode::kTabulation;
451 if (replaceTabs) {
452 unichar = ' ';
453 utf8[before] = ' ';
454 }
455 }
456 for (auto i = before; i < after; ++i) {
457 if (SkUnicode_icu::isSpace(unichar)) {
458 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
459 }
460 if (SkUnicode_icu::isWhitespace(unichar)) {
461 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
462 }
463 if (SkUnicode_icu::isControl(unichar)) {
464 results->at(i) |= SkUnicode::kControl;
465 }
466 if (SkUnicode_icu::isIdeographic(unichar)) {
467 results->at(i) |= SkUnicode::kIdeographic;
468 }
469 #ifdef OHOS_SUPPORT
470 processPunctuationAndEllipsis(results, i, unichar);
471 #endif
472 }
473
474 #ifdef OHOS_SUPPORT
475 if (SkUnicode_icu::isGraphemeExtend(unichar)) {
476 // Current unichar is a combining one.
477 results->at(before) |= SkUnicode::kCombine;
478 }
479 #endif
480 }
481
482 return true;
483 }
484
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)485 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
486 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
487 results->reset();
488 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
489
490 // Get white spaces
491 this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
492 [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
493 for (auto i = start; i < end; ++i) {
494 if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
495 results->at(i) |= SkUnicode::kTabulation;
496 if (replaceTabs) {
497 unichar = ' ';
498 utf16[start] = ' ';
499 }
500 }
501 if (SkUnicode_icu::isSpace(unichar)) {
502 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
503 }
504 if (SkUnicode_icu::isWhitespace(unichar)) {
505 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
506 }
507 if (SkUnicode_icu::isControl(unichar)) {
508 results->at(i) |= SkUnicode::kControl;
509 }
510 }
511 });
512 // Get graphemes
513 this->forEachBreak((char16_t*)&utf16[0],
514 utf16Units,
515 SkUnicode::BreakType::kGraphemes,
516 [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
517 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
518 });
519 // Get line breaks
520 this->forEachBreak(
521 (char16_t*)&utf16[0],
522 utf16Units,
523 SkUnicode::BreakType::kLines,
524 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
525 if (status ==
526 (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
527 // Hard line breaks clears off all the other flags
528 // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
529 (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
530 } else {
531 (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
532 }
533 });
534
535 return true;
536 }
537
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])538 void reorderVisual(const BidiLevel runLevels[],
539 int levelsCount,
540 int32_t logicalFromVisual[]) override {
541 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
542 }
543 };
544
MakeIcuBasedUnicode()545 std::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() {
546 #if defined(SK_USING_THIRD_PARTY_ICU)
547 if (!SkLoadICU()) {
548 static SkOnce once;
549 once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
550 return nullptr;
551 }
552 #endif
553
554 return ICULib()
555 ? std::make_unique<SkUnicode_icu>()
556 : nullptr;
557 }
558