1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkMutex.h"
12 #include "include/private/SkOnce.h"
13 #include "include/private/SkTArray.h"
14 #include "include/private/SkTemplates.h"
15 #include "include/private/SkTo.h"
16 #include "modules/skunicode/include/SkUnicode.h"
17 #include "modules/skunicode/src/SkUnicode_icu.h"
18 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
19 #include "src/utils/SkUTF.h"
20 #include "include/private/SkTHash.h"
21 #include <unicode/umachine.h>
22 #include <functional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #ifdef OHOS_SUPPORT
28 #include <unordered_set>
29 #endif
30
31 #if defined(SK_USING_THIRD_PARTY_ICU)
32 #include "SkLoadICU.h"
33 #endif
34
ICULib()35 static const SkICULib* ICULib() {
36 static const auto gICU = SkLoadICULib();
37
38 return gICU.get();
39 }
40
41 // sk_* wrappers for ICU funcs
42 #define SKICU_FUNC(funcname) \
43 template <typename... Args> \
44 auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
45 return ICULib()->f_##funcname(std::forward<Args>(args)...); \
46 } \
47
48 SKICU_EMIT_FUNCS
49 #undef SKICU_FUNC
50
errorName(UErrorCode status)51 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
52 return sk_u_errorName(status);
53 }
54
bidi_close(UBiDi * bidi)55 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
56 sk_ubidi_close(bidi);
57 }
bidi_getDirection(const UBiDi * bidi)58 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
59 return sk_ubidi_getDirection(bidi);
60 }
bidi_getLength(const UBiDi * bidi)61 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
62 return sk_ubidi_getLength(bidi);
63 }
bidi_getLevelAt(const UBiDi * bidi,int pos)64 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
65 return sk_ubidi_getLevelAt(bidi, pos);
66 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)67 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
68 return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
69 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)70 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
71 const UChar* text,
72 int32_t length,
73 UBiDiLevel paraLevel,
74 UBiDiLevel* embeddingLevels,
75 UErrorCode* status) {
76 return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
77 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])78 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
79 int levelsCount,
80 int32_t logicalFromVisual[]) {
81 sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
82 }
83
/**
 * Clones `bi` with whichever clone entry point the loaded ICU table provides:
 * f_ubrk_clone_ when it is set, otherwise f_ubrk_safeClone_.
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const auto* lib = ICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    // The two buffer arguments of the safeClone entry point are passed as nullptr.
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
91
utext_close_wrapper(UText * ut)92 static UText* utext_close_wrapper(UText* ut) {
93 return sk_utext_close(ut);
94 }
ubrk_close_wrapper(UBreakIterator * bi)95 static void ubrk_close_wrapper(UBreakIterator* bi) {
96 sk_ubrk_close(bi);
97 }
98
99 using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close),
100 utext_close_wrapper>>;
101 using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close),
102 ubrk_close_wrapper>>;
103 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)104 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
105 SkUnichar val = SkUTF::NextUTF8(ptr, end);
106 return val < 0 ? 0xFFFD : val;
107 }
108
convertType(SkUnicode::BreakType type)109 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
110 switch (type) {
111 case SkUnicode::BreakType::kLines: return UBRK_LINE;
112 case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
113 case SkUnicode::BreakType::kWords: return UBRK_WORD;
114 default:
115 return UBRK_CHARACTER;
116 }
117 }
118
119 class SkBreakIterator_icu : public SkBreakIterator {
120 ICUBreakIterator fBreakIterator;
121 Position fLastResult;
122 public:
SkBreakIterator_icu(ICUBreakIterator iter)123 explicit SkBreakIterator_icu(ICUBreakIterator iter)
124 : fBreakIterator(std::move(iter))
125 , fLastResult(0) {}
first()126 Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()127 Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()128 Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()129 Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()130 bool isDone() override { return fLastResult == UBRK_DONE; }
131
setText(const char utftext8[],int utf8Units)132 bool setText(const char utftext8[], int utf8Units) override {
133 UErrorCode status = U_ZERO_ERROR;
134 ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
135
136 if (U_FAILURE(status)) {
137 SkDEBUGF("Break error: %s", sk_u_errorName(status));
138 return false;
139 }
140 SkASSERT(text);
141 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
142 if (U_FAILURE(status)) {
143 SkDEBUGF("Break error: %s", sk_u_errorName(status));
144 return false;
145 }
146 fLastResult = 0;
147 return true;
148 }
setText(const char16_t utftext16[],int utf16Units)149 bool setText(const char16_t utftext16[], int utf16Units) override {
150 UErrorCode status = U_ZERO_ERROR;
151 ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
152 utf16Units, &status));
153
154 if (U_FAILURE(status)) {
155 SkDEBUGF("Break error: %s", sk_u_errorName(status));
156 return false;
157 }
158 SkASSERT(text);
159 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
160 if (U_FAILURE(status)) {
161 SkDEBUGF("Break error: %s", sk_u_errorName(status));
162 return false;
163 }
164 fLastResult = 0;
165 return true;
166 }
167 };
168
169 class SkIcuBreakIteratorCache {
170 SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache;
171 SkMutex fBreakCacheMutex;
172
173 public:
get()174 static SkIcuBreakIteratorCache& get() {
175 static SkIcuBreakIteratorCache instance;
176 return instance;
177 }
178
179 #ifdef OHOS_SUPPORT
makeBreakIterator(const char locale[],SkUnicode::BreakType type)180 ICUBreakIterator makeBreakIterator(const char locale[], SkUnicode::BreakType type) {
181 UErrorCode status = U_ZERO_ERROR;
182 ICUBreakIterator* cachedIterator;
183 {
184 SkAutoMutexExclusive lock(fBreakCacheMutex);
185 cachedIterator = fBreakCache.find(type);
186 if (!cachedIterator) {
187 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), locale, nullptr, 0, &status));
188 if (U_FAILURE(status)) {
189 SkDEBUGF("Break error: %s", sk_u_errorName(status));
190 } else {
191 cachedIterator = fBreakCache.set(type, std::move(newIterator));
192 }
193 }
194 }
195 ICUBreakIterator iterator;
196 if (cachedIterator) {
197 iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
198 if (U_FAILURE(status)) {
199 SkDEBUGF("Break error: %s", sk_u_errorName(status));
200 }
201 }
202 return iterator;
203 }
204 #else
makeBreakIterator(SkUnicode::BreakType type)205 ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) {
206 UErrorCode status = U_ZERO_ERROR;
207 ICUBreakIterator* cachedIterator;
208 {
209 SkAutoMutexExclusive lock(fBreakCacheMutex);
210 cachedIterator = fBreakCache.find(type);
211 if (!cachedIterator) {
212 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(),
213 nullptr, 0, &status));
214 if (U_FAILURE(status)) {
215 SkDEBUGF("Break error: %s", sk_u_errorName(status));
216 } else {
217 cachedIterator = fBreakCache.set(type, std::move(newIterator));
218 }
219 }
220 }
221 ICUBreakIterator iterator;
222 if (cachedIterator) {
223 iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
224 if (U_FAILURE(status)) {
225 SkDEBUGF("Break error: %s", sk_u_errorName(status));
226 }
227 }
228 return iterator;
229 }
230 #endif
231 };
232
233 class SkUnicode_icu : public SkUnicode {
234
copy()235 std::unique_ptr<SkUnicode> copy() override {
236 return std::make_unique<SkUnicode_icu>();
237 }
238
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)239 static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale, std::vector<Position>* words) {
240
241 UErrorCode status = U_ZERO_ERROR;
242
243 #ifdef OHOS_SUPPORT
244 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(locale, BreakType::kWords);
245 #else
246 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords);
247 #endif
248 if (!iterator) {
249 SkDEBUGF("Break error: %s", sk_u_errorName(status));
250 return false;
251 }
252 SkASSERT(iterator);
253
254 ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
255 if (U_FAILURE(status)) {
256 SkDEBUGF("Break error: %s", sk_u_errorName(status));
257 return false;
258 }
259
260 sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
261 if (U_FAILURE(status)) {
262 SkDEBUGF("Break error: %s", sk_u_errorName(status));
263 return false;
264 }
265
266 // Get the words
267 int32_t pos = sk_ubrk_first(iterator.get());
268 while (pos != UBRK_DONE) {
269 words->emplace_back(pos);
270 pos = sk_ubrk_next(iterator.get());
271 }
272
273 return true;
274 }
275
extractPositions(const char utf8[],int utf8Units,BreakType type,const char locale[],std::function<void (int,int)> setBreak)276 static bool extractPositions
277 #ifdef OHOS_SUPPORT
278 (const char utf8[], int utf8Units, BreakType type, const char locale[],
279 std::function<void(int, int)> setBreak) {
280 #else
281 (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) {
282 #endif
283
284 UErrorCode status = U_ZERO_ERROR;
285 ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
286
287 if (U_FAILURE(status)) {
288 SkDEBUGF("Break error: %s", sk_u_errorName(status));
289 return false;
290 }
291 SkASSERT(text);
292
293 #ifdef OHOS_SUPPORT
294 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(locale, type);
295 #else
296 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type);
297 #endif
298 if (!iterator) {
299 return false;
300 }
301
302 sk_ubrk_setUText(iterator.get(), text.get(), &status);
303 if (U_FAILURE(status)) {
304 SkDEBUGF("Break error: %s", sk_u_errorName(status));
305 return false;
306 }
307
308 auto iter = iterator.get();
309 int32_t pos = sk_ubrk_first(iter);
310 while (pos != UBRK_DONE) {
311 int s = type == SkUnicode::BreakType::kLines
312 ? UBRK_LINE_SOFT
313 : sk_ubrk_getRuleStatus(iter);
314 setBreak(pos, s);
315 pos = sk_ubrk_next(iter);
316 }
317
318 if (type == SkUnicode::BreakType::kLines) {
319 // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
320 // (ICU line break iterator does not work correctly on Thai text with new lines)
321 // So, we only use the iterator to collect soft line breaks and
322 // scan the text for all hard line breaks ourselves
323 const char* end = utf8 + utf8Units;
324 const char* ch = utf8;
325 while (ch < end) {
326 auto unichar = utf8_next(&ch, end);
327 if (isHardLineBreak(unichar)) {
328 setBreak(ch - utf8, UBRK_LINE_HARD);
329 }
330 }
331 }
332 return true;
333 }
334
335 static bool isControl(SkUnichar utf8) {
336 return sk_u_iscntrl(utf8);
337 }
338
339 static bool isWhitespace(SkUnichar utf8) {
340 return sk_u_isWhitespace(utf8);
341 }
342
343 static bool isSpace(SkUnichar utf8) {
344 return sk_u_isspace(utf8);
345 }
346
347 static bool isTabulation(SkUnichar utf8) {
348 return utf8 == '\t';
349 }
350
351 static bool isHardBreak(SkUnichar utf8) {
352 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
353 return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
354 }
355
356 static bool isIdeographic(SkUnichar unichar) {
357 return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
358 }
359 #ifdef OHOS_SUPPORT
360 static bool isPunctuation(SkUnichar unichar)
361 {
362 if (sk_u_ispunct(unichar)) {
363 return true;
364 }
365 static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 13> ranges{{
366 {0x0021, 0x002F}, // ASCII punctuation (e.g., ! " # $ % & ' ( ) * + , - . /)
367 {0x003A, 0x0040}, // ASCII punctuation (e.g., : ; < = > ? @)
368 {0x005B, 0x0060}, // ASCII punctuation (e.g., [ \ ] ^ _ `)
369 {0x007B, 0x007E}, // ASCII punctuation (e.g., { | } ~)
370 {0x2000, 0x206F}, // Common punctuation (Chinese & English)
371 {0xFF00, 0xFFEF}, // Full-width characters and symbols
372 {0x2E00, 0x2E7F}, // Supplemental punctuation (e.g., ancient)
373 {0x3001, 0x3003}, // CJK punctuation (e.g., Chinese comma)
374 {0xFF01, 0xFF0F}, // Full-width ASCII punctuation (0x21-0x2F)
375 {0xFF1A, 0xFF20}, // Full-width ASCII punctuation (0x3A-0x40)
376 {0xFF3B, 0xFF40}, // Full-width ASCII punctuation (0x5B-0x60)
377 {0xFF5B, 0xFF65}, // Other full-width punctuation (e.g., quotes)
378 }};
379 for (auto range : ranges) {
380 if (range.first <= unichar && unichar <= range.second) {
381 return true;
382 }
383 }
384 return false;
385 }
386 static bool isEllipsis(SkUnichar unichar) { return (unichar == 0x2026 || unichar == 0x002E); }
387 static bool isGraphemeExtend(SkUnichar unichar) {
388 return sk_u_hasBinaryProperty(unichar, UCHAR_GRAPHEME_EXTEND);
389 }
390 static bool isCustomSoftBreak(SkUnichar unichar) {
391 // ‘ “ ( [ { < « — – • – – $ £ € + = × \ % ° # * @ _ § © ®
392 static const std::unordered_set<SkUnichar> kBreakTriggerCodePoints {
393 0x2018, 0x201C, 0x0028, 0x005B, 0x007B, 0x003C, 0x00AB, 0x2014, 0x2013,
394 0x2022, 0x0024, 0x00A3, 0x20AC, 0x002B, 0x003D, 0x00D7, 0x005C, 0x0025,
395 0x00B0, 0x0023, 0x002A, 0x0040, 0x005F, 0x00A7, 0x00A9, 0x00AE
396 };
397
398 return kBreakTriggerCodePoints.count(unichar) > 0;
399 }
400 #endif
401
402 public:
403 ~SkUnicode_icu() override { }
404 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
405 SkBidiIterator::Direction dir) override {
406 return SkUnicode::makeBidiIterator(text, count, dir);
407 }
408 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
409 int count,
410 SkBidiIterator::Direction dir) override {
411 return SkUnicode::makeBidiIterator(text, count, dir);
412 }
413 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
414 BreakType breakType) override {
415 UErrorCode status = U_ZERO_ERROR;
416 ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0,
417 &status));
418 if (U_FAILURE(status)) {
419 SkDEBUGF("Break error: %s", sk_u_errorName(status));
420 return nullptr;
421 }
422 return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
423 }
424 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
425 return makeBreakIterator(sk_uloc_getDefault(), breakType);
426 }
427
428 static bool isHardLineBreak(SkUnichar utf8) {
429 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
430 return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
431 }
432
433 SkString toUpper(const SkString& str) override {
434 // Convert to UTF16 since that's what ICU wants.
435 auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
436
437 UErrorCode icu_err = U_ZERO_ERROR;
438 const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
439 nullptr, &icu_err);
440 if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
441 return SkString();
442 }
443
444 SkAutoSTArray<128, uint16_t> upper16(upper16len);
445 icu_err = U_ZERO_ERROR;
446 sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
447 (UChar*)(str16.c_str()), str16.size(),
448 nullptr, &icu_err);
449 SkASSERT(!U_FAILURE(icu_err));
450
451 // ... and back to utf8 'cause that's what we want.
452 return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
453 }
454
455 bool getBidiRegions(const char utf8[],
456 int utf8Units,
457 TextDirection dir,
458 std::vector<BidiRegion>* results) override {
459 return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
460 }
461
462 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
463
464 // Convert to UTF16 since we want the results in utf16
465 auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
466 return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
467 }
468
#ifdef OHOS_SUPPORT
/**
 * Adds kPunctuation, kEllipsis and kSoftLineBreakBefore to the flags of code unit `i`
 * when isPunctuation(), isEllipsis() and isCustomSoftBreak() respectively accept `unichar`.
 */
void processPunctuationAndEllipsis(SkTArray<SkUnicode::CodeUnitFlags, true>* results, int i, SkUnichar unichar)
{
    const bool punctuation = SkUnicode_icu::isPunctuation(unichar);
    const bool ellipsis = SkUnicode_icu::isEllipsis(unichar);
    const bool customSoftBreak = SkUnicode_icu::isCustomSoftBreak(unichar);

    if (punctuation) {
        results->at(i) |= SkUnicode::kPunctuation;
    }
    if (ellipsis) {
        results->at(i) |= SkUnicode::kEllipsis;
    }
    if (customSoftBreak) {
        results->at(i) |= SkUnicode::kSoftLineBreakBefore;
    }
}
#endif
483
484 #ifdef OHOS_SUPPORT
485 bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, const char locale[],
486 #else
487 bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
488 #endif
489 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
490 results->reset();
491 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
492
493 #ifdef OHOS_SUPPORT
494 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, locale, [&](int pos, int status) {
495 #else
496 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos, int status) {
497 #endif
498 (*results)[pos] |= status == UBRK_LINE_HARD
499 ? CodeUnitFlags::kHardLineBreakBefore
500 : CodeUnitFlags::kSoftLineBreakBefore;
501 });
502
503 #ifdef OHOS_SUPPORT
504 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, locale, [&](int pos, int status) {
505 #else
506 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos, int status) {
507 #endif
508 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
509 });
510
511 const char* current = utf8;
512 const char* end = utf8 + utf8Units;
513 while (current < end) {
514 auto before = current - utf8;
515 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
516 if (unichar < 0) unichar = 0xFFFD;
517 auto after = current - utf8;
518 if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
519 results->at(before) |= SkUnicode::kTabulation;
520 if (replaceTabs) {
521 unichar = ' ';
522 utf8[before] = ' ';
523 }
524 }
525 for (auto i = before; i < after; ++i) {
526 if (SkUnicode_icu::isSpace(unichar)) {
527 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
528 }
529 if (SkUnicode_icu::isWhitespace(unichar)) {
530 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
531 }
532 if (SkUnicode_icu::isControl(unichar)) {
533 results->at(i) |= SkUnicode::kControl;
534 }
535 if (SkUnicode_icu::isIdeographic(unichar)) {
536 results->at(i) |= SkUnicode::kIdeographic;
537 }
538 #ifdef OHOS_SUPPORT
539 processPunctuationAndEllipsis(results, i, unichar);
540 #endif
541 }
542
543 #ifdef OHOS_SUPPORT
544 if (SkUnicode_icu::isGraphemeExtend(unichar)) {
545 // Current unichar is a combining one.
546 results->at(before) |= SkUnicode::kCombine;
547 }
548 #endif
549 }
550
551 return true;
552 }
553
554 #ifdef OHOS_SUPPORT
555 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[],
556 #else
557 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
558 #endif
559 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
560 results->reset();
561 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
562
563 // Get white spaces
564 this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
565 [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
566 for (auto i = start; i < end; ++i) {
567 if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
568 results->at(i) |= SkUnicode::kTabulation;
569 if (replaceTabs) {
570 unichar = ' ';
571 utf16[start] = ' ';
572 }
573 }
574 if (SkUnicode_icu::isSpace(unichar)) {
575 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
576 }
577 if (SkUnicode_icu::isWhitespace(unichar)) {
578 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
579 }
580 if (SkUnicode_icu::isControl(unichar)) {
581 results->at(i) |= SkUnicode::kControl;
582 }
583 }
584 });
585 // Get graphemes
586 this->forEachBreak((char16_t*)&utf16[0],
587 utf16Units,
588 SkUnicode::BreakType::kGraphemes,
589 #ifdef OHOS_SUPPORT
590 locale,
591 #endif
592 [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
593 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
594 });
595 // Get line breaks
596 this->forEachBreak(
597 (char16_t*)&utf16[0],
598 utf16Units,
599 SkUnicode::BreakType::kLines,
600 #ifdef OHOS_SUPPORT
601 locale,
602 #endif
603 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
604 if (status ==
605 (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
606 // Hard line breaks clears off all the other flags
607 // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
608 (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
609 } else {
610 (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
611 }
612 });
613
614 return true;
615 }
616
617 void reorderVisual(const BidiLevel runLevels[],
618 int levelsCount,
619 int32_t logicalFromVisual[]) override {
620 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
621 }
622 };
623
624 std::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() {
625 #if defined(SK_USING_THIRD_PARTY_ICU)
626 if (!SkLoadICU()) {
627 static SkOnce once;
628 once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
629 return nullptr;
630 }
631 #endif
632
633 return ICULib()
634 ? std::make_unique<SkUnicode_icu>()
635 : nullptr;
636 }
637