/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
7 
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkMutex.h"
12 #include "include/private/SkOnce.h"
13 #include "include/private/SkTArray.h"
14 #include "include/private/SkTemplates.h"
15 #include "include/private/SkTo.h"
16 #include "modules/skunicode/include/SkUnicode.h"
17 #include "modules/skunicode/src/SkUnicode_icu.h"
18 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
19 #include "src/utils/SkUTF.h"
20 #include "include/private/SkTHash.h"
21 #include <unicode/umachine.h>
22 #include <functional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #ifdef OHOS_SUPPORT
28 #include <unordered_set>
29 #endif
30 
31 #if defined(SK_USING_THIRD_PARTY_ICU)
32 #include "SkLoadICU.h"
33 #endif
34 
ICULib()35 static const SkICULib* ICULib() {
36     static const auto gICU = SkLoadICULib();
37 
38     return gICU.get();
39 }
40 
41 // sk_* wrappers for ICU funcs
42 #define SKICU_FUNC(funcname)                                                                \
43     template <typename... Args>                                                             \
44     auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
45         return ICULib()->f_##funcname(std::forward<Args>(args)...);                         \
46     }                                                                                       \
47 
48 SKICU_EMIT_FUNCS
49 #undef SKICU_FUNC
50 
errorName(UErrorCode status)51 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
52     return sk_u_errorName(status);
53 }
54 
bidi_close(UBiDi * bidi)55 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
56     sk_ubidi_close(bidi);
57 }
bidi_getDirection(const UBiDi * bidi)58 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
59     return sk_ubidi_getDirection(bidi);
60 }
bidi_getLength(const UBiDi * bidi)61 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
62     return sk_ubidi_getLength(bidi);
63 }
bidi_getLevelAt(const UBiDi * bidi,int pos)64 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
65     return sk_ubidi_getLevelAt(bidi, pos);
66 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)67 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
68     return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
69 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)70 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
71                          const UChar* text,
72                          int32_t length,
73                          UBiDiLevel paraLevel,
74                          UBiDiLevel* embeddingLevels,
75                          UErrorCode* status) {
76     return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
77 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])78 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
79                                int levelsCount,
80                                int32_t logicalFromVisual[]) {
81     sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
82 }
83 
// Clones `bi` with whichever clone entry point the loaded ICU table provides:
// f_ubrk_clone_ is preferred, f_ubrk_safeClone_ is the fallback. At least one of the
// two must be present (asserted in debug builds).
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const auto* lib = ICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    // No caller-provided stack buffer is supplied to the fallback.
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
91 
utext_close_wrapper(UText * ut)92 static UText* utext_close_wrapper(UText* ut) {
93     return sk_utext_close(ut);
94 }
ubrk_close_wrapper(UBreakIterator * bi)95 static void ubrk_close_wrapper(UBreakIterator* bi) {
96     sk_ubrk_close(bi);
97 }
98 
99 using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close),
100                                                          utext_close_wrapper>>;
101 using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close),
102                                                                            ubrk_close_wrapper>>;
103 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)104 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
105     SkUnichar val = SkUTF::NextUTF8(ptr, end);
106     return val < 0 ? 0xFFFD : val;
107 }
108 
convertType(SkUnicode::BreakType type)109 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
110     switch (type) {
111         case SkUnicode::BreakType::kLines: return UBRK_LINE;
112         case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
113         case SkUnicode::BreakType::kWords: return UBRK_WORD;
114         default:
115             return UBRK_CHARACTER;
116     }
117 }
118 
119 class SkBreakIterator_icu : public SkBreakIterator {
120     ICUBreakIterator fBreakIterator;
121     Position fLastResult;
122  public:
SkBreakIterator_icu(ICUBreakIterator iter)123     explicit SkBreakIterator_icu(ICUBreakIterator iter)
124             : fBreakIterator(std::move(iter))
125             , fLastResult(0) {}
first()126     Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()127     Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()128     Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()129     Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()130     bool isDone() override { return fLastResult == UBRK_DONE; }
131 
setText(const char utftext8[],int utf8Units)132     bool setText(const char utftext8[], int utf8Units) override {
133         UErrorCode status = U_ZERO_ERROR;
134         ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
135 
136         if (U_FAILURE(status)) {
137             SkDEBUGF("Break error: %s", sk_u_errorName(status));
138             return false;
139         }
140         SkASSERT(text);
141         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
142         if (U_FAILURE(status)) {
143             SkDEBUGF("Break error: %s", sk_u_errorName(status));
144             return false;
145         }
146         fLastResult = 0;
147         return true;
148     }
setText(const char16_t utftext16[],int utf16Units)149     bool setText(const char16_t utftext16[], int utf16Units) override {
150         UErrorCode status = U_ZERO_ERROR;
151         ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
152                                           utf16Units, &status));
153 
154         if (U_FAILURE(status)) {
155             SkDEBUGF("Break error: %s", sk_u_errorName(status));
156             return false;
157         }
158         SkASSERT(text);
159         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
160         if (U_FAILURE(status)) {
161             SkDEBUGF("Break error: %s", sk_u_errorName(status));
162             return false;
163         }
164         fLastResult = 0;
165         return true;
166     }
167 };
168 
169 class SkIcuBreakIteratorCache {
170     SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache;
171     SkMutex fBreakCacheMutex;
172 
173  public:
get()174     static SkIcuBreakIteratorCache& get() {
175         static SkIcuBreakIteratorCache instance;
176         return instance;
177     }
178 
179 #ifdef OHOS_SUPPORT
makeBreakIterator(const char locale[],SkUnicode::BreakType type)180     ICUBreakIterator makeBreakIterator(const char locale[], SkUnicode::BreakType type) {
181         UErrorCode status = U_ZERO_ERROR;
182         ICUBreakIterator* cachedIterator;
183         {
184             SkAutoMutexExclusive lock(fBreakCacheMutex);
185             cachedIterator = fBreakCache.find(type);
186             if (!cachedIterator) {
187                 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), locale, nullptr, 0, &status));
188                 if (U_FAILURE(status)) {
189                     SkDEBUGF("Break error: %s", sk_u_errorName(status));
190                 } else {
191                     cachedIterator = fBreakCache.set(type, std::move(newIterator));
192                 }
193             }
194         }
195         ICUBreakIterator iterator;
196         if (cachedIterator) {
197             iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
198             if (U_FAILURE(status)) {
199                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
200             }
201         }
202         return iterator;
203     }
204 #else
makeBreakIterator(SkUnicode::BreakType type)205     ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) {
206         UErrorCode status = U_ZERO_ERROR;
207         ICUBreakIterator* cachedIterator;
208         {
209             SkAutoMutexExclusive lock(fBreakCacheMutex);
210             cachedIterator = fBreakCache.find(type);
211             if (!cachedIterator) {
212                 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(),
213                                                           nullptr, 0, &status));
214                 if (U_FAILURE(status)) {
215                     SkDEBUGF("Break error: %s", sk_u_errorName(status));
216                 } else {
217                     cachedIterator = fBreakCache.set(type, std::move(newIterator));
218                 }
219             }
220         }
221         ICUBreakIterator iterator;
222         if (cachedIterator) {
223             iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
224             if (U_FAILURE(status)) {
225                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
226             }
227         }
228         return iterator;
229     }
230 #endif
231 };
232 
233 class SkUnicode_icu : public SkUnicode {
234 
copy()235     std::unique_ptr<SkUnicode> copy() override {
236         return std::make_unique<SkUnicode_icu>();
237     }
238 
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)239     static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,  std::vector<Position>* words) {
240 
241         UErrorCode status = U_ZERO_ERROR;
242 
243 #ifdef OHOS_SUPPORT
244         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(locale, BreakType::kWords);
245 #else
246         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords);
247 #endif
248         if (!iterator) {
249             SkDEBUGF("Break error: %s", sk_u_errorName(status));
250             return false;
251         }
252         SkASSERT(iterator);
253 
254         ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
255         if (U_FAILURE(status)) {
256             SkDEBUGF("Break error: %s", sk_u_errorName(status));
257             return false;
258         }
259 
260         sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
261         if (U_FAILURE(status)) {
262             SkDEBUGF("Break error: %s", sk_u_errorName(status));
263             return false;
264         }
265 
266         // Get the words
267         int32_t pos = sk_ubrk_first(iterator.get());
268         while (pos != UBRK_DONE) {
269             words->emplace_back(pos);
270             pos = sk_ubrk_next(iterator.get());
271         }
272 
273         return true;
274     }
275 
extractPositions(const char utf8[],int utf8Units,BreakType type,const char locale[],std::function<void (int,int)> setBreak)276     static bool extractPositions
277 #ifdef OHOS_SUPPORT
278         (const char utf8[], int utf8Units, BreakType type, const char locale[],
279             std::function<void(int, int)> setBreak) {
280 #else
281         (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) {
282 #endif
283 
284         UErrorCode status = U_ZERO_ERROR;
285         ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
286 
287         if (U_FAILURE(status)) {
288             SkDEBUGF("Break error: %s", sk_u_errorName(status));
289             return false;
290         }
291         SkASSERT(text);
292 
293 #ifdef OHOS_SUPPORT
294         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(locale, type);
295 #else
296         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type);
297 #endif
298         if (!iterator) {
299             return false;
300         }
301 
302         sk_ubrk_setUText(iterator.get(), text.get(), &status);
303         if (U_FAILURE(status)) {
304             SkDEBUGF("Break error: %s", sk_u_errorName(status));
305             return false;
306         }
307 
308         auto iter = iterator.get();
309         int32_t pos = sk_ubrk_first(iter);
310         while (pos != UBRK_DONE) {
311             int s = type == SkUnicode::BreakType::kLines
312                         ? UBRK_LINE_SOFT
313                         : sk_ubrk_getRuleStatus(iter);
314             setBreak(pos, s);
315             pos = sk_ubrk_next(iter);
316         }
317 
318         if (type == SkUnicode::BreakType::kLines) {
319             // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
320             // (ICU line break iterator does not work correctly on Thai text with new lines)
321             // So, we only use the iterator to collect soft line breaks and
322             // scan the text for all hard line breaks ourselves
323             const char* end = utf8 + utf8Units;
324             const char* ch = utf8;
325             while (ch < end) {
326                 auto unichar = utf8_next(&ch, end);
327                 if (isHardLineBreak(unichar)) {
328                     setBreak(ch - utf8, UBRK_LINE_HARD);
329                 }
330             }
331         }
332         return true;
333     }
334 
335     static bool isControl(SkUnichar utf8) {
336         return sk_u_iscntrl(utf8);
337     }
338 
339     static bool isWhitespace(SkUnichar utf8) {
340         return sk_u_isWhitespace(utf8);
341     }
342 
343     static bool isSpace(SkUnichar utf8) {
344         return sk_u_isspace(utf8);
345     }
346 
347     static bool isTabulation(SkUnichar utf8) {
348         return utf8 == '\t';
349     }
350 
351     static bool isHardBreak(SkUnichar utf8) {
352         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
353         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
354     }
355 
356     static bool isIdeographic(SkUnichar unichar) {
357         return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
358     }
359 #ifdef OHOS_SUPPORT
360     static bool isPunctuation(SkUnichar unichar)
361     {
362         if (sk_u_ispunct(unichar)) {
363             return true;
364         }
365         static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 13> ranges{{
366                 {0x0021, 0x002F},  // ASCII punctuation (e.g., ! " # $ % & ' ( ) * + , - . /)
367                 {0x003A, 0x0040},  // ASCII punctuation (e.g., : ; < = > ? @)
368                 {0x005B, 0x0060},  // ASCII punctuation (e.g., [ \ ] ^ _ `)
369                 {0x007B, 0x007E},  // ASCII punctuation (e.g., { | } ~)
370                 {0x2000, 0x206F},  // Common punctuation (Chinese & English)
371                 {0xFF00, 0xFFEF},  // Full-width characters and symbols
372                 {0x2E00, 0x2E7F},  // Supplemental punctuation (e.g., ancient)
373                 {0x3001, 0x3003},  // CJK punctuation (e.g., Chinese comma)
374                 {0xFF01, 0xFF0F},  // Full-width ASCII punctuation (0x21-0x2F)
375                 {0xFF1A, 0xFF20},  // Full-width ASCII punctuation (0x3A-0x40)
376                 {0xFF3B, 0xFF40},  // Full-width ASCII punctuation (0x5B-0x60)
377                 {0xFF5B, 0xFF65},  // Other full-width punctuation (e.g., quotes)
378         }};
379         for (auto range : ranges) {
380             if (range.first <= unichar && unichar <= range.second) {
381                 return true;
382             }
383         }
384         return false;
385     }
386     static bool isEllipsis(SkUnichar unichar) { return (unichar == 0x2026 || unichar == 0x002E); }
387     static bool isGraphemeExtend(SkUnichar unichar) {
388         return sk_u_hasBinaryProperty(unichar, UCHAR_GRAPHEME_EXTEND);
389     }
390     static bool isCustomSoftBreak(SkUnichar unichar) {
391         // ‘ “ ( [ { < « — – • – – $ £ € + = × \ % ° # * @ _ § © ®
392         static const std::unordered_set<SkUnichar> kBreakTriggerCodePoints {
393             0x2018, 0x201C, 0x0028, 0x005B, 0x007B, 0x003C, 0x00AB, 0x2014, 0x2013,
394             0x2022, 0x0024, 0x00A3, 0x20AC, 0x002B, 0x003D, 0x00D7, 0x005C, 0x0025,
395             0x00B0, 0x0023, 0x002A, 0x0040, 0x005F, 0x00A7, 0x00A9, 0x00AE
396         };
397 
398         return kBreakTriggerCodePoints.count(unichar) > 0;
399     }
400 #endif
401 
402 public:
403     ~SkUnicode_icu() override { }
404     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
405                                                      SkBidiIterator::Direction dir) override {
406         return SkUnicode::makeBidiIterator(text, count, dir);
407     }
408     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
409                                                      int count,
410                                                      SkBidiIterator::Direction dir) override {
411         return SkUnicode::makeBidiIterator(text, count, dir);
412     }
413     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
414                                                        BreakType breakType) override {
415         UErrorCode status = U_ZERO_ERROR;
416         ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0,
417                                                &status));
418         if (U_FAILURE(status)) {
419             SkDEBUGF("Break error: %s", sk_u_errorName(status));
420             return nullptr;
421         }
422         return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
423     }
424     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
425         return makeBreakIterator(sk_uloc_getDefault(), breakType);
426     }
427 
428     static bool isHardLineBreak(SkUnichar utf8) {
429         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
430         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
431     }
432 
433     SkString toUpper(const SkString& str) override {
434         // Convert to UTF16 since that's what ICU wants.
435         auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
436 
437         UErrorCode icu_err = U_ZERO_ERROR;
438         const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
439                                                 nullptr, &icu_err);
440         if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
441             return SkString();
442         }
443 
444         SkAutoSTArray<128, uint16_t> upper16(upper16len);
445         icu_err = U_ZERO_ERROR;
446         sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
447                         (UChar*)(str16.c_str()), str16.size(),
448                         nullptr, &icu_err);
449         SkASSERT(!U_FAILURE(icu_err));
450 
451         // ... and back to utf8 'cause that's what we want.
452         return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
453     }
454 
455     bool getBidiRegions(const char utf8[],
456                         int utf8Units,
457                         TextDirection dir,
458                         std::vector<BidiRegion>* results) override {
459         return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
460     }
461 
462     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
463 
464         // Convert to UTF16 since we want the results in utf16
465         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
466         return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
467     }
468 
469 #ifdef OHOS_SUPPORT
470     void processPunctuationAndEllipsis(SkTArray<SkUnicode::CodeUnitFlags, true>* results, int i, SkUnichar unichar)
471     {
472         if (SkUnicode_icu::isPunctuation(unichar)) {
473             results->at(i) |= SkUnicode::kPunctuation;
474         }
475         if (SkUnicode_icu::isEllipsis(unichar)) {
476             results->at(i) |= SkUnicode::kEllipsis;
477         }
478         if (SkUnicode_icu::isCustomSoftBreak(unichar)) {
479             results->at(i) |= SkUnicode::kSoftLineBreakBefore;
480         }
481     }
482 #endif
483 
484 #ifdef OHOS_SUPPORT
485     bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, const char locale[],
486 #else
487     bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
488 #endif
489                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
490         results->reset();
491         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
492 
493 #ifdef OHOS_SUPPORT
494         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, locale, [&](int pos, int status) {
495 #else
496         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos, int status) {
497 #endif
498             (*results)[pos] |= status == UBRK_LINE_HARD
499                                     ? CodeUnitFlags::kHardLineBreakBefore
500                                     : CodeUnitFlags::kSoftLineBreakBefore;
501         });
502 
503 #ifdef OHOS_SUPPORT
504         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, locale, [&](int pos, int status) {
505 #else
506         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos, int status) {
507 #endif
508             (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
509         });
510 
511         const char* current = utf8;
512         const char* end = utf8 + utf8Units;
513         while (current < end) {
514             auto before = current - utf8;
515             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
516             if (unichar < 0) unichar = 0xFFFD;
517             auto after = current - utf8;
518             if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
519                 results->at(before) |= SkUnicode::kTabulation;
520                 if (replaceTabs) {
521                     unichar = ' ';
522                     utf8[before] = ' ';
523                 }
524             }
525             for (auto i = before; i < after; ++i) {
526                 if (SkUnicode_icu::isSpace(unichar)) {
527                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
528                 }
529                 if (SkUnicode_icu::isWhitespace(unichar)) {
530                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
531                 }
532                 if (SkUnicode_icu::isControl(unichar)) {
533                     results->at(i) |= SkUnicode::kControl;
534                 }
535                 if (SkUnicode_icu::isIdeographic(unichar)) {
536                     results->at(i) |= SkUnicode::kIdeographic;
537                 }
538 #ifdef OHOS_SUPPORT
539                 processPunctuationAndEllipsis(results, i, unichar);
540 #endif
541             }
542 
543 #ifdef OHOS_SUPPORT
544             if (SkUnicode_icu::isGraphemeExtend(unichar)) {
545                 // Current unichar is a combining one.
546                 results->at(before) |= SkUnicode::kCombine;
547             }
548 #endif
549         }
550 
551         return true;
552     }
553 
554 #ifdef OHOS_SUPPORT
555     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[],
556 #else
557     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
558 #endif
559                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
560         results->reset();
561         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
562 
563         // Get white spaces
564         this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
565            [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
566                 for (auto i = start; i < end; ++i) {
567                     if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
568                         results->at(i) |= SkUnicode::kTabulation;
569                     if (replaceTabs) {
570                             unichar = ' ';
571                             utf16[start] = ' ';
572                         }
573                     }
574                     if (SkUnicode_icu::isSpace(unichar)) {
575                         results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
576                     }
577                     if (SkUnicode_icu::isWhitespace(unichar)) {
578                         results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
579                     }
580                     if (SkUnicode_icu::isControl(unichar)) {
581                         results->at(i) |= SkUnicode::kControl;
582                     }
583                 }
584            });
585         // Get graphemes
586         this->forEachBreak((char16_t*)&utf16[0],
587                            utf16Units,
588                            SkUnicode::BreakType::kGraphemes,
589 #ifdef OHOS_SUPPORT
590                            locale,
591 #endif
592                            [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
593                                (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
594                            });
595         // Get line breaks
596         this->forEachBreak(
597                 (char16_t*)&utf16[0],
598                 utf16Units,
599                 SkUnicode::BreakType::kLines,
600 #ifdef OHOS_SUPPORT
601                 locale,
602 #endif
603                 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
604                     if (status ==
605                         (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
606                         // Hard line breaks clears off all the other flags
607                         // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
608                         (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
609                     } else {
610                         (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
611                     }
612                 });
613 
614         return true;
615     }
616 
617     void reorderVisual(const BidiLevel runLevels[],
618                        int levelsCount,
619                        int32_t logicalFromVisual[]) override {
620         SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
621     }
622 };
623 
624 std::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() {
625     #if defined(SK_USING_THIRD_PARTY_ICU)
626     if (!SkLoadICU()) {
627         static SkOnce once;
628         once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
629         return nullptr;
630     }
631     #endif
632 
633     return ICULib()
634         ? std::make_unique<SkUnicode_icu>()
635         : nullptr;
636 }
637