• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/base/SkDebug.h"
12 #include "include/private/base/SkMutex.h"
13 #include "include/private/base/SkOnce.h"
14 #include "include/private/base/SkTArray.h"
15 #include "include/private/base/SkTemplates.h"
16 #include "include/private/base/SkTo.h"
17 #include "modules/skunicode/include/SkUnicode.h"
18 #include "modules/skunicode/src/SkUnicode_icu.h"
19 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
20 #include "src/base/SkUTF.h"
21 #include "src/core/SkTHash.h"
22 #include <unicode/umachine.h>
23 #include <functional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #if defined(SK_USING_THIRD_PARTY_ICU)
29 #include "SkLoadICU.h"
30 #endif
31 
32 using namespace skia_private;
33 
ICULib()34 static const SkICULib* ICULib() {
35     static const auto gICU = SkLoadICULib();
36 
37     return gICU.get();
38 }
39 
40 // sk_* wrappers for ICU funcs
41 #define SKICU_FUNC(funcname)                                                                \
42     template <typename... Args>                                                             \
43     auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
44         return ICULib()->f_##funcname(std::forward<Args>(args)...);                         \
45     }                                                                                       \
46 
47 SKICU_EMIT_FUNCS
48 #undef SKICU_FUNC
49 
errorName(UErrorCode status)50 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
51     return sk_u_errorName(status);
52 }
53 
bidi_close(UBiDi * bidi)54 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
55     sk_ubidi_close(bidi);
56 }
bidi_getDirection(const UBiDi * bidi)57 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
58     return sk_ubidi_getDirection(bidi);
59 }
bidi_getLength(const UBiDi * bidi)60 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
61     return sk_ubidi_getLength(bidi);
62 }
bidi_getLevelAt(const UBiDi * bidi,int pos)63 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
64     return sk_ubidi_getLevelAt(bidi, pos);
65 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)66 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
67     return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
68 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)69 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
70                          const UChar* text,
71                          int32_t length,
72                          UBiDiLevel paraLevel,
73                          UBiDiLevel* embeddingLevels,
74                          UErrorCode* status) {
75     return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
76 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])77 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
78                                int levelsCount,
79                                int32_t logicalFromVisual[]) {
80     sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
81 }
82 
/**
 * Clones |bi| with whichever cloning entry point the loaded ICU table provides:
 * f_ubrk_clone_ when it is set, otherwise f_ubrk_safeClone_.
 * At least one of the two must be present (asserted).
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const auto* lib = ICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    // Fallback entry point: no caller-supplied stack buffer is passed.
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
90 
utext_close_wrapper(UText * ut)91 static UText* utext_close_wrapper(UText* ut) {
92     return sk_utext_close(ut);
93 }
ubrk_close_wrapper(UBreakIterator * bi)94 static void ubrk_close_wrapper(UBreakIterator* bi) {
95     sk_ubrk_close(bi);
96 }
97 
98 using ICUUText = std::unique_ptr<UText, SkFunctionObject<utext_close_wrapper>>;
99 using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionObject<ubrk_close_wrapper>>;
100 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)101 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
102     SkUnichar val = SkUTF::NextUTF8(ptr, end);
103     return val < 0 ? 0xFFFD : val;
104 }
105 
convertType(SkUnicode::BreakType type)106 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
107     switch (type) {
108         case SkUnicode::BreakType::kLines: return UBRK_LINE;
109         case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
110         case SkUnicode::BreakType::kWords: return UBRK_WORD;
111         default:
112             return UBRK_CHARACTER;
113     }
114 }
115 
116 class SkBreakIterator_icu : public SkBreakIterator {
117     ICUBreakIterator fBreakIterator;
118     Position fLastResult;
119  public:
SkBreakIterator_icu(ICUBreakIterator iter)120     explicit SkBreakIterator_icu(ICUBreakIterator iter)
121             : fBreakIterator(std::move(iter))
122             , fLastResult(0) {}
first()123     Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()124     Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()125     Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()126     Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()127     bool isDone() override { return fLastResult == UBRK_DONE; }
128 
setText(const char utftext8[],int utf8Units)129     bool setText(const char utftext8[], int utf8Units) override {
130         UErrorCode status = U_ZERO_ERROR;
131         ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
132 
133         if (U_FAILURE(status)) {
134             SkDEBUGF("Break error: %s", sk_u_errorName(status));
135             return false;
136         }
137         SkASSERT(text);
138         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
139         if (U_FAILURE(status)) {
140             SkDEBUGF("Break error: %s", sk_u_errorName(status));
141             return false;
142         }
143         fLastResult = 0;
144         return true;
145     }
setText(const char16_t utftext16[],int utf16Units)146     bool setText(const char16_t utftext16[], int utf16Units) override {
147         UErrorCode status = U_ZERO_ERROR;
148         ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
149                                           utf16Units, &status));
150 
151         if (U_FAILURE(status)) {
152             SkDEBUGF("Break error: %s", sk_u_errorName(status));
153             return false;
154         }
155         SkASSERT(text);
156         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
157         if (U_FAILURE(status)) {
158             SkDEBUGF("Break error: %s", sk_u_errorName(status));
159             return false;
160         }
161         fLastResult = 0;
162         return true;
163     }
164 };
165 
166 class SkIcuBreakIteratorCache {
167     SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache;
168     SkMutex fBreakCacheMutex;
169 
170  public:
get()171     static SkIcuBreakIteratorCache& get() {
172         static SkIcuBreakIteratorCache instance;
173         return instance;
174     }
175 
makeBreakIterator(SkUnicode::BreakType type)176     ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) {
177         UErrorCode status = U_ZERO_ERROR;
178         ICUBreakIterator* cachedIterator;
179         {
180             SkAutoMutexExclusive lock(fBreakCacheMutex);
181             cachedIterator = fBreakCache.find(type);
182             if (!cachedIterator) {
183                 ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(),
184                                                           nullptr, 0, &status));
185                 if (U_FAILURE(status)) {
186                     SkDEBUGF("Break error: %s", sk_u_errorName(status));
187                 } else {
188                     cachedIterator = fBreakCache.set(type, std::move(newIterator));
189                 }
190             }
191         }
192         ICUBreakIterator iterator;
193         if (cachedIterator) {
194             iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
195             if (U_FAILURE(status)) {
196                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
197             }
198         }
199         return iterator;
200     }
201 };
202 
203 class SkUnicode_icu : public SkUnicode {
204 
copy()205     std::unique_ptr<SkUnicode> copy() override {
206         return std::make_unique<SkUnicode_icu>();
207     }
208 
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)209     static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,  std::vector<Position>* words) {
210 
211         UErrorCode status = U_ZERO_ERROR;
212 
213         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords);
214         if (!iterator) {
215             SkDEBUGF("Break error: %s", sk_u_errorName(status));
216             return false;
217         }
218         SkASSERT(iterator);
219 
220         ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
221         if (U_FAILURE(status)) {
222             SkDEBUGF("Break error: %s", sk_u_errorName(status));
223             return false;
224         }
225 
226         sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
227         if (U_FAILURE(status)) {
228             SkDEBUGF("Break error: %s", sk_u_errorName(status));
229             return false;
230         }
231 
232         // Get the words
233         int32_t pos = sk_ubrk_first(iterator.get());
234         while (pos != UBRK_DONE) {
235             words->emplace_back(pos);
236             pos = sk_ubrk_next(iterator.get());
237         }
238 
239         return true;
240     }
241 
extractPositions(const char utf8[],int utf8Units,BreakType type,std::function<void (int,int)> setBreak)242     static bool extractPositions
243         (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) {
244 
245         UErrorCode status = U_ZERO_ERROR;
246         ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
247 
248         if (U_FAILURE(status)) {
249             SkDEBUGF("Break error: %s", sk_u_errorName(status));
250             return false;
251         }
252         SkASSERT(text);
253 
254         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type);
255         if (!iterator) {
256             return false;
257         }
258 
259         sk_ubrk_setUText(iterator.get(), text.get(), &status);
260         if (U_FAILURE(status)) {
261             SkDEBUGF("Break error: %s", sk_u_errorName(status));
262             return false;
263         }
264 
265         auto iter = iterator.get();
266         int32_t pos = sk_ubrk_first(iter);
267         while (pos != UBRK_DONE) {
268             int s = type == SkUnicode::BreakType::kLines
269                         ? UBRK_LINE_SOFT
270                         : sk_ubrk_getRuleStatus(iter);
271             setBreak(pos, s);
272             pos = sk_ubrk_next(iter);
273         }
274 
275         if (type == SkUnicode::BreakType::kLines) {
276             // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
277             // (ICU line break iterator does not work correctly on Thai text with new lines)
278             // So, we only use the iterator to collect soft line breaks and
279             // scan the text for all hard line breaks ourselves
280             const char* end = utf8 + utf8Units;
281             const char* ch = utf8;
282             while (ch < end) {
283                 auto unichar = utf8_next(&ch, end);
284                 if (isHardLineBreak(unichar)) {
285                     setBreak(ch - utf8, UBRK_LINE_HARD);
286                 }
287             }
288         }
289         return true;
290     }
291 
isControl(SkUnichar utf8)292     static bool isControl(SkUnichar utf8) {
293         return sk_u_iscntrl(utf8);
294     }
295 
isWhitespace(SkUnichar utf8)296     static bool isWhitespace(SkUnichar utf8) {
297         return sk_u_isWhitespace(utf8);
298     }
299 
isSpace(SkUnichar utf8)300     static bool isSpace(SkUnichar utf8) {
301         return sk_u_isspace(utf8);
302     }
303 
isTabulation(SkUnichar utf8)304     static bool isTabulation(SkUnichar utf8) {
305         return utf8 == '\t';
306     }
307 
isHardBreak(SkUnichar utf8)308     static bool isHardBreak(SkUnichar utf8) {
309         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
310         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
311     }
312 
313 public:
~SkUnicode_icu()314     ~SkUnicode_icu() override { }
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)315     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
316                                                      SkBidiIterator::Direction dir) override {
317         return SkUnicode::makeBidiIterator(text, count, dir);
318     }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)319     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
320                                                      int count,
321                                                      SkBidiIterator::Direction dir) override {
322         return SkUnicode::makeBidiIterator(text, count, dir);
323     }
makeBreakIterator(const char locale[],BreakType breakType)324     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
325                                                        BreakType breakType) override {
326         UErrorCode status = U_ZERO_ERROR;
327         ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0,
328                                                &status));
329         if (U_FAILURE(status)) {
330             SkDEBUGF("Break error: %s", sk_u_errorName(status));
331             return nullptr;
332         }
333         return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
334     }
makeBreakIterator(BreakType breakType)335     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
336         return makeBreakIterator(sk_uloc_getDefault(), breakType);
337     }
338 
isHardLineBreak(SkUnichar utf8)339     static bool isHardLineBreak(SkUnichar utf8) {
340         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
341         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
342     }
343 
toUpper(const SkString & str)344     SkString toUpper(const SkString& str) override {
345         // Convert to UTF16 since that's what ICU wants.
346         auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
347 
348         UErrorCode icu_err = U_ZERO_ERROR;
349         const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
350                                                 nullptr, &icu_err);
351         if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
352             return SkString();
353         }
354 
355         AutoSTArray<128, uint16_t> upper16(upper16len);
356         icu_err = U_ZERO_ERROR;
357         sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
358                         (UChar*)(str16.c_str()), str16.size(),
359                         nullptr, &icu_err);
360         SkASSERT(!U_FAILURE(icu_err));
361 
362         // ... and back to utf8 'cause that's what we want.
363         return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
364     }
365 
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)366     bool getBidiRegions(const char utf8[],
367                         int utf8Units,
368                         TextDirection dir,
369                         std::vector<BidiRegion>* results) override {
370         return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
371     }
372 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)373     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
374 
375         // Convert to UTF16 since we want the results in utf16
376         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
377         return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
378     }
379 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)380     bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
381                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
382         results->clear();
383         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
384 
385         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos,
386                                                                        int status) {
387             (*results)[pos] |= status == UBRK_LINE_HARD
388                                     ? CodeUnitFlags::kHardLineBreakBefore
389                                     : CodeUnitFlags::kSoftLineBreakBefore;
390         });
391 
392         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos,
393                                                                        int status) {
394             (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
395         });
396 
397         const char* current = utf8;
398         const char* end = utf8 + utf8Units;
399         while (current < end) {
400             auto before = current - utf8;
401             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
402             if (unichar < 0) unichar = 0xFFFD;
403             auto after = current - utf8;
404             if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
405                 results->at(before) |= SkUnicode::kTabulation;
406                 if (replaceTabs) {
407                     unichar = ' ';
408                     utf8[before] = ' ';
409                 }
410             }
411             for (auto i = before; i < after; ++i) {
412                 if (SkUnicode_icu::isSpace(unichar)) {
413                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
414                 }
415                 if (SkUnicode_icu::isWhitespace(unichar)) {
416                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
417                 }
418                 if (SkUnicode_icu::isControl(unichar)) {
419                     results->at(i) |= SkUnicode::kControl;
420                 }
421             }
422         }
423 
424         return true;
425     }
426 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)427     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
428                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
429         results->clear();
430         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
431 
432         // Get white spaces
433         this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
434            [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
435                 for (auto i = start; i < end; ++i) {
436                     if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
437                         results->at(i) |= SkUnicode::kTabulation;
438                     if (replaceTabs) {
439                             unichar = ' ';
440                             utf16[start] = ' ';
441                         }
442                     }
443                     if (SkUnicode_icu::isSpace(unichar)) {
444                         results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
445                     }
446                     if (SkUnicode_icu::isWhitespace(unichar)) {
447                         results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
448                     }
449                     if (SkUnicode_icu::isControl(unichar)) {
450                         results->at(i) |= SkUnicode::kControl;
451                     }
452                 }
453            });
454         // Get graphemes
455         this->forEachBreak((char16_t*)&utf16[0],
456                            utf16Units,
457                            SkUnicode::BreakType::kGraphemes,
458                            [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
459                                (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
460                            });
461         // Get line breaks
462         this->forEachBreak(
463                 (char16_t*)&utf16[0],
464                 utf16Units,
465                 SkUnicode::BreakType::kLines,
466                 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
467                     if (status ==
468                         (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
469                         // Hard line breaks clears off all the other flags
470                         // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
471                         (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
472                     } else {
473                         (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
474                     }
475                 });
476 
477         return true;
478     }
479 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])480     void reorderVisual(const BidiLevel runLevels[],
481                        int levelsCount,
482                        int32_t logicalFromVisual[]) override {
483         SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
484     }
485 };
486 
MakeIcuBasedUnicode()487 std::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() {
488     #if defined(SK_USING_THIRD_PARTY_ICU)
489     if (!SkLoadICU()) {
490         static SkOnce once;
491         once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
492         return nullptr;
493     }
494     #endif
495 
496     return ICULib()
497         ? std::make_unique<SkUnicode_icu>()
498         : nullptr;
499 }
500