• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 #include "include/core/SkSpan.h"
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkTArray.h"
12 #include "include/private/SkTo.h"
13 #include "modules/skunicode/include/SkUnicode.h"
14 #include "modules/skunicode/src/SkUnicode_client.h"
15 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
16 #include "src/utils/SkUTF.h"
17 
18 #include <algorithm>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include <array>
25 #include <unicode/ubidi.h>
26 #include <unicode/ubrk.h>
27 #include <unicode/uchar.h>
28 #include <unicode/uloc.h>
29 #include <unicode/uscript.h>
30 #include <unicode/ustring.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33 
34 
35 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
36 
errorName(UErrorCode status)37 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
38     return u_errorName_skia(status);
39 }
bidi_close(UBiDi * bidi)40 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
41     ubidi_close_skia(bidi);
42 }
bidi_getDirection(const UBiDi * bidi)43 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
44     return ubidi_getDirection_skia(bidi);
45 }
bidi_getLength(const UBiDi * bidi)46 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
47     return ubidi_getLength_skia(bidi);
48 }
bidi_getLevelAt(const UBiDi * bidi,int pos)49 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
50     return ubidi_getLevelAt_skia(bidi, pos);
51 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)52 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
53     return ubidi_openSized_skia(maxLength, maxRunCount, pErrorCode);
54 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)55 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
56                          const UChar* text,
57                          int32_t length,
58                          UBiDiLevel paraLevel,
59                          UBiDiLevel* embeddingLevels,
60                          UErrorCode* status) {
61     return ubidi_setPara_skia(bidi, text, length, paraLevel, embeddingLevels, status);
62 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])63 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
64                                int levelsCount,
65                                int32_t logicalFromVisual[]) {
66     ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual);
67 }
68 #endif
69 
70 class SkUnicode_client : public SkUnicode {
71 public:
72     struct Data {
73         SkSpan<const char> fText8;
74         SkSpan<const char16_t> fText16;
75         std::vector<Position> fWords;
76         std::vector<SkUnicode::Position> fGraphemeBreaks;
77         std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data78         Data(SkSpan<char> text,
79              std::vector<SkUnicode::Position> words,
80              std::vector<SkUnicode::Position> graphemeBreaks,
81              std::vector<SkUnicode::LineBreakBefore> lineBreaks)
82             : fText8(text)
83             , fText16(SkSpan<const char16_t>(nullptr, 0))
84             , fWords(std::move(words))
85             , fGraphemeBreaks(std::move(graphemeBreaks))
86             , fLineBreaks(std::move(lineBreaks)) {
87         }
88 
resetSkUnicode_client::Data89         void reset() {
90             fText8 = SkSpan<const char>(nullptr, 0);
91             fText16 = SkSpan<const char16_t>(nullptr, 0);
92             fGraphemeBreaks.clear();
93             fLineBreaks.clear();
94         }
95     };
96     SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)97     SkUnicode_client(SkSpan<char> text,
98                      std::vector<SkUnicode::Position> words,
99                      std::vector<SkUnicode::Position> graphemeBreaks,
100                      std::vector<SkUnicode::LineBreakBefore> lineBreaks)
101             : fData(std::make_shared<Data>(text,
102                                            std::move(words),
103                                            std::move(graphemeBreaks),
104                                            std::move(lineBreaks))) { }
SkUnicode_client(const SkUnicode_client * origin)105     SkUnicode_client(const SkUnicode_client* origin)
106             : fData(origin->fData) {}
107 
108 
copy()109     std::unique_ptr<SkUnicode> copy() override {
110         return std::make_unique<SkUnicode_client>(this);
111     }
112 
113     ~SkUnicode_client() override = default;
114 
reset()115     void reset() { fData->reset(); }
116     // For SkShaper
117     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
118                                                      SkBidiIterator::Direction dir) override;
119     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
120                                                      int count,
121                                                      SkBidiIterator::Direction dir) override;
122     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
123                                                        BreakType breakType) override;
124     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
125     // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)126     bool getBidiRegions(const char utf8[],
127                         int utf8Units,
128                         TextDirection dir,
129                         std::vector<BidiRegion>* results) override {
130         return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
131     }
132 
133     // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)134     static bool isControl(SkUnichar utf8) {
135         return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
136                (utf8 >= 0x200D && utf8 <= 0x200F) ||
137                (utf8 >= 0x202A && utf8 <= 0x202E);
138     }
139 
isWhitespace(SkUnichar unichar)140     static bool isWhitespace(SkUnichar unichar) {
141         static constexpr std::array<SkUnichar, 21> whitespaces {
142                 0x0009, // character tabulation
143                 0x000A, // line feed
144                 0x000B, // line tabulation
145                 0x000C, // form feed
146                 0x000D, // carriage return
147                 0x0020, // space
148               //0x0085, // next line
149               //0x00A0, // no-break space
150                 0x1680, // ogham space mark
151                 0x2000, // en quad
152                 0x2001, // em quad
153                 0x2002, // en space
154                 0x2003, // em space
155                 0x2004, // three-per-em space
156                 0x2005, // four-per-em space
157                 0x2006, // six-per-em space
158               //0x2007, // figure space
159                 0x2008, // punctuation space
160                 0x2009, // thin space
161                 0x200A, // hair space
162                 0x2028, // line separator
163                 0x2029, // paragraph separator
164               //0x202F, // narrow no-break space
165                 0x205F, // medium mathematical space
166                 0x3000};// ideographic space
167         return std::find(whitespaces.begin(), whitespaces.end(), unichar) != whitespaces.end();
168     }
169 
isSpace(SkUnichar unichar)170     static bool isSpace(SkUnichar unichar) {
171         static constexpr std::array<SkUnichar, 25> spaces {
172                 0x0009, // character tabulation
173                 0x000A, // line feed
174                 0x000B, // line tabulation
175                 0x000C, // form feed
176                 0x000D, // carriage return
177                 0x0020, // space
178                 0x0085, // next line
179                 0x00A0, // no-break space
180                 0x1680, // ogham space mark
181                 0x2000, // en quad
182                 0x2001, // em quad
183                 0x2002, // en space
184                 0x2003, // em space
185                 0x2004, // three-per-em space
186                 0x2005, // four-per-em space
187                 0x2006, // six-per-em space
188                 0x2007, // figure space
189                 0x2008, // punctuation space
190                 0x2009, // thin space
191                 0x200A, // hair space
192                 0x2028, // line separator
193                 0x2029, // paragraph separator
194                 0x202F, // narrow no-break space
195                 0x205F, // medium mathematical space
196                 0x3000}; // ideographic space
197         return std::find(spaces.begin(), spaces.end(), unichar) != spaces.end();
198     }
199 
isTabulation(SkUnichar utf8)200     static bool isTabulation(SkUnichar utf8) {
201         return utf8 == '\t';
202     }
203 
isHardBreak(SkUnichar utf8)204     static bool isHardBreak(SkUnichar utf8) {
205         return utf8 == '\n';
206     }
207 
isIdeographic(SkUnichar unichar)208     static bool isIdeographic(SkUnichar unichar) {
209         static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 8> ranges {{
210             {4352,   4607}, // Hangul Jamo
211             {11904, 42191}, // CJK_Radicals
212             {43072, 43135}, // Phags_Pa
213             {44032, 55215}, // Hangul_Syllables
214             {63744, 64255}, // CJK_Compatibility_Ideographs
215             {65072, 65103}, // CJK_Compatibility_Forms
216             {65381, 65500}, // Katakana_Hangul_Halfwidth
217             {131072, 196607} // Supplementary_Ideographic_Plane
218         }};
219         for (auto range : ranges) {
220             if (range.first <= unichar && range.second > unichar) {
221                 return true;
222             }
223         }
224         return false;
225     }
226 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)227     bool computeCodeUnitFlags(char utf8[],
228                               int utf8Units,
229                               bool replaceTabs,
230                               SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
231         results->clear();
232         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
233         for (auto& lineBreak : fData->fLineBreaks) {
234             (*results)[lineBreak.pos] |=
235                 lineBreak.breakType == LineBreakType::kHardLineBreak
236                     ? CodeUnitFlags::kHardLineBreakBefore
237                     : CodeUnitFlags::kSoftLineBreakBefore;
238         }
239         for (auto& grapheme : fData->fGraphemeBreaks) {
240             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
241         }
242         const char* current = utf8;
243         const char* end = utf8 + utf8Units;
244         while (current < end) {
245             auto before = current - utf8;
246             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
247             if (unichar < 0) unichar = 0xFFFD;
248             auto after = current - utf8;
249             if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
250                 results->at(before) |= SkUnicode::kTabulation;
251                 if (replaceTabs) {
252                     unichar = ' ';
253                     utf8[before] = ' ';
254                 }
255             }
256             for (auto i = before; i < after; ++i) {
257                 if (SkUnicode_client::isSpace(unichar)) {
258                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
259                 }
260                 if (SkUnicode_client::isWhitespace(unichar)) {
261                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
262                 }
263                 if (SkUnicode_client::isControl(unichar)) {
264                     results->at(i) |= SkUnicode::kControl;
265                 }
266                 if (SkUnicode_client::isIdeographic(unichar)) {
267                     results->at(i) |= SkUnicode::kIdeographic;
268                 }
269             }
270         }
271         return true;
272     }
273 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)274     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
275                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
276         results->clear();
277         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
278         for (auto& lineBreak : fData->fLineBreaks) {
279             (*results)[lineBreak.pos] |=
280                 lineBreak.breakType == LineBreakType::kHardLineBreak
281                     ? CodeUnitFlags::kHardLineBreakBefore
282                     : CodeUnitFlags::kSoftLineBreakBefore;
283         }
284         for (auto& grapheme : fData->fGraphemeBreaks) {
285             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
286         }
287         return true;
288     }
289 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)290     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
291         *results = fData->fWords;
292         return true;
293     }
294 
toUpper(const SkString & str)295     SkString toUpper(const SkString& str) override {
296         SkASSERT(false);
297         return SkString(fData->fText8.data(), fData->fText8.size());
298     }
299 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])300     void reorderVisual(const BidiLevel runLevels[],
301                        int levelsCount,
302                        int32_t logicalFromVisual[]) override {
303         SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
304     }
305 private:
306     friend class SkBreakIterator_client;
307 
308     std::shared_ptr<Data> fData;
309 };
310 
311 class SkBreakIterator_client: public SkBreakIterator {
312     std::shared_ptr<SkUnicode_client::Data> fData;
313     Position fLastResult;
314     Position fStart;
315     Position fEnd;
316 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)317     explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()318     Position first() override
319       { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()320     Position current() override
321       { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()322     Position next() override
323       { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()324     Status status() override {
325         return fData->fLineBreaks[fStart + fLastResult].breakType ==
326                        SkUnicode::LineBreakType::kHardLineBreak
327                        ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
328                        : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
329     }
isDone()330     bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)331     bool setText(const char utftext8[], int utf8Units) override {
332         SkASSERT(utftext8 >= fData->fText8.data() &&
333                  utf8Units <= SkToS16(fData->fText8.size()));
334         fStart = utftext8 - fData->fText8.data();
335         fEnd = fStart + utf8Units;
336         fLastResult = 0;
337         return true;
338     }
setText(const char16_t utftext16[],int utf16Units)339     bool setText(const char16_t utftext16[], int utf16Units) override {
340         SkASSERT(utftext16 >= fData->fText16.data() &&
341                  utf16Units <= SkToS16(fData->fText16.size()));
342         fStart = utftext16 - fData->fText16.data();
343         fEnd = fStart + utf16Units;
344         fLastResult = 0;
345         return true;
346     }
347 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)348 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
349                                                  SkBidiIterator::Direction dir) {
350     return SkUnicode::makeBidiIterator(text, count, dir);
351 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)352 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
353                                                  int count,
354                                                  SkBidiIterator::Direction dir) {
355     return SkUnicode::makeBidiIterator(text, count, dir);
356 }
makeBreakIterator(const char locale[],BreakType breakType)357 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
358                                                    BreakType breakType) {
359     return std::make_unique<SkBreakIterator_client>(fData);
360 }
makeBreakIterator(BreakType breakType)361 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
362     return std::make_unique<SkBreakIterator_client>(fData);
363 }
364 
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)365 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
366         SkSpan<char> text,
367         std::vector<SkUnicode::Position> words,
368         std::vector<SkUnicode::Position> graphemeBreaks,
369         std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
370     return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
371 }
372 
373