• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1                                            /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "include/core/SkSpan.h"
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkTArray.h"
12 #include "include/private/SkTo.h"
13 #include "modules/skunicode/include/SkUnicode.h"
14 #include "modules/skunicode/src/SkUnicode_client.h"
15 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
16 #include "src/utils/SkUTF.h"
17 
18 #include <algorithm>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include <array>
25 #include <unicode/ubidi.h>
26 #include <unicode/ubrk.h>
27 #include <unicode/uchar.h>
28 #include <unicode/uloc.h>
29 #include <unicode/uscript.h>
30 #include <unicode/ustring.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33 
34 
35 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
36 
errorName(UErrorCode status)37 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
38     return u_errorName_skia(status);
39 }
bidi_close(UBiDi * bidi)40 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
41     ubidi_close_skia(bidi);
42 }
bidi_getDirection(const UBiDi * bidi)43 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
44     return ubidi_getDirection_skia(bidi);
45 }
bidi_getLength(const UBiDi * bidi)46 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
47     return ubidi_getLength_skia(bidi);
48 }
bidi_getLevelAt(const UBiDi * bidi,int pos)49 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
50     return ubidi_getLevelAt_skia(bidi, pos);
51 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)52 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
53     return ubidi_openSized_skia(maxLength, maxRunCount, pErrorCode);
54 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)55 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
56                          const UChar* text,
57                          int32_t length,
58                          UBiDiLevel paraLevel,
59                          UBiDiLevel* embeddingLevels,
60                          UErrorCode* status) {
61     return ubidi_setPara_skia(bidi, text, length, paraLevel, embeddingLevels, status);
62 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])63 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
64                                int levelsCount,
65                                int32_t logicalFromVisual[]) {
66     ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual);
67 }
68 #endif
69 
70 class SkUnicode_client : public SkUnicode {
71 public:
72     struct Data {
73         SkSpan<const char> fText8;
74         SkSpan<const char16_t> fText16;
75         std::vector<Position> fWords;
76         std::vector<SkUnicode::Position> fGraphemeBreaks;
77         std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data78         Data(SkSpan<char> text,
79              std::vector<SkUnicode::Position> words,
80              std::vector<SkUnicode::Position> graphemeBreaks,
81              std::vector<SkUnicode::LineBreakBefore> lineBreaks)
82             : fText8(text)
83             , fText16(SkSpan<const char16_t>(nullptr, 0))
84             , fWords(std::move(words))
85             , fGraphemeBreaks(std::move(graphemeBreaks))
86             , fLineBreaks(std::move(lineBreaks)) {
87         }
88 
resetSkUnicode_client::Data89         void reset() {
90             fText8 = SkSpan<const char>(nullptr, 0);
91             fText16 = SkSpan<const char16_t>(nullptr, 0);
92             fGraphemeBreaks.clear();
93             fLineBreaks.clear();
94         }
95     };
96     SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)97     SkUnicode_client(SkSpan<char> text,
98                      std::vector<SkUnicode::Position> words,
99                      std::vector<SkUnicode::Position> graphemeBreaks,
100                      std::vector<SkUnicode::LineBreakBefore> lineBreaks)
101             : fData(std::make_shared<Data>(text,
102                                            std::move(words),
103                                            std::move(graphemeBreaks),
104                                            std::move(lineBreaks))) { }
SkUnicode_client(const SkUnicode_client * origin)105     SkUnicode_client(const SkUnicode_client* origin)
106             : fData(origin->fData) {}
107 
108 
copy()109     std::unique_ptr<SkUnicode> copy() override {
110         return std::make_unique<SkUnicode_client>(this);
111     }
112 
113     ~SkUnicode_client() override = default;
114 
reset()115     void reset() { fData->reset(); }
116     // For SkShaper
117     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
118                                                      SkBidiIterator::Direction dir) override;
119     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
120                                                      int count,
121                                                      SkBidiIterator::Direction dir) override;
122     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
123                                                        BreakType breakType) override;
124     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
125     // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)126     bool getBidiRegions(const char utf8[],
127                         int utf8Units,
128                         TextDirection dir,
129                         std::vector<BidiRegion>* results) override {
130         return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
131     }
132 
133     // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)134     static bool isControl(SkUnichar utf8) {
135         return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
136                (utf8 >= 0x200D && utf8 <= 0x200F) ||
137                (utf8 >= 0x202A && utf8 <= 0x202E);
138     }
139 
isWhitespace(SkUnichar unichar)140     static bool isWhitespace(SkUnichar unichar) {
141         static constexpr std::array<SkUnichar, 21> whitespaces {
142                 0x0009, // character tabulation
143                 0x000A, // line feed
144                 0x000B, // line tabulation
145                 0x000C, // form feed
146                 0x000D, // carriage return
147                 0x0020, // space
148               //0x0085, // next line
149               //0x00A0, // no-break space
150                 0x1680, // ogham space mark
151                 0x2000, // en quad
152                 0x2001, // em quad
153                 0x2002, // en space
154                 0x2003, // em space
155                 0x2004, // three-per-em space
156                 0x2005, // four-per-em space
157                 0x2006, // six-per-em space
158               //0x2007, // figure space
159                 0x2008, // punctuation space
160                 0x2009, // thin space
161                 0x200A, // hair space
162                 0x2028, // line separator
163                 0x2029, // paragraph separator
164               //0x202F, // narrow no-break space
165                 0x205F, // medium mathematical space
166                 0x3000};// ideographic space
167         return std::find(whitespaces.begin(), whitespaces.end(), unichar) != whitespaces.end();
168     }
169 
isSpace(SkUnichar unichar)170     static bool isSpace(SkUnichar unichar) {
171         static constexpr std::array<SkUnichar, 25> spaces {
172                 0x0009, // character tabulation
173                 0x000A, // line feed
174                 0x000B, // line tabulation
175                 0x000C, // form feed
176                 0x000D, // carriage return
177                 0x0020, // space
178                 0x0085, // next line
179                 0x00A0, // no-break space
180                 0x1680, // ogham space mark
181                 0x2000, // en quad
182                 0x2001, // em quad
183                 0x2002, // en space
184                 0x2003, // em space
185                 0x2004, // three-per-em space
186                 0x2005, // four-per-em space
187                 0x2006, // six-per-em space
188                 0x2007, // figure space
189                 0x2008, // punctuation space
190                 0x2009, // thin space
191                 0x200A, // hair space
192                 0x2028, // line separator
193                 0x2029, // paragraph separator
194                 0x202F, // narrow no-break space
195                 0x205F, // medium mathematical space
196                 0x3000}; // ideographic space
197         return std::find(spaces.begin(), spaces.end(), unichar) != spaces.end();
198     }
199 
isTabulation(SkUnichar utf8)200     static bool isTabulation(SkUnichar utf8) {
201         return utf8 == '\t';
202     }
203 
isHardBreak(SkUnichar utf8)204     static bool isHardBreak(SkUnichar utf8) {
205         return utf8 == '\n';
206     }
207 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)208     bool computeCodeUnitFlags(char utf8[],
209                               int utf8Units,
210                               bool replaceTabs,
211                               SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
212         results->clear();
213         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
214         for (auto& lineBreak : fData->fLineBreaks) {
215             (*results)[lineBreak.pos] |=
216                 lineBreak.breakType == LineBreakType::kHardLineBreak
217                     ? CodeUnitFlags::kHardLineBreakBefore
218                     : CodeUnitFlags::kSoftLineBreakBefore;
219         }
220         for (auto& grapheme : fData->fGraphemeBreaks) {
221             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
222         }
223         const char* current = utf8;
224         const char* end = utf8 + utf8Units;
225         while (current < end) {
226             auto before = current - utf8;
227             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
228             if (unichar < 0) unichar = 0xFFFD;
229             auto after = current - utf8;
230             if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
231                 results->at(before) |= SkUnicode::kTabulation;
232                 if (replaceTabs) {
233                     unichar = ' ';
234                     utf8[before] = ' ';
235                 }
236             }
237             for (auto i = before; i < after; ++i) {
238                 if (SkUnicode_client::isSpace(unichar)) {
239                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
240                 }
241                 if (SkUnicode_client::isWhitespace(unichar)) {
242                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
243                 }
244                 if (SkUnicode_client::isControl(unichar)) {
245                     results->at(i) |= SkUnicode::kControl;
246                 }
247             }
248         }
249         return true;
250     }
251 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)252     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
253                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
254         results->clear();
255         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
256         for (auto& lineBreak : fData->fLineBreaks) {
257             (*results)[lineBreak.pos] |=
258                 lineBreak.breakType == LineBreakType::kHardLineBreak
259                     ? CodeUnitFlags::kHardLineBreakBefore
260                     : CodeUnitFlags::kSoftLineBreakBefore;
261         }
262         for (auto& grapheme : fData->fGraphemeBreaks) {
263             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
264         }
265         return true;
266     }
267 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)268     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
269         *results = fData->fWords;
270         return true;
271     }
272 
toUpper(const SkString & str)273     SkString toUpper(const SkString& str) override {
274         SkASSERT(false);
275         return SkString(fData->fText8.data(), fData->fText8.size());
276     }
277 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])278     void reorderVisual(const BidiLevel runLevels[],
279                        int levelsCount,
280                        int32_t logicalFromVisual[]) override {
281         SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
282     }
283 private:
284     friend class SkBreakIterator_client;
285 
286     std::shared_ptr<Data> fData;
287 };
288 
289 class SkBreakIterator_client: public SkBreakIterator {
290     std::shared_ptr<SkUnicode_client::Data> fData;
291     Position fLastResult;
292     Position fStart;
293     Position fEnd;
294 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)295     explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()296     Position first() override
297       { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()298     Position current() override
299       { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()300     Position next() override
301       { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()302     Status status() override {
303         return fData->fLineBreaks[fStart + fLastResult].breakType ==
304                        SkUnicode::LineBreakType::kHardLineBreak
305                        ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
306                        : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
307     }
isDone()308     bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)309     bool setText(const char utftext8[], int utf8Units) override {
310         SkASSERT(utftext8 >= fData->fText8.data() &&
311                  utf8Units <= SkToS16(fData->fText8.size()));
312         fStart = utftext8 - fData->fText8.data();
313         fEnd = fStart + utf8Units;
314         fLastResult = 0;
315         return true;
316     }
setText(const char16_t utftext16[],int utf16Units)317     bool setText(const char16_t utftext16[], int utf16Units) override {
318         SkASSERT(utftext16 >= fData->fText16.data() &&
319                  utf16Units <= SkToS16(fData->fText16.size()));
320         fStart = utftext16 - fData->fText16.data();
321         fEnd = fStart + utf16Units;
322         fLastResult = 0;
323         return true;
324     }
325 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)326 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
327                                                  SkBidiIterator::Direction dir) {
328     return SkUnicode::makeBidiIterator(text, count, dir);
329 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)330 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
331                                                  int count,
332                                                  SkBidiIterator::Direction dir) {
333     return SkUnicode::makeBidiIterator(text, count, dir);
334 }
makeBreakIterator(const char locale[],BreakType breakType)335 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
336                                                    BreakType breakType) {
337     return std::make_unique<SkBreakIterator_client>(fData);
338 }
makeBreakIterator(BreakType breakType)339 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
340     return std::make_unique<SkBreakIterator_client>(fData);
341 }
342 
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)343 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
344         SkSpan<char> text,
345         std::vector<SkUnicode::Position> words,
346         std::vector<SkUnicode::Position> graphemeBreaks,
347         std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
348     return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
349 }
350 
351