/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #include "include/core/SkSpan.h"
9 #include "include/core/SkString.h"
10 #include "include/core/SkTypes.h"
11 #include "include/private/SkBitmaskEnum.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "modules/skunicode/include/SkUnicode.h"
15 #include "modules/skunicode/src/SkUnicode_client.h"
16 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
17 #include "src/base/SkUTF.h"
18 
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 
27 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
errorName(UErrorCode status)28 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
29     return cl_u_errorName(status);
30 }
bidi_close(UBiDi * bidi)31 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
32     cl_ubidi_close(bidi);
33 }
bidi_getDirection(const UBiDi * bidi)34 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
35     return cl_ubidi_getDirection(bidi);
36 }
bidi_getLength(const UBiDi * bidi)37 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
38     return cl_ubidi_getLength(bidi);
39 }
bidi_getLevelAt(const UBiDi * bidi,int pos)40 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
41     return cl_ubidi_getLevelAt(bidi, pos);
42 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)43 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
44     return cl_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
45 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)46 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
47                          const UChar* text,
48                          int32_t length,
49                          UBiDiLevel paraLevel,
50                          UBiDiLevel* embeddingLevels,
51                          UErrorCode* status) {
52     return cl_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
53 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])54 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
55                                int levelsCount,
56                                int32_t logicalFromVisual[]) {
57     cl_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
58 }
59 #endif
60 
61 class SkUnicode_client : public SkUnicode {
62 public:
63     struct Data {
64         SkSpan<const char> fText8;
65         SkSpan<const char16_t> fText16;
66         std::vector<Position> fWords;
67         std::vector<SkUnicode::Position> fGraphemeBreaks;
68         std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data69         Data(SkSpan<char> text,
70              std::vector<SkUnicode::Position> words,
71              std::vector<SkUnicode::Position> graphemeBreaks,
72              std::vector<SkUnicode::LineBreakBefore> lineBreaks)
73             : fText8(text)
74             , fText16(SkSpan<const char16_t>(nullptr, 0))
75             , fWords(std::move(words))
76             , fGraphemeBreaks(std::move(graphemeBreaks))
77             , fLineBreaks(std::move(lineBreaks)) {
78         }
79 
resetSkUnicode_client::Data80         void reset() {
81             fText8 = SkSpan<const char>(nullptr, 0);
82             fText16 = SkSpan<const char16_t>(nullptr, 0);
83             fGraphemeBreaks.clear();
84             fLineBreaks.clear();
85         }
86     };
87     SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)88     SkUnicode_client(SkSpan<char> text,
89                      std::vector<SkUnicode::Position> words,
90                      std::vector<SkUnicode::Position> graphemeBreaks,
91                      std::vector<SkUnicode::LineBreakBefore> lineBreaks)
92             : fData(std::make_shared<Data>(text,
93                                            std::move(words),
94                                            std::move(graphemeBreaks),
95                                            std::move(lineBreaks))) {}
SkUnicode_client(const SkUnicode_client * origin)96     SkUnicode_client(const SkUnicode_client* origin)
97             : fData(origin->fData) {}
98 
99 
copy()100     std::unique_ptr<SkUnicode> copy() override {
101         return std::make_unique<SkUnicode_client>(this);
102     }
103 
104     ~SkUnicode_client() override = default;
105 
reset()106     void reset() { fData->reset(); }
107     // For SkShaper
108     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
109                                                      SkBidiIterator::Direction dir) override;
110     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
111                                                      int count,
112                                                      SkBidiIterator::Direction dir) override;
113     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
114                                                        BreakType breakType) override;
115     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
116     // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)117     bool getBidiRegions(const char utf8[],
118                         int utf8Units,
119                         TextDirection dir,
120                         std::vector<BidiRegion>* results) override {
121         return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
122     }
123 
124     // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)125     static bool isControl(SkUnichar utf8) {
126         return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
127                (utf8 >= 0x200D && utf8 <= 0x200F) ||
128                (utf8 >= 0x202A && utf8 <= 0x202E);
129     }
130 
isWhitespace(SkUnichar unichar)131     static bool isWhitespace(SkUnichar unichar) {
132         std::u16string whitespaces =
133        u"\u0009" // character tabulation
134         "\u000A" // line feed
135         "\u000B" // line tabulation
136         "\u000C" // form feed
137         "\u000D" // carriage return
138         "\u0020" // space
139       //"\u0085" // next line
140       //"\u00A0" // no-break space
141         "\u1680" // ogham space mark
142         "\u2000" // en quad
143         "\u2001" // em quad
144         "\u2002" // en space
145         "\u2003" // em space
146         "\u2004" // three-per-em space
147         "\u2005" // four-per-em space
148         "\u2006" // six-per-em space
149       //"\u2007" // figure space
150         "\u2008" // punctuation space
151         "\u2009" // thin space
152         "\u200A" // hair space
153         "\u2028" // line separator
154         "\u2029" // paragraph separator
155       //"\u202F" // narrow no-break space
156         "\u205F" // medium mathematical space
157         "\u3000";// ideographic space
158         return whitespaces.find(unichar) != std::u16string::npos;
159     }
160 
isSpace(SkUnichar unichar)161     static bool isSpace(SkUnichar unichar) {
162         std::u16string spaces =
163        u"\u0009" // character tabulation
164         "\u000A" // line feed
165         "\u000B" // line tabulation
166         "\u000C" // form feed
167         "\u000D" // carriage return
168         "\u0020" // space
169         "\u0085" // next line
170         "\u00A0" // no-break space
171         "\u1680" // ogham space mark
172         "\u2000" // en quad
173         "\u2001" // em quad
174         "\u2002" // en space
175         "\u2003" // em space
176         "\u2004" // three-per-em space
177         "\u2005" // four-per-em space
178         "\u2006" // six-per-em space
179         "\u2007" // figure space
180         "\u2008" // punctuation space
181         "\u2009" // thin space
182         "\u200A" // hair space
183         "\u2028" // line separator
184         "\u2029" // paragraph separator
185         "\u202F" // narrow no-break space
186         "\u205F" // medium mathematical space
187         "\u3000"; // ideographic space
188         return spaces.find(unichar) != std::u16string::npos;
189     }
190 
isTabulation(SkUnichar utf8)191     static bool isTabulation(SkUnichar utf8) {
192         return utf8 == '\t';
193     }
194 
isHardBreak(SkUnichar utf8)195     static bool isHardBreak(SkUnichar utf8) {
196         return utf8 == '\n';
197     }
198 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)199     bool computeCodeUnitFlags(char utf8[],
200                               int utf8Units,
201                               bool replaceTabs,
202                               SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
203         results->clear();
204         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
205         for (auto& lineBreak : fData->fLineBreaks) {
206             (*results)[lineBreak.pos] |=
207                 lineBreak.breakType == LineBreakType::kHardLineBreak
208                     ? CodeUnitFlags::kHardLineBreakBefore
209                     : CodeUnitFlags::kSoftLineBreakBefore;
210         }
211         for (auto& grapheme : fData->fGraphemeBreaks) {
212             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
213         }
214         const char* current = utf8;
215         const char* end = utf8 + utf8Units;
216         while (current < end) {
217             auto before = current - utf8;
218             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
219             if (unichar < 0) unichar = 0xFFFD;
220             auto after = current - utf8;
221             if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
222                 results->at(before) |= SkUnicode::kTabulation;
223                 if (replaceTabs) {
224                     unichar = ' ';
225                     utf8[before] = ' ';
226                 }
227             }
228             for (auto i = before; i < after; ++i) {
229                 if (SkUnicode_client::isSpace(unichar)) {
230                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
231                 }
232                 if (SkUnicode_client::isWhitespace(unichar)) {
233                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
234                 }
235                 if (SkUnicode_client::isControl(unichar)) {
236                     results->at(i) |= SkUnicode::kControl;
237                 }
238             }
239         }
240         return true;
241     }
242 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)243     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
244                           SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
245         results->clear();
246         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
247         for (auto& lineBreak : fData->fLineBreaks) {
248             (*results)[lineBreak.pos] |=
249                 lineBreak.breakType == LineBreakType::kHardLineBreak
250                     ? CodeUnitFlags::kHardLineBreakBefore
251                     : CodeUnitFlags::kSoftLineBreakBefore;
252         }
253         for (auto& grapheme : fData->fGraphemeBreaks) {
254             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
255         }
256         return true;
257     }
258 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)259     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
260         *results = fData->fWords;
261         return true;
262     }
263 
toUpper(const SkString & str)264     SkString toUpper(const SkString& str) override {
265         SkASSERT(false);
266         return SkString(fData->fText8.data(), fData->fText8.size());
267     }
268 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])269     void reorderVisual(const BidiLevel runLevels[],
270                        int levelsCount,
271                        int32_t logicalFromVisual[]) override {
272         SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
273     }
274 private:
275     friend class SkBreakIterator_client;
276 
277     std::shared_ptr<Data> fData;
278 };
279 
280 class SkBreakIterator_client: public SkBreakIterator {
281     std::shared_ptr<SkUnicode_client::Data> fData;
282     Position fLastResult;
283     Position fStart;
284     Position fEnd;
285 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)286     explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()287     Position first() override
288       { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()289     Position current() override
290       { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()291     Position next() override
292       { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()293     Status status() override {
294         return fData->fLineBreaks[fStart + fLastResult].breakType ==
295                        SkUnicode::LineBreakType::kHardLineBreak
296                        ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
297                        : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
298     }
isDone()299     bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)300     bool setText(const char utftext8[], int utf8Units) override {
301         SkASSERT(utftext8 >= fData->fText8.data() &&
302                  utf8Units <= SkToS16(fData->fText8.size()));
303         fStart = utftext8 - fData->fText8.data();
304         fEnd = fStart + utf8Units;
305         fLastResult = 0;
306         return true;
307     }
setText(const char16_t utftext16[],int utf16Units)308     bool setText(const char16_t utftext16[], int utf16Units) override {
309         SkASSERT(utftext16 >= fData->fText16.data() &&
310                  utf16Units <= SkToS16(fData->fText16.size()));
311         fStart = utftext16 - fData->fText16.data();
312         fEnd = fStart + utf16Units;
313         fLastResult = 0;
314         return true;
315     }
316 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)317 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
318                                                  SkBidiIterator::Direction dir) {
319     return SkUnicode::makeBidiIterator(text, count, dir);
320 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)321 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
322                                                  int count,
323                                                  SkBidiIterator::Direction dir) {
324     return SkUnicode::makeBidiIterator(text, count, dir);
325 }
makeBreakIterator(const char locale[],BreakType breakType)326 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
327                                                    BreakType breakType) {
328     return std::make_unique<SkBreakIterator_client>(fData);
329 }
makeBreakIterator(BreakType breakType)330 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
331     return std::make_unique<SkBreakIterator_client>(fData);
332 }
333 
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)334 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
335         SkSpan<char> text,
336         std::vector<SkUnicode::Position> words,
337         std::vector<SkUnicode::Position> graphemeBreaks,
338         std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
339     return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
340 }
341