1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "include/core/SkSpan.h"
8 #include "include/core/SkString.h"
9 #include "include/core/SkTypes.h"
10 #include "include/private/SkBitmaskEnum.h"
11 #include "include/private/SkTArray.h"
12 #include "include/private/SkTo.h"
13 #include "modules/skunicode/include/SkUnicode.h"
14 #include "modules/skunicode/src/SkUnicode_client.h"
15 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
16 #include "src/utils/SkUTF.h"
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include <array>
25 #include <unicode/ubidi.h>
26 #include <unicode/ubrk.h>
27 #include <unicode/uchar.h>
28 #include <unicode/uloc.h>
29 #include <unicode/uscript.h>
30 #include <unicode/ustring.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33
34
35 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
36
errorName(UErrorCode status)37 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
38 return u_errorName_skia(status);
39 }
bidi_close(UBiDi * bidi)40 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
41 ubidi_close_skia(bidi);
42 }
bidi_getDirection(const UBiDi * bidi)43 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
44 return ubidi_getDirection_skia(bidi);
45 }
bidi_getLength(const UBiDi * bidi)46 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
47 return ubidi_getLength_skia(bidi);
48 }
bidi_getLevelAt(const UBiDi * bidi,int pos)49 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
50 return ubidi_getLevelAt_skia(bidi, pos);
51 }
// Thin forwarding shim: passes both size arguments and the error-code pointer
// to ubidi_openSized_skia and returns the UBiDi pointer it yields.
// Any failure is reported by the callee through `pErrorCode`; nothing is
// inspected here.
UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength,
                                         int32_t maxRunCount,
                                         UErrorCode* pErrorCode) {
    UBiDi* const bidi = ubidi_openSized_skia(maxLength, maxRunCount, pErrorCode);
    return bidi;
}
// Thin forwarding shim: hands every argument, unchanged and in the same order,
// to ubidi_setPara_skia. Nothing is returned; the callee reports through
// `status`.
void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
                                     const UChar* text,
                                     int32_t length,
                                     UBiDiLevel paraLevel,
                                     UBiDiLevel* embeddingLevels,
                                     UErrorCode* status) {
    ubidi_setPara_skia(bidi, text, length, paraLevel, embeddingLevels, status);
}
// Thin forwarding shim: passes the run levels, their count and the output
// array to ubidi_reorderVisual_skia, which fills `logicalFromVisual`.
void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
                                           int levelsCount,
                                           int32_t logicalFromVisual[]) {
    // No validation is performed on this side of the call.
    ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual);
}
68 #endif
69
70 class SkUnicode_client : public SkUnicode {
71 public:
72 struct Data {
73 SkSpan<const char> fText8;
74 SkSpan<const char16_t> fText16;
75 std::vector<Position> fWords;
76 std::vector<SkUnicode::Position> fGraphemeBreaks;
77 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data78 Data(SkSpan<char> text,
79 std::vector<SkUnicode::Position> words,
80 std::vector<SkUnicode::Position> graphemeBreaks,
81 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
82 : fText8(text)
83 , fText16(SkSpan<const char16_t>(nullptr, 0))
84 , fWords(std::move(words))
85 , fGraphemeBreaks(std::move(graphemeBreaks))
86 , fLineBreaks(std::move(lineBreaks)) {
87 }
88
resetSkUnicode_client::Data89 void reset() {
90 fText8 = SkSpan<const char>(nullptr, 0);
91 fText16 = SkSpan<const char16_t>(nullptr, 0);
92 fGraphemeBreaks.clear();
93 fLineBreaks.clear();
94 }
95 };
96 SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)97 SkUnicode_client(SkSpan<char> text,
98 std::vector<SkUnicode::Position> words,
99 std::vector<SkUnicode::Position> graphemeBreaks,
100 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
101 : fData(std::make_shared<Data>(text,
102 std::move(words),
103 std::move(graphemeBreaks),
104 std::move(lineBreaks))) { }
SkUnicode_client(const SkUnicode_client * origin)105 SkUnicode_client(const SkUnicode_client* origin)
106 : fData(origin->fData) {}
107
108
copy()109 std::unique_ptr<SkUnicode> copy() override {
110 return std::make_unique<SkUnicode_client>(this);
111 }
112
113 ~SkUnicode_client() override = default;
114
reset()115 void reset() { fData->reset(); }
116 // For SkShaper
117 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
118 SkBidiIterator::Direction dir) override;
119 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
120 int count,
121 SkBidiIterator::Direction dir) override;
122 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
123 BreakType breakType) override;
124 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
125 // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)126 bool getBidiRegions(const char utf8[],
127 int utf8Units,
128 TextDirection dir,
129 std::vector<BidiRegion>* results) override {
130 return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
131 }
132
133 // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)134 static bool isControl(SkUnichar utf8) {
135 return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
136 (utf8 >= 0x200D && utf8 <= 0x200F) ||
137 (utf8 >= 0x202A && utf8 <= 0x202E);
138 }
139
isWhitespace(SkUnichar unichar)140 static bool isWhitespace(SkUnichar unichar) {
141 static constexpr std::array<SkUnichar, 21> whitespaces {
142 0x0009, // character tabulation
143 0x000A, // line feed
144 0x000B, // line tabulation
145 0x000C, // form feed
146 0x000D, // carriage return
147 0x0020, // space
148 //0x0085, // next line
149 //0x00A0, // no-break space
150 0x1680, // ogham space mark
151 0x2000, // en quad
152 0x2001, // em quad
153 0x2002, // en space
154 0x2003, // em space
155 0x2004, // three-per-em space
156 0x2005, // four-per-em space
157 0x2006, // six-per-em space
158 //0x2007, // figure space
159 0x2008, // punctuation space
160 0x2009, // thin space
161 0x200A, // hair space
162 0x2028, // line separator
163 0x2029, // paragraph separator
164 //0x202F, // narrow no-break space
165 0x205F, // medium mathematical space
166 0x3000};// ideographic space
167 return std::find(whitespaces.begin(), whitespaces.end(), unichar) != whitespaces.end();
168 }
169
isSpace(SkUnichar unichar)170 static bool isSpace(SkUnichar unichar) {
171 static constexpr std::array<SkUnichar, 25> spaces {
172 0x0009, // character tabulation
173 0x000A, // line feed
174 0x000B, // line tabulation
175 0x000C, // form feed
176 0x000D, // carriage return
177 0x0020, // space
178 0x0085, // next line
179 0x00A0, // no-break space
180 0x1680, // ogham space mark
181 0x2000, // en quad
182 0x2001, // em quad
183 0x2002, // en space
184 0x2003, // em space
185 0x2004, // three-per-em space
186 0x2005, // four-per-em space
187 0x2006, // six-per-em space
188 0x2007, // figure space
189 0x2008, // punctuation space
190 0x2009, // thin space
191 0x200A, // hair space
192 0x2028, // line separator
193 0x2029, // paragraph separator
194 0x202F, // narrow no-break space
195 0x205F, // medium mathematical space
196 0x3000}; // ideographic space
197 return std::find(spaces.begin(), spaces.end(), unichar) != spaces.end();
198 }
199
isTabulation(SkUnichar utf8)200 static bool isTabulation(SkUnichar utf8) {
201 return utf8 == '\t';
202 }
203
isHardBreak(SkUnichar utf8)204 static bool isHardBreak(SkUnichar utf8) {
205 return utf8 == '\n';
206 }
207
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)208 bool computeCodeUnitFlags(char utf8[],
209 int utf8Units,
210 bool replaceTabs,
211 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
212 results->clear();
213 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
214 for (auto& lineBreak : fData->fLineBreaks) {
215 (*results)[lineBreak.pos] |=
216 lineBreak.breakType == LineBreakType::kHardLineBreak
217 ? CodeUnitFlags::kHardLineBreakBefore
218 : CodeUnitFlags::kSoftLineBreakBefore;
219 }
220 for (auto& grapheme : fData->fGraphemeBreaks) {
221 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
222 }
223 const char* current = utf8;
224 const char* end = utf8 + utf8Units;
225 while (current < end) {
226 auto before = current - utf8;
227 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
228 if (unichar < 0) unichar = 0xFFFD;
229 auto after = current - utf8;
230 if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
231 results->at(before) |= SkUnicode::kTabulation;
232 if (replaceTabs) {
233 unichar = ' ';
234 utf8[before] = ' ';
235 }
236 }
237 for (auto i = before; i < after; ++i) {
238 if (SkUnicode_client::isSpace(unichar)) {
239 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
240 }
241 if (SkUnicode_client::isWhitespace(unichar)) {
242 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
243 }
244 if (SkUnicode_client::isControl(unichar)) {
245 results->at(i) |= SkUnicode::kControl;
246 }
247 }
248 }
249 return true;
250 }
251
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)252 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
253 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
254 results->clear();
255 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
256 for (auto& lineBreak : fData->fLineBreaks) {
257 (*results)[lineBreak.pos] |=
258 lineBreak.breakType == LineBreakType::kHardLineBreak
259 ? CodeUnitFlags::kHardLineBreakBefore
260 : CodeUnitFlags::kSoftLineBreakBefore;
261 }
262 for (auto& grapheme : fData->fGraphemeBreaks) {
263 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
264 }
265 return true;
266 }
267
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)268 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
269 *results = fData->fWords;
270 return true;
271 }
272
toUpper(const SkString & str)273 SkString toUpper(const SkString& str) override {
274 SkASSERT(false);
275 return SkString(fData->fText8.data(), fData->fText8.size());
276 }
277
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])278 void reorderVisual(const BidiLevel runLevels[],
279 int levelsCount,
280 int32_t logicalFromVisual[]) override {
281 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
282 }
283 private:
284 friend class SkBreakIterator_client;
285
286 std::shared_ptr<Data> fData;
287 };
288
289 class SkBreakIterator_client: public SkBreakIterator {
290 std::shared_ptr<SkUnicode_client::Data> fData;
291 Position fLastResult;
292 Position fStart;
293 Position fEnd;
294 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)295 explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()296 Position first() override
297 { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()298 Position current() override
299 { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()300 Position next() override
301 { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()302 Status status() override {
303 return fData->fLineBreaks[fStart + fLastResult].breakType ==
304 SkUnicode::LineBreakType::kHardLineBreak
305 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
306 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
307 }
isDone()308 bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)309 bool setText(const char utftext8[], int utf8Units) override {
310 SkASSERT(utftext8 >= fData->fText8.data() &&
311 utf8Units <= SkToS16(fData->fText8.size()));
312 fStart = utftext8 - fData->fText8.data();
313 fEnd = fStart + utf8Units;
314 fLastResult = 0;
315 return true;
316 }
setText(const char16_t utftext16[],int utf16Units)317 bool setText(const char16_t utftext16[], int utf16Units) override {
318 SkASSERT(utftext16 >= fData->fText16.data() &&
319 utf16Units <= SkToS16(fData->fText16.size()));
320 fStart = utftext16 - fData->fText16.data();
321 fEnd = fStart + utf16Units;
322 fLastResult = 0;
323 return true;
324 }
325 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)326 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
327 SkBidiIterator::Direction dir) {
328 return SkUnicode::makeBidiIterator(text, count, dir);
329 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)330 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
331 int count,
332 SkBidiIterator::Direction dir) {
333 return SkUnicode::makeBidiIterator(text, count, dir);
334 }
makeBreakIterator(const char locale[],BreakType breakType)335 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
336 BreakType breakType) {
337 return std::make_unique<SkBreakIterator_client>(fData);
338 }
makeBreakIterator(BreakType breakType)339 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
340 return std::make_unique<SkBreakIterator_client>(fData);
341 }
342
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)343 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
344 SkSpan<char> text,
345 std::vector<SkUnicode::Position> words,
346 std::vector<SkUnicode::Position> graphemeBreaks,
347 std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
348 return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
349 }
350
351