1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkSpan.h"
9 #include "include/core/SkString.h"
10 #include "include/core/SkTypes.h"
11 #include "include/private/SkBitmaskEnum.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "modules/skunicode/include/SkUnicode.h"
15 #include "modules/skunicode/src/SkUnicode_client.h"
16 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
17 #include "src/base/SkUTF.h"
18
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25
26
27 #ifndef SK_UNICODE_ICU_IMPLEMENTATION
errorName(UErrorCode status)28 const char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
29 return cl_u_errorName(status);
30 }
bidi_close(UBiDi * bidi)31 void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
32 cl_ubidi_close(bidi);
33 }
bidi_getDirection(const UBiDi * bidi)34 UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
35 return cl_ubidi_getDirection(bidi);
36 }
bidi_getLength(const UBiDi * bidi)37 SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
38 return cl_ubidi_getLength(bidi);
39 }
bidi_getLevelAt(const UBiDi * bidi,int pos)40 SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
41 return cl_ubidi_getLevelAt(bidi, pos);
42 }
bidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)43 UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
44 return cl_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
45 }
bidi_setPara(UBiDi * bidi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * status)46 void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
47 const UChar* text,
48 int32_t length,
49 UBiDiLevel paraLevel,
50 UBiDiLevel* embeddingLevels,
51 UErrorCode* status) {
52 return cl_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
53 }
bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])54 void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
55 int levelsCount,
56 int32_t logicalFromVisual[]) {
57 cl_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
58 }
59 #endif
60
61 class SkUnicode_client : public SkUnicode {
62 public:
63 struct Data {
64 SkSpan<const char> fText8;
65 SkSpan<const char16_t> fText16;
66 std::vector<Position> fWords;
67 std::vector<SkUnicode::Position> fGraphemeBreaks;
68 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data69 Data(SkSpan<char> text,
70 std::vector<SkUnicode::Position> words,
71 std::vector<SkUnicode::Position> graphemeBreaks,
72 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
73 : fText8(text)
74 , fText16(SkSpan<const char16_t>(nullptr, 0))
75 , fWords(std::move(words))
76 , fGraphemeBreaks(std::move(graphemeBreaks))
77 , fLineBreaks(std::move(lineBreaks)) {
78 }
79
resetSkUnicode_client::Data80 void reset() {
81 fText8 = SkSpan<const char>(nullptr, 0);
82 fText16 = SkSpan<const char16_t>(nullptr, 0);
83 fGraphemeBreaks.clear();
84 fLineBreaks.clear();
85 }
86 };
87 SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)88 SkUnicode_client(SkSpan<char> text,
89 std::vector<SkUnicode::Position> words,
90 std::vector<SkUnicode::Position> graphemeBreaks,
91 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
92 : fData(std::make_shared<Data>(text,
93 std::move(words),
94 std::move(graphemeBreaks),
95 std::move(lineBreaks))) {}
SkUnicode_client(const SkUnicode_client * origin)96 SkUnicode_client(const SkUnicode_client* origin)
97 : fData(origin->fData) {}
98
99
copy()100 std::unique_ptr<SkUnicode> copy() override {
101 return std::make_unique<SkUnicode_client>(this);
102 }
103
104 ~SkUnicode_client() override = default;
105
reset()106 void reset() { fData->reset(); }
107 // For SkShaper
108 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
109 SkBidiIterator::Direction dir) override;
110 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
111 int count,
112 SkBidiIterator::Direction dir) override;
113 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
114 BreakType breakType) override;
115 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
116 // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)117 bool getBidiRegions(const char utf8[],
118 int utf8Units,
119 TextDirection dir,
120 std::vector<BidiRegion>* results) override {
121 return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
122 }
123
124 // TODO: Take if from the Client or hard code here?
isControl(SkUnichar utf8)125 static bool isControl(SkUnichar utf8) {
126 return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
127 (utf8 >= 0x200D && utf8 <= 0x200F) ||
128 (utf8 >= 0x202A && utf8 <= 0x202E);
129 }
130
isWhitespace(SkUnichar unichar)131 static bool isWhitespace(SkUnichar unichar) {
132 std::u16string whitespaces =
133 u"\u0009" // character tabulation
134 "\u000A" // line feed
135 "\u000B" // line tabulation
136 "\u000C" // form feed
137 "\u000D" // carriage return
138 "\u0020" // space
139 //"\u0085" // next line
140 //"\u00A0" // no-break space
141 "\u1680" // ogham space mark
142 "\u2000" // en quad
143 "\u2001" // em quad
144 "\u2002" // en space
145 "\u2003" // em space
146 "\u2004" // three-per-em space
147 "\u2005" // four-per-em space
148 "\u2006" // six-per-em space
149 //"\u2007" // figure space
150 "\u2008" // punctuation space
151 "\u2009" // thin space
152 "\u200A" // hair space
153 "\u2028" // line separator
154 "\u2029" // paragraph separator
155 //"\u202F" // narrow no-break space
156 "\u205F" // medium mathematical space
157 "\u3000";// ideographic space
158 return whitespaces.find(unichar) != std::u16string::npos;
159 }
160
isSpace(SkUnichar unichar)161 static bool isSpace(SkUnichar unichar) {
162 std::u16string spaces =
163 u"\u0009" // character tabulation
164 "\u000A" // line feed
165 "\u000B" // line tabulation
166 "\u000C" // form feed
167 "\u000D" // carriage return
168 "\u0020" // space
169 "\u0085" // next line
170 "\u00A0" // no-break space
171 "\u1680" // ogham space mark
172 "\u2000" // en quad
173 "\u2001" // em quad
174 "\u2002" // en space
175 "\u2003" // em space
176 "\u2004" // three-per-em space
177 "\u2005" // four-per-em space
178 "\u2006" // six-per-em space
179 "\u2007" // figure space
180 "\u2008" // punctuation space
181 "\u2009" // thin space
182 "\u200A" // hair space
183 "\u2028" // line separator
184 "\u2029" // paragraph separator
185 "\u202F" // narrow no-break space
186 "\u205F" // medium mathematical space
187 "\u3000"; // ideographic space
188 return spaces.find(unichar) != std::u16string::npos;
189 }
190
isTabulation(SkUnichar utf8)191 static bool isTabulation(SkUnichar utf8) {
192 return utf8 == '\t';
193 }
194
isHardBreak(SkUnichar utf8)195 static bool isHardBreak(SkUnichar utf8) {
196 return utf8 == '\n';
197 }
198
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)199 bool computeCodeUnitFlags(char utf8[],
200 int utf8Units,
201 bool replaceTabs,
202 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
203 results->clear();
204 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
205 for (auto& lineBreak : fData->fLineBreaks) {
206 (*results)[lineBreak.pos] |=
207 lineBreak.breakType == LineBreakType::kHardLineBreak
208 ? CodeUnitFlags::kHardLineBreakBefore
209 : CodeUnitFlags::kSoftLineBreakBefore;
210 }
211 for (auto& grapheme : fData->fGraphemeBreaks) {
212 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
213 }
214 const char* current = utf8;
215 const char* end = utf8 + utf8Units;
216 while (current < end) {
217 auto before = current - utf8;
218 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
219 if (unichar < 0) unichar = 0xFFFD;
220 auto after = current - utf8;
221 if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
222 results->at(before) |= SkUnicode::kTabulation;
223 if (replaceTabs) {
224 unichar = ' ';
225 utf8[before] = ' ';
226 }
227 }
228 for (auto i = before; i < after; ++i) {
229 if (SkUnicode_client::isSpace(unichar)) {
230 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
231 }
232 if (SkUnicode_client::isWhitespace(unichar)) {
233 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
234 }
235 if (SkUnicode_client::isControl(unichar)) {
236 results->at(i) |= SkUnicode::kControl;
237 }
238 }
239 }
240 return true;
241 }
242
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,SkTArray<SkUnicode::CodeUnitFlags,true> * results)243 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
244 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
245 results->clear();
246 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
247 for (auto& lineBreak : fData->fLineBreaks) {
248 (*results)[lineBreak.pos] |=
249 lineBreak.breakType == LineBreakType::kHardLineBreak
250 ? CodeUnitFlags::kHardLineBreakBefore
251 : CodeUnitFlags::kSoftLineBreakBefore;
252 }
253 for (auto& grapheme : fData->fGraphemeBreaks) {
254 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
255 }
256 return true;
257 }
258
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)259 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
260 *results = fData->fWords;
261 return true;
262 }
263
toUpper(const SkString & str)264 SkString toUpper(const SkString& str) override {
265 SkASSERT(false);
266 return SkString(fData->fText8.data(), fData->fText8.size());
267 }
268
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])269 void reorderVisual(const BidiLevel runLevels[],
270 int levelsCount,
271 int32_t logicalFromVisual[]) override {
272 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
273 }
274 private:
275 friend class SkBreakIterator_client;
276
277 std::shared_ptr<Data> fData;
278 };
279
280 class SkBreakIterator_client: public SkBreakIterator {
281 std::shared_ptr<SkUnicode_client::Data> fData;
282 Position fLastResult;
283 Position fStart;
284 Position fEnd;
285 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)286 explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()287 Position first() override
288 { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()289 Position current() override
290 { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()291 Position next() override
292 { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()293 Status status() override {
294 return fData->fLineBreaks[fStart + fLastResult].breakType ==
295 SkUnicode::LineBreakType::kHardLineBreak
296 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
297 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
298 }
isDone()299 bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)300 bool setText(const char utftext8[], int utf8Units) override {
301 SkASSERT(utftext8 >= fData->fText8.data() &&
302 utf8Units <= SkToS16(fData->fText8.size()));
303 fStart = utftext8 - fData->fText8.data();
304 fEnd = fStart + utf8Units;
305 fLastResult = 0;
306 return true;
307 }
setText(const char16_t utftext16[],int utf16Units)308 bool setText(const char16_t utftext16[], int utf16Units) override {
309 SkASSERT(utftext16 >= fData->fText16.data() &&
310 utf16Units <= SkToS16(fData->fText16.size()));
311 fStart = utftext16 - fData->fText16.data();
312 fEnd = fStart + utf16Units;
313 fLastResult = 0;
314 return true;
315 }
316 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)317 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
318 SkBidiIterator::Direction dir) {
319 return SkUnicode::makeBidiIterator(text, count, dir);
320 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)321 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
322 int count,
323 SkBidiIterator::Direction dir) {
324 return SkUnicode::makeBidiIterator(text, count, dir);
325 }
makeBreakIterator(const char locale[],BreakType breakType)326 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
327 BreakType breakType) {
328 return std::make_unique<SkBreakIterator_client>(fData);
329 }
makeBreakIterator(BreakType breakType)330 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
331 return std::make_unique<SkBreakIterator_client>(fData);
332 }
333
MakeClientBasedUnicode(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)334 std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
335 SkSpan<char> text,
336 std::vector<SkUnicode::Position> words,
337 std::vector<SkUnicode::Position> graphemeBreaks,
338 std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
339 return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks);
340 }
341