1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "modules/skunicode/include/SkUnicode_client.h"
8
9 #include "include/core/SkSpan.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "modules/skunicode/include/SkUnicode.h"
15 #include "modules/skunicode/src/SkBidiFactory_icu_subset.h"
16 #include "modules/skunicode/src/SkUnicode_hardcoded.h"
17 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
18 #include "src/base/SkBitmaskEnum.h"
19 #include "src/base/SkUTF.h"
20
21 #include <algorithm>
22 #include <cstdint>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 #include <array>
28 #include <unicode/ubidi.h>
29 #include <unicode/ubrk.h>
30 #include <unicode/uchar.h>
31 #include <unicode/uloc.h>
32 #include <unicode/uscript.h>
33 #include <unicode/ustring.h>
34 #include <unicode/utext.h>
35 #include <unicode/utypes.h>
36
37 using namespace skia_private;
38
39 class SkUnicode_client : public SkUnicodeHardCodedCharProperties {
40 public:
41 struct Data {
42 SkSpan<const char> fText8;
43 SkSpan<const char16_t> fText16;
44 std::vector<Position> fWords;
45 std::vector<SkUnicode::Position> fGraphemeBreaks;
46 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data47 Data(SkSpan<char> text,
48 std::vector<SkUnicode::Position> words,
49 std::vector<SkUnicode::Position> graphemeBreaks,
50 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
51 : fText8(text)
52 , fText16(SkSpan<const char16_t>(nullptr, 0))
53 , fWords(std::move(words))
54 , fGraphemeBreaks(std::move(graphemeBreaks))
55 , fLineBreaks(std::move(lineBreaks)) {
56 }
57
resetSkUnicode_client::Data58 void reset() {
59 fText8 = SkSpan<const char>(nullptr, 0);
60 fText16 = SkSpan<const char16_t>(nullptr, 0);
61 fGraphemeBreaks.clear();
62 fLineBreaks.clear();
63 }
64 };
65 SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)66 SkUnicode_client(SkSpan<char> text,
67 std::vector<SkUnicode::Position> words,
68 std::vector<SkUnicode::Position> graphemeBreaks,
69 std::vector<SkUnicode::LineBreakBefore> lineBreaks)
70 : fData(std::make_shared<Data>(text,
71 std::move(words),
72 std::move(graphemeBreaks),
73 std::move(lineBreaks))) { }
74
75 ~SkUnicode_client() override = default;
76
reset()77 void reset() { fData->reset(); }
78 // For SkShaper
79 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
80 SkBidiIterator::Direction dir) override;
81 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
82 int count,
83 SkBidiIterator::Direction dir) override;
84 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
85 BreakType breakType) override;
86 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
87 // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)88 bool getBidiRegions(const char utf8[],
89 int utf8Units,
90 TextDirection dir,
91 std::vector<BidiRegion>* results) override {
92 return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
93 }
94
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)95 bool getUtf8Words(const char utf8[],
96 int utf8Units,
97 const char* locale,
98 std::vector<Position>* results) override {
99 SkDEBUGF("Method 'getUtf8Words' is not implemented\n");
100 return false;
101 }
102
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)103 bool getSentences(const char utf8[],
104 int utf8Units,
105 const char* locale,
106 std::vector<SkUnicode::Position>* results) override {
107 SkDEBUGF("Method 'getSentences' is not implemented\n");
108 return false;
109 }
110
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)111 bool computeCodeUnitFlags(char utf8[],
112 int utf8Units,
113 bool replaceTabs,
114 TArray<SkUnicode::CodeUnitFlags, true>* results) override {
115 results->clear();
116 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
117 for (auto& lineBreak : fData->fLineBreaks) {
118 (*results)[lineBreak.pos] |=
119 lineBreak.breakType == LineBreakType::kHardLineBreak
120 ? CodeUnitFlags::kHardLineBreakBefore
121 : CodeUnitFlags::kSoftLineBreakBefore;
122 }
123 for (auto& grapheme : fData->fGraphemeBreaks) {
124 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
125 }
126 const char* current = utf8;
127 const char* end = utf8 + utf8Units;
128 while (current < end) {
129 auto before = current - utf8;
130 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
131 if (unichar < 0) unichar = 0xFFFD;
132 auto after = current - utf8;
133 if (replaceTabs && this->isTabulation(unichar)) {
134 results->at(before) |= SkUnicode::kTabulation;
135 if (replaceTabs) {
136 unichar = ' ';
137 utf8[before] = ' ';
138 }
139 }
140 for (auto i = before; i < after; ++i) {
141 if (this->isSpace(unichar)) {
142 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
143 }
144 if (this->isWhitespace(unichar)) {
145 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
146 }
147 if (this->isControl(unichar)) {
148 results->at(i) |= SkUnicode::kControl;
149 }
150 if (this->isIdeographic(unichar)) {
151 results->at(i) |= SkUnicode::kIdeographic;
152 }
153 }
154 }
155 return true;
156 }
157
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)158 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
159 TArray<SkUnicode::CodeUnitFlags, true>* results) override {
160 results->clear();
161 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
162 for (auto& lineBreak : fData->fLineBreaks) {
163 (*results)[lineBreak.pos] |=
164 lineBreak.breakType == LineBreakType::kHardLineBreak
165 ? CodeUnitFlags::kHardLineBreakBefore
166 : CodeUnitFlags::kSoftLineBreakBefore;
167 }
168 for (auto& grapheme : fData->fGraphemeBreaks) {
169 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
170 }
171 for (auto i = 0; i < utf16Units; ++i) {
172 auto unichar = utf16[i];
173 if (this->isSpace(unichar)) {
174 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
175 }
176 if (this->isWhitespace(unichar)) {
177 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
178 }
179 if (this->isControl(unichar)) {
180 results->at(i) |= SkUnicode::kControl;
181 }
182 if (this->isIdeographic(unichar)) {
183 results->at(i) |= SkUnicode::kIdeographic;
184 }
185 }
186 return true;
187 }
188
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)189 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
190 *results = fData->fWords;
191 return true;
192 }
193
toUpper(const SkString & str)194 SkString toUpper(const SkString& str) override {
195 return this->toUpper(str, nullptr);
196 }
197
toUpper(const SkString & str,const char * locale)198 SkString toUpper(const SkString& str, const char* locale) override {
199 return SkString(fData->fText8.data(), fData->fText8.size());
200 }
201
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])202 void reorderVisual(const BidiLevel runLevels[],
203 int levelsCount,
204 int32_t logicalFromVisual[]) override {
205 fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
206 }
207 private:
208 friend class SkBreakIterator_client;
209
210 std::shared_ptr<Data> fData;
211 sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiSubsetFactory>();
212 };
213
214 class SkBreakIterator_client: public SkBreakIterator {
215 std::shared_ptr<SkUnicode_client::Data> fData;
216 Position fLastResult;
217 Position fStart;
218 Position fEnd;
219 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)220 explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()221 Position first() override
222 { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()223 Position current() override
224 { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()225 Position next() override
226 { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()227 Status status() override {
228 return fData->fLineBreaks[fStart + fLastResult].breakType ==
229 SkUnicode::LineBreakType::kHardLineBreak
230 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
231 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
232 }
isDone()233 bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)234 bool setText(const char utftext8[], int utf8Units) override {
235 SkASSERT(utftext8 >= fData->fText8.data() &&
236 utf8Units <= SkToS16(fData->fText8.size()));
237 fStart = utftext8 - fData->fText8.data();
238 fEnd = fStart + utf8Units;
239 fLastResult = 0;
240 return true;
241 }
setText(const char16_t utftext16[],int utf16Units)242 bool setText(const char16_t utftext16[], int utf16Units) override {
243 SkASSERT(utftext16 >= fData->fText16.data() &&
244 utf16Units <= SkToS16(fData->fText16.size()));
245 fStart = utftext16 - fData->fText16.data();
246 fEnd = fStart + utf16Units;
247 fLastResult = 0;
248 return true;
249 }
250 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)251 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
252 SkBidiIterator::Direction dir) {
253 return fBidiFact->MakeIterator(text, count, dir);
254 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)255 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
256 int count,
257 SkBidiIterator::Direction dir) {
258 return fBidiFact->MakeIterator(text, count, dir);
259 }
makeBreakIterator(const char locale[],BreakType breakType)260 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
261 BreakType breakType) {
262 return std::make_unique<SkBreakIterator_client>(fData);
263 }
makeBreakIterator(BreakType breakType)264 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
265 return std::make_unique<SkBreakIterator_client>(fData);
266 }
267
268 namespace SkUnicodes::Client {
Make(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)269 sk_sp<SkUnicode> Make(
270 SkSpan<char> text,
271 std::vector<SkUnicode::Position> words,
272 std::vector<SkUnicode::Position> graphemeBreaks,
273 std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
274 return sk_make_sp<SkUnicode_client>(text,
275 std::move(words),
276 std::move(graphemeBreaks),
277 std::move(lineBreaks));
278 }
279 }
280
281
282