• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
8 
9 #include "include/core/SkTypes.h"
10 #include "include/private/base/SkDebug.h"
11 #include "include/private/base/SkTFitsIn.h"
12 #include "src/base/SkUTF.h"
13 
14 #include <unicode/ubidi.h>
15 #include <unicode/umachine.h>
16 #include <unicode/utypes.h>
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 namespace {
25 using SkUnicodeBidi = std::unique_ptr<UBiDi, SkBidiFactory::BidiCloseCallback>;
26 
27 class SkBidiIterator_icu : public SkBidiIterator {
28 public:
SkBidiIterator_icu(SkUnicodeBidi bidi,sk_sp<SkBidiFactory> fact)29     SkBidiIterator_icu(SkUnicodeBidi bidi, sk_sp<SkBidiFactory> fact)
30             : fBidi(std::move(bidi)), fBidiFact(std::move(fact)) {}
31 
getLength()32     Position getLength() override { return fBidiFact->bidi_getLength(fBidi.get()); }
33 
getLevelAt(Position pos)34     Level getLevelAt(Position pos) override { return fBidiFact->bidi_getLevelAt(fBidi.get(), pos); }
35 
36 private:
37     SkUnicodeBidi fBidi;
38     sk_sp<SkBidiFactory> fBidiFact;
39 };
40 }  // namespace
41 
42 #ifdef ENABLE_DRAWING_ADAPTER
43 namespace SkiaRsText {
44 #endif
MakeIterator(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir) const45 std::unique_ptr<SkBidiIterator> SkBidiFactory::MakeIterator(const uint16_t utf16[],
46                                                             int utf16Units,
47                                                             SkBidiIterator::Direction dir) const {
48     UErrorCode status = U_ZERO_ERROR;
49     SkUnicodeBidi bidi(this->bidi_openSized(utf16Units, 0, &status), this->bidi_close_callback());
50     if (U_FAILURE(status)) {
51         SkDEBUGF("Bidi error: %s", this->errorName(status));
52         return nullptr;
53     }
54     SkASSERT(bidi);
55     uint8_t bidiLevel = (dir == SkBidiIterator::kLTR) ? UBIDI_LTR : UBIDI_RTL;
56     // The required lifetime of utf16 isn't well documented.
57     // It appears it isn't used after ubidi_setPara except through ubidi_getText.
58     this->bidi_setPara(bidi.get(), (const UChar*)utf16, utf16Units, bidiLevel, nullptr, &status);
59     if (U_FAILURE(status)) {
60         SkDEBUGF("Bidi error: %s", this->errorName(status));
61         return nullptr;
62     }
63     return std::unique_ptr<SkBidiIterator>(
64             new SkBidiIterator_icu(std::move(bidi), sk_ref_sp(this)));
65 }
66 
MakeIterator(const char utf8[],int utf8Units,SkBidiIterator::Direction dir) const67 std::unique_ptr<SkBidiIterator> SkBidiFactory::MakeIterator(const char utf8[],
68                                                             int utf8Units,
69                                                             SkBidiIterator::Direction dir) const {
70     // Convert utf8 into utf16 since ubidi only accepts utf16
71     if (!SkTFitsIn<int32_t>(utf8Units)) {
72         SkDEBUGF("Bidi error: text too long");
73         return nullptr;
74     }
75 
76     // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
77     int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
78     if (utf16Units < 0) {
79         SkDEBUGF("Bidi error: Invalid utf8 input");
80         return nullptr;
81     }
82     std::unique_ptr<uint16_t[]> utf16(new uint16_t[utf16Units]);
83     SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16.get(), utf16Units, utf8, utf8Units);
84     SkASSERT(dstLen == utf16Units);
85 
86     return MakeIterator(utf16.get(), utf16Units, dir);
87 }
88 
89 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)90 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
91     SkUnichar val = SkUTF::NextUTF8(ptr, end);
92     return val < 0 ? 0xFFFD : val;
93 }
94 
ExtractBidi(const char utf8[],int utf8Units,SkUnicode::TextDirection dir,std::vector<SkUnicode::BidiRegion> * bidiRegions) const95 bool SkBidiFactory::ExtractBidi(const char utf8[],
96                                 int utf8Units,
97                                 SkUnicode::TextDirection dir,
98                                 std::vector<SkUnicode::BidiRegion>* bidiRegions) const {
99     // Convert to UTF16 since for now bidi iterator only operates on utf16
100     auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
101 
102     // Create bidi iterator
103     UErrorCode status = U_ZERO_ERROR;
104     SkUnicodeBidi bidi(this->bidi_openSized(utf16.size(), 0, &status), this->bidi_close_callback());
105     if (U_FAILURE(status)) {
106         SkDEBUGF("Bidi error: %s", this->errorName(status));
107         return false;
108     }
109     SkASSERT(bidi);
110     uint8_t bidiLevel = (dir == SkUnicode::TextDirection::kLTR) ? UBIDI_LTR : UBIDI_RTL;
111     // The required lifetime of utf16 isn't well documented.
112     // It appears it isn't used after ubidi_setPara except through ubidi_getText.
113     this->bidi_setPara(
114             bidi.get(), (const UChar*)utf16.c_str(), utf16.size(), bidiLevel, nullptr, &status);
115     if (U_FAILURE(status)) {
116         SkDEBUGF("Bidi error: %s", this->errorName(status));
117         return false;
118     }
119 
120     // Iterate through bidi regions and the result positions into utf8
121     const char* start8 = utf8;
122     const char* end8 = utf8 + utf8Units;
123     SkUnicode::BidiLevel currentLevel = 0;
124 
125     SkUnicode::Position pos8 = 0;
126     SkUnicode::Position pos16 = 0;
127     SkUnicode::Position end16 = this->bidi_getLength(bidi.get());
128 
129     if (end16 == 0) {
130         return true;
131     }
132     if (this->bidi_getDirection(bidi.get()) != UBIDI_MIXED) {
133         // The entire paragraph is unidirectional.
134         bidiRegions->emplace_back(0, utf8Units, this->bidi_getLevelAt(bidi.get(), 0));
135         return true;
136     }
137 
138     while (pos16 < end16) {
139         auto level = this->bidi_getLevelAt(bidi.get(), pos16);
140         if (pos16 == 0) {
141             currentLevel = level;
142         } else if (level != currentLevel) {
143             SkUnicode::Position end = start8 - utf8;
144             bidiRegions->emplace_back(pos8, end, currentLevel);
145             currentLevel = level;
146             pos8 = end;
147         }
148         SkUnichar u = utf8_next(&start8, end8);
149         pos16 += SkUTF::ToUTF16(u);
150     }
151     SkUnicode::Position end = start8 - utf8;
152     if (end != pos8) {
153         bidiRegions->emplace_back(pos8, end, currentLevel);
154     }
155     return true;
156 }
157 #ifdef ENABLE_DRAWING_ADAPTER
158 }
159 #endif // ENABLE_DRAWING_ADAPTER