• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 #include "include/core/SkTypes.h"
8 #include "include/private/SkTFitsIn.h"
9 #include "include/private/SkTemplates.h"
10 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
11 #include "src/utils/SkUTF.h"
12 #include <unicode/umachine.h>
13 #include <cstdint>
14 #include <memory>
15 #include <string>
16 #include <utility>
17 #include <vector>
18 
19 namespace {
20 using SkUnicodeBidi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close),
21                                                                SkUnicode_IcuBidi::bidi_close>>;
22 
23 class SkBidiIterator_icu : public SkBidiIterator {
24 public:
SkBidiIterator_icu(SkUnicodeBidi bidi)25     SkBidiIterator_icu(SkUnicodeBidi bidi) : fBidi(std::move(bidi)) {}
26 
getLength()27     Position getLength() override { return SkUnicode_IcuBidi::bidi_getLength(fBidi.get()); }
28 
getLevelAt(Position pos)29     Level getLevelAt(Position pos) override { return SkUnicode_IcuBidi::bidi_getLevelAt(fBidi.get(), pos); }
30 
31 private:
32     SkUnicodeBidi fBidi;
33 };
34 }  // namespace
35 
makeBidiIterator(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir)36 std::unique_ptr<SkBidiIterator> SkUnicode::makeBidiIterator(const uint16_t utf16[],
37                                                             int utf16Units,
38                                                             SkBidiIterator::Direction dir) {
39     UErrorCode status = U_ZERO_ERROR;
40     SkUnicodeBidi bidi(SkUnicode_IcuBidi::bidi_openSized(utf16Units, 0, &status));
41     if (U_FAILURE(status)) {
42         SkDEBUGF("Bidi error: %s", SkUnicode_IcuBidi::errorName(status));
43         return nullptr;
44     }
45     SkASSERT(bidi);
46     uint8_t bidiLevel = (dir == SkBidiIterator::kLTR) ? UBIDI_LTR : UBIDI_RTL;
47     // The required lifetime of utf16 isn't well documented.
48     // It appears it isn't used after ubidi_setPara except through ubidi_getText.
49     SkUnicode_IcuBidi::bidi_setPara(bidi.get(), (const UChar*)utf16, utf16Units, bidiLevel, nullptr, &status);
50     if (U_FAILURE(status)) {
51         SkDEBUGF("Bidi error: %s", SkUnicode_IcuBidi::errorName(status));
52         return nullptr;
53     }
54     return std::unique_ptr<SkBidiIterator>(new SkBidiIterator_icu(std::move(bidi)));
55 }
56 
makeBidiIterator(const char utf8[],int utf8Units,SkBidiIterator::Direction dir)57 std::unique_ptr<SkBidiIterator> SkUnicode::makeBidiIterator(const char utf8[],
58                                                             int utf8Units,
59                                                             SkBidiIterator::Direction dir) {
60     // Convert utf8 into utf16 since ubidi only accepts utf16
61     if (!SkTFitsIn<int32_t>(utf8Units)) {
62         SkDEBUGF("Bidi error: text too long");
63         return nullptr;
64     }
65 
66     // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
67     int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
68     if (utf16Units < 0) {
69         SkDEBUGF("Bidi error: Invalid utf8 input");
70         return nullptr;
71     }
72     std::unique_ptr<uint16_t[]> utf16(new uint16_t[utf16Units]);
73     SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16.get(), utf16Units, utf8, utf8Units);
74     SkASSERT(dstLen == utf16Units);
75 
76     return makeBidiIterator(utf16.get(), utf16Units, dir);
77 }
78 
79 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)80 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
81     SkUnichar val = SkUTF::NextUTF8(ptr, end);
82     return val < 0 ? 0xFFFD : val;
83 }
84 
extractBidi(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * bidiRegions)85 bool SkUnicode::extractBidi(const char utf8[],
86                                    int utf8Units,
87                                    TextDirection dir,
88                                    std::vector<BidiRegion>* bidiRegions) {
89     // Convert to UTF16 since for now bidi iterator only operates on utf16
90     auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
91 
92     // Create bidi iterator
93     UErrorCode status = U_ZERO_ERROR;
94     SkUnicodeBidi bidi(SkUnicode_IcuBidi::bidi_openSized(utf16.size(), 0, &status));
95     if (U_FAILURE(status)) {
96         SkDEBUGF("Bidi error: %s", SkUnicode_IcuBidi::errorName(status));
97         return false;
98     }
99     SkASSERT(bidi);
100     uint8_t bidiLevel = (dir == TextDirection::kLTR) ? UBIDI_LTR : UBIDI_RTL;
101     // The required lifetime of utf16 isn't well documented.
102     // It appears it isn't used after ubidi_setPara except through ubidi_getText.
103     SkUnicode_IcuBidi::bidi_setPara(bidi.get(), (const UChar*)utf16.c_str(), utf16.size(), bidiLevel, nullptr,
104                         &status);
105     if (U_FAILURE(status)) {
106         SkDEBUGF("Bidi error: %s", SkUnicode_IcuBidi::errorName(status));
107         return false;
108     }
109 
110     // Iterate through bidi regions and the result positions into utf8
111     const char* start8 = utf8;
112     const char* end8 = utf8 + utf8Units;
113     BidiLevel currentLevel = 0;
114 
115     Position pos8 = 0;
116     Position pos16 = 0;
117     Position end16 = SkUnicode_IcuBidi::bidi_getLength(bidi.get());
118 
119     if (end16 == 0) {
120         return true;
121     }
122     if (SkUnicode_IcuBidi::bidi_getDirection(bidi.get()) != UBIDI_MIXED) {
123         // The entire paragraph is unidirectional.
124         bidiRegions->emplace_back(0, utf8Units, SkUnicode_IcuBidi::bidi_getLevelAt(bidi.get(), 0));
125         return true;
126     }
127 
128     while (pos16 < end16) {
129         auto level = SkUnicode_IcuBidi::bidi_getLevelAt(bidi.get(), pos16);
130         if (pos16 == 0) {
131             currentLevel = level;
132         } else if (level != currentLevel) {
133             Position end = start8 - utf8;
134             bidiRegions->emplace_back(pos8, end, currentLevel);
135             currentLevel = level;
136             pos8 = end;
137         }
138         SkUnichar u = utf8_next(&start8, end8);
139         pos16 += SkUTF::ToUTF16(u);
140     }
141     Position end = start8 - utf8;
142     if (end != pos8) {
143         bidiRegions->emplace_back(pos8, end, currentLevel);
144     }
145     return true;
146 }
147