1 /* 2 * Copyright 2020 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 #ifndef SkUnicode_DEFINED 8 #define SkUnicode_DEFINED 9 #include "include/core/SkSpan.h" 10 #include "include/core/SkString.h" 11 #include "include/core/SkTypes.h" 12 #include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep 13 #include "include/private/base/SkTArray.h" 14 #include "src/base/SkUTF.h" 15 16 #include <cstddef> 17 #include <cstdint> 18 #include <memory> 19 #include <string> 20 #include <vector> 21 22 #if !defined(SKUNICODE_IMPLEMENTATION) 23 #define SKUNICODE_IMPLEMENTATION 0 24 #endif 25 26 #if !defined(SKUNICODE_API) 27 #if defined(SKUNICODE_DLL) 28 #if defined(_MSC_VER) 29 #if SKUNICODE_IMPLEMENTATION 30 #define SKUNICODE_API __declspec(dllexport) 31 #else 32 #define SKUNICODE_API __declspec(dllimport) 33 #endif 34 #else 35 #define SKUNICODE_API __attribute__((visibility("default"))) 36 #endif 37 #else 38 #define SKUNICODE_API 39 #endif 40 #endif 41 42 class SKUNICODE_API SkBidiIterator { 43 public: 44 typedef int32_t Position; 45 typedef uint8_t Level; 46 struct Region { RegionRegion47 Region(Position start, Position end, Level level) 48 : start(start), end(end), level(level) { } 49 Position start; 50 Position end; 51 Level level; 52 }; 53 enum Direction { 54 kLTR, 55 kRTL, 56 }; 57 virtual ~SkBidiIterator() = default; 58 virtual Position getLength() = 0; 59 virtual Level getLevelAt(Position) = 0; 60 }; 61 62 class SKUNICODE_API SkBreakIterator { 63 public: 64 typedef int32_t Position; 65 typedef int32_t Status; 66 virtual ~SkBreakIterator() = default; 67 virtual Position first() = 0; 68 virtual Position current() = 0; 69 virtual Position next() = 0; 70 virtual Status status() = 0; 71 virtual bool isDone() = 0; 72 virtual bool setText(const char utftext8[], int utf8Units) = 0; 73 virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; 74 }; 75 76 class SKUNICODE_API SkUnicode { 77 public: 78 enum CodeUnitFlags { 79 kNoCodeUnitFlag = 0x00, 80 kPartOfWhiteSpaceBreak = 0x01, 81 kGraphemeStart = 0x02, 82 kSoftLineBreakBefore = 0x04, 83 kHardLineBreakBefore = 0x08, 84 kPartOfIntraWordBreak = 0x10, 85 kControl = 0x20, 86 kTabulation = 0x40, 87 kGlyphClusterStart = 0x80, 88 }; 89 enum class TextDirection { 90 kLTR, 91 kRTL, 92 }; 93 typedef size_t Position; 94 typedef uint8_t BidiLevel; 95 struct BidiRegion { BidiRegionBidiRegion96 BidiRegion(Position start, Position end, BidiLevel level) 97 : start(start), end(end), level(level) { } 98 Position start; 99 Position end; 100 BidiLevel level; 101 }; 102 enum class LineBreakType { 103 kSoftLineBreak = 0, 104 kHardLineBreak = 100, 105 }; 106 107 enum class BreakType { 108 kWords, 109 kGraphemes, 110 kLines 111 }; 112 struct LineBreakBefore { LineBreakBeforeLineBreakBefore113 LineBreakBefore(Position pos, LineBreakType breakType) 114 : pos(pos), breakType(breakType) { } 115 Position pos; 116 LineBreakType breakType; 117 }; 118 119 virtual ~SkUnicode() = default; 120 121 virtual SkString toUpper(const SkString&) = 0; 122 123 // Methods used in SkShaper and SkText 124 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 125 (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; 126 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 127 (const char text[], int count, SkBidiIterator::Direction) = 0; 128 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator 129 (const char locale[], BreakType breakType) = 0; 130 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0; 131 132 // Methods used in SkParagraph 133 static bool isTabulation(SkUnicode::CodeUnitFlags flags); 134 static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags); 135 static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags); 136 static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags); 137 static bool isControl(SkUnicode::CodeUnitFlags flags); 138 static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags); 139 static bool extractBidi(const char utf8[], 140 int utf8Units, 141 TextDirection dir, 142 std::vector<BidiRegion>* bidiRegions); 143 virtual bool getBidiRegions(const char utf8[], 144 int utf8Units, 145 TextDirection dir, 146 std::vector<BidiRegion>* results) = 0; 147 virtual bool getWords(const char utf8[], int utf8Units, const char* locale, 148 std::vector<Position>* results) = 0; 149 virtual bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, 150 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 151 virtual bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, 152 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 153 154 static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); 155 static SkString convertUtf16ToUtf8(const std::u16string& utf16); 156 static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); 157 static std::u16string convertUtf8ToUtf16(const SkString& utf8); 158 159 template <typename Appender8, typename Appender16> extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)160 static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) { 161 size_t size8 = 0; 162 size_t size16 = 0; 163 auto ptr = utf8.begin(); 164 auto end = utf8.end(); 165 while (ptr < end) { 166 167 size_t index = ptr - utf8.begin(); 168 SkUnichar u = SkUTF::NextUTF8(&ptr, end); 169 170 // All UTF8 code units refer to the same codepoint 171 size_t next = ptr - utf8.begin(); 172 for (auto i = index; i < next; ++i) { 173 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 174 appender16(size8); 175 ++size16; 176 } 177 //SkASSERT(fUTF16IndexForUTF8Index.size() == next); 178 SkASSERT(size16 == next); 179 if (size16 != next) { 180 return false; 181 } 182 183 // One or two UTF16 code units refer to the same codepoint 184 uint16_t buffer[2]; 185 size_t count = SkUTF::ToUTF16(u, buffer); 186 //fUTF8IndexForUTF16Index.emplace_back(index); 187 appender8(index); 188 ++size8; 189 if (count > 1) { 190 //fUTF8IndexForUTF16Index.emplace_back(index); 191 appender8(index); 192 ++size8; 193 } 194 } 195 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 196 appender16(size8); 197 ++size16; 198 //fUTF8IndexForUTF16Index.emplace_back(fText.size()); 199 appender8(utf8.size()); 200 ++size8; 201 202 return true; 203 } 204 205 template <typename Callback> forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)206 void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { 207 const char* current = utf8; 208 const char* end = utf8 + utf8Units; 209 while (current < end) { 210 auto before = current - utf8; 211 SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 212 if (unichar < 0) unichar = 0xFFFD; 213 auto after = current - utf8; 214 uint16_t buffer[2]; 215 size_t count = SkUTF::ToUTF16(unichar, buffer); 216 callback(unichar, before, after, count); 217 } 218 } 219 220 template <typename Callback> forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)221 void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { 222 const char16_t* current = utf16; 223 const char16_t* end = utf16 + utf16Units; 224 while (current < end) { 225 auto before = current - utf16; 226 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)¤t, (const uint16_t*)end); 227 auto after = current - utf16; 228 callback(unichar, before, after); 229 } 230 } 231 232 template <typename Callback> forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)233 void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { 234 auto iter = makeBidiIterator(utf16, utf16Units, dir); 235 const uint16_t* start16 = utf16; 236 const uint16_t* end16 = utf16 + utf16Units; 237 SkBidiIterator::Level currentLevel = 0; 238 239 SkBidiIterator::Position pos16 = 0; 240 while (pos16 <= iter->getLength()) { 241 auto level = iter->getLevelAt(pos16); 242 if (pos16 == 0) { 243 currentLevel = level; 244 } else if (level != currentLevel) { 245 callback(pos16, start16 - utf16, currentLevel); 246 currentLevel = level; 247 } 248 if (start16 == end16) { 249 break; 250 } 251 SkUnichar u = SkUTF::NextUTF16(&start16, end16); 252 pos16 += SkUTF::ToUTF16(u); 253 } 254 } 255 256 template <typename Callback> forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,Callback && callback)257 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { 258 auto iter = makeBreakIterator(type); 259 iter->setText(utf16, utf16Units); 260 auto pos = iter->first(); 261 do { 262 callback(pos, iter->status()); 263 pos = iter->next(); 264 } while (!iter->isDone()); 265 } 266 267 virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; 268 269 virtual std::unique_ptr<SkUnicode> copy() = 0; 270 271 static std::unique_ptr<SkUnicode> Make(); 272 273 static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode(); 274 275 static std::unique_ptr<SkUnicode> MakeClientBasedUnicode( 276 SkSpan<char> text, 277 std::vector<SkUnicode::Position> words, 278 std::vector<SkUnicode::Position> graphemeBreaks, 279 std::vector<SkUnicode::LineBreakBefore> lineBreaks); 280 }; 281 282 namespace sknonstd { 283 template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {}; 284 } // namespace sknonstd 285 #endif // SkUnicode_DEFINED 286