1 /* 2 * Copyright 2020 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 #ifndef SkUnicode_DEFINED 8 #define SkUnicode_DEFINED 9 #include "include/core/SkRefCnt.h" 10 #include "include/core/SkSpan.h" 11 #include "include/core/SkString.h" 12 #include "include/core/SkTypes.h" 13 #include "include/private/base/SkTArray.h" 14 #include "include/private/base/SkTo.h" 15 #include "src/base/SkUTF.h" 16 #include <cstddef> 17 #include <cstdint> 18 #include <memory> 19 #include <string> 20 #include <vector> 21 namespace sknonstd { template <typename T> struct is_bitmask_enum; } 22 23 #if !defined(SKUNICODE_IMPLEMENTATION) 24 #define SKUNICODE_IMPLEMENTATION 0 25 #endif 26 27 #if !defined(SKUNICODE_API) 28 #if defined(SKUNICODE_DLL) 29 #if defined(_MSC_VER) 30 #if SKUNICODE_IMPLEMENTATION 31 #define SKUNICODE_API __declspec(dllexport) 32 #else 33 #define SKUNICODE_API __declspec(dllimport) 34 #endif 35 #else 36 #define SKUNICODE_API __attribute__((visibility("default"))) 37 #endif 38 #else 39 #define SKUNICODE_API 40 #endif 41 #endif 42 43 class SKUNICODE_API SkBidiIterator { 44 public: 45 typedef int32_t Position; 46 typedef uint8_t Level; 47 struct Region { RegionRegion48 Region(Position start, Position end, Level level) 49 : start(start), end(end), level(level) { } 50 Position start; 51 Position end; 52 Level level; 53 }; 54 enum Direction { 55 kLTR, 56 kRTL, 57 }; 58 virtual ~SkBidiIterator() = default; 59 virtual Position getLength() = 0; 60 virtual Level getLevelAt(Position) = 0; 61 }; 62 63 class SKUNICODE_API SkBreakIterator { 64 public: 65 typedef int32_t Position; 66 typedef int32_t Status; 67 virtual ~SkBreakIterator() = default; 68 virtual Position first() = 0; 69 virtual Position current() = 0; 70 virtual Position next() = 0; 71 virtual Status status() = 0; 72 virtual bool isDone() = 0; 73 virtual bool setText(const char utftext8[], int utf8Units) = 0; 74 virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; 75 }; 76 77 class SKUNICODE_API SkUnicode : public SkRefCnt { 78 public: 79 enum CodeUnitFlags { 80 kNoCodeUnitFlag = 0x00, 81 kPartOfWhiteSpaceBreak = 0x01, 82 kGraphemeStart = 0x02, 83 kSoftLineBreakBefore = 0x04, 84 kHardLineBreakBefore = 0x08, 85 kPartOfIntraWordBreak = 0x10, 86 kControl = 0x20, 87 kTabulation = 0x40, 88 kGlyphClusterStart = 0x80, 89 kIdeographic = 0x100, 90 kEmoji = 0x200, 91 kWordBreak = 0x400, 92 kSentenceBreak = 0x800, 93 #ifdef ENABLE_TEXT_ENHANCE 94 kCombine = 0x1000, 95 kPunctuation = 0x2000, 96 kEllipsis = 0x4000, 97 #endif 98 }; 99 enum class TextDirection { 100 kLTR, 101 kRTL, 102 }; 103 typedef size_t Position; 104 typedef uint8_t BidiLevel; 105 struct BidiRegion { BidiRegionBidiRegion106 BidiRegion(Position start, Position end, BidiLevel level) 107 : start(start), end(end), level(level) { } 108 Position start; 109 Position end; 110 BidiLevel level; 111 }; 112 enum class LineBreakType { 113 kSoftLineBreak = 0, 114 kHardLineBreak = 100, 115 }; 116 117 enum class BreakType { kWords, kGraphemes, kLines, kSentences }; 118 struct LineBreakBefore { LineBreakBeforeLineBreakBefore119 LineBreakBefore(Position pos, LineBreakType breakType) 120 : pos(pos), breakType(breakType) { } 121 Position pos; 122 LineBreakType breakType; 123 }; 124 125 ~SkUnicode() override = default; 126 127 // deprecated 128 virtual SkString toUpper(const SkString&) = 0; 129 virtual SkString toUpper(const SkString&, const char* locale) = 0; 130 131 virtual bool isControl(SkUnichar utf8) = 0; 132 virtual bool isWhitespace(SkUnichar utf8) = 0; 133 virtual bool isSpace(SkUnichar utf8) = 0; 134 virtual bool isTabulation(SkUnichar utf8) = 0; 135 virtual bool isHardBreak(SkUnichar utf8) = 0; 136 /** 137 * Returns if a code point may start an emoji sequence. 138 * Returns true for '#', '*', and '0'-'9' since they may start an emoji sequence. 139 * To determine if a list of code points begins with an emoji sequence, use 140 * getEmojiSequence. 141 **/ 142 virtual bool isEmoji(SkUnichar utf8) = 0; 143 virtual bool isEmojiComponent(SkUnichar utf8) = 0; 144 virtual bool isEmojiModifierBase(SkUnichar utf8) = 0; 145 virtual bool isEmojiModifier(SkUnichar utf8) = 0; 146 virtual bool isRegionalIndicator(SkUnichar utf8) = 0; 147 virtual bool isIdeographic(SkUnichar utf8) = 0; 148 149 // Methods used in SkShaper and SkText 150 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 151 (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; 152 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 153 (const char text[], int count, SkBidiIterator::Direction) = 0; 154 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator 155 (const char locale[], BreakType breakType) = 0; 156 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0; 157 158 // Methods used in SkParagraph 159 static bool hasTabulationFlag(SkUnicode::CodeUnitFlags flags); 160 static bool hasHardLineBreakFlag(SkUnicode::CodeUnitFlags flags); 161 static bool hasSoftLineBreakFlag(SkUnicode::CodeUnitFlags flags); 162 static bool hasGraphemeStartFlag(SkUnicode::CodeUnitFlags flags); 163 static bool hasControlFlag(SkUnicode::CodeUnitFlags flags); 164 static bool hasPartOfWhiteSpaceBreakFlag(SkUnicode::CodeUnitFlags flags); 165 #ifdef ENABLE_TEXT_ENHANCE 166 static bool isPunctuation(SkUnichar utf8); 167 static bool isEllipsis(SkUnichar utf8); 168 #endif 169 170 static bool extractBidi(const char utf8[], 171 int utf8Units, 172 TextDirection dir, 173 std::vector<BidiRegion>* bidiRegions); 174 virtual bool getBidiRegions(const char utf8[], 175 int utf8Units, 176 TextDirection dir, 177 std::vector<BidiRegion>* results) = 0; 178 // Returns results in utf16 179 virtual bool getWords(const char utf8[], int utf8Units, const char* locale, 180 std::vector<Position>* results) = 0; 181 virtual bool getUtf8Words(const char utf8[], 182 int utf8Units, 183 const char* locale, 184 std::vector<Position>* results) = 0; 185 virtual bool getSentences(const char utf8[], 186 int utf8Units, 187 const char* locale, 188 std::vector<Position>* results) = 0; 189 #ifdef ENABLE_TEXT_ENHANCE 190 virtual bool computeCodeUnitFlags( 191 char utf8[], int utf8Units, bool replaceTabs, const char locale[], 192 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 193 virtual bool computeCodeUnitFlags( 194 char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[], 195 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 196 #else 197 virtual bool computeCodeUnitFlags( 198 char utf8[], int utf8Units, bool replaceTabs, 199 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 200 virtual bool computeCodeUnitFlags( 201 char16_t utf16[], int utf16Units, bool replaceTabs, 202 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 203 #endif 204 205 static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); 206 static SkString convertUtf16ToUtf8(const std::u16string& utf16); 207 static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); 208 static std::u16string convertUtf8ToUtf16(const SkString& utf8); 209 210 template <typename Appender8, typename Appender16> extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)211 static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) { 212 size_t size8 = 0; 213 size_t size16 = 0; 214 auto ptr = utf8.begin(); 215 auto end = utf8.end(); 216 while (ptr < end) { 217 218 size_t index = SkToSizeT(ptr - utf8.begin()); 219 SkUnichar u = SkUTF::NextUTF8(&ptr, end); 220 221 // All UTF8 code units refer to the same codepoint 222 size_t next = SkToSizeT(ptr - utf8.begin()); 223 for (auto i = index; i < next; ++i) { 224 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 225 appender16(size8); 226 ++size16; 227 } 228 //SkASSERT(fUTF16IndexForUTF8Index.size() == next); 229 SkASSERT(size16 == next); 230 if (size16 != next) { 231 return false; 232 } 233 234 // One or two UTF16 code units refer to the same codepoint 235 uint16_t buffer[2]; 236 size_t count = SkUTF::ToUTF16(u, buffer); 237 //fUTF8IndexForUTF16Index.emplace_back(index); 238 appender8(index); 239 ++size8; 240 if (count > 1) { 241 //fUTF8IndexForUTF16Index.emplace_back(index); 242 appender8(index); 243 ++size8; 244 } 245 } 246 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 247 appender16(size8); 248 ++size16; 249 //fUTF8IndexForUTF16Index.emplace_back(fText.size()); 250 appender8(utf8.size()); 251 ++size8; 252 253 return true; 254 } 255 256 template <typename Callback> forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)257 void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { 258 const char* current = utf8; 259 const char* end = utf8 + utf8Units; 260 while (current < end) { 261 auto before = current - utf8; 262 SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 263 if (unichar < 0) unichar = 0xFFFD; 264 auto after = current - utf8; 265 uint16_t buffer[2]; 266 size_t count = SkUTF::ToUTF16(unichar, buffer); 267 callback(unichar, before, after, count); 268 } 269 } 270 271 template <typename Callback> forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)272 void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { 273 const char16_t* current = utf16; 274 const char16_t* end = utf16 + utf16Units; 275 while (current < end) { 276 auto before = current - utf16; 277 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)¤t, (const uint16_t*)end); 278 auto after = current - utf16; 279 callback(unichar, before, after); 280 } 281 } 282 283 template <typename Callback> forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)284 void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { 285 auto iter = makeBidiIterator(utf16, utf16Units, dir); 286 const uint16_t* start16 = utf16; 287 const uint16_t* end16 = utf16 + utf16Units; 288 SkBidiIterator::Level currentLevel = 0; 289 290 SkBidiIterator::Position pos16 = 0; 291 while (pos16 <= iter->getLength()) { 292 auto level = iter->getLevelAt(pos16); 293 if (pos16 == 0) { 294 currentLevel = level; 295 } else if (level != currentLevel) { 296 callback(pos16, start16 - utf16, currentLevel); 297 currentLevel = level; 298 } 299 if (start16 == end16) { 300 break; 301 } 302 SkUnichar u = SkUTF::NextUTF16(&start16, end16); 303 pos16 += SkUTF::ToUTF16(u); 304 } 305 } 306 307 template <typename Callback> 308 #ifdef ENABLE_TEXT_ENHANCE forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,const char locale[],Callback && callback)309 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, 310 const char locale[], Callback&& callback) { 311 auto iter = makeBreakIterator(type); 312 #else 313 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { 314 auto iter = makeBreakIterator(type); 315 #endif 316 iter->setText(utf16, utf16Units); 317 auto pos = iter->first(); 318 do { 319 callback(pos, iter->status()); 320 pos = iter->next(); 321 } while (!iter->isDone()); 322 } 323 324 virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; 325 }; 326 327 namespace sknonstd { 328 template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {}; 329 } // namespace sknonstd 330 331 #endif // SkUnicode_DEFINED 332