1 /* 2 * Copyright 2020 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 #ifndef SkUnicode_DEFINED 8 #define SkUnicode_DEFINED 9 #include "include/core/SkSpan.h" 10 #include "include/core/SkString.h" 11 #include "include/core/SkTypes.h" 12 #include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep 13 #include "include/private/SkTArray.h" 14 #include "include/private/SkTo.h" 15 #include "src/utils/SkUTF.h" 16 17 #include <cstddef> 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <vector> 22 23 #if !defined(SKUNICODE_IMPLEMENTATION) 24 #define SKUNICODE_IMPLEMENTATION 0 25 #endif 26 27 #if !defined(SKUNICODE_API) 28 #if defined(SKUNICODE_DLL) 29 #if defined(_MSC_VER) 30 #if SKUNICODE_IMPLEMENTATION 31 #define SKUNICODE_API __declspec(dllexport) 32 #else 33 #define SKUNICODE_API __declspec(dllimport) 34 #endif 35 #else 36 #define SKUNICODE_API __attribute__((visibility("default"))) 37 #endif 38 #else 39 #define SKUNICODE_API 40 #endif 41 #endif 42 43 class SKUNICODE_API SkBidiIterator { 44 public: 45 typedef int32_t Position; 46 typedef uint8_t Level; 47 struct Region { RegionRegion48 Region(Position start, Position end, Level level) 49 : start(start), end(end), level(level) { } 50 Position start; 51 Position end; 52 Level level; 53 }; 54 enum Direction { 55 kLTR, 56 kRTL, 57 }; 58 virtual ~SkBidiIterator() = default; 59 virtual Position getLength() = 0; 60 virtual Level getLevelAt(Position) = 0; 61 }; 62 63 class SKUNICODE_API SkBreakIterator { 64 public: 65 typedef int32_t Position; 66 typedef int32_t Status; 67 virtual ~SkBreakIterator() = default; 68 virtual Position first() = 0; 69 virtual Position current() = 0; 70 virtual Position next() = 0; 71 virtual Status status() = 0; 72 virtual bool isDone() = 0; 73 virtual bool setText(const char utftext8[], int utf8Units) = 0; 74 virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; 75 }; 76 77 class SKUNICODE_API SkUnicode { 78 public: 79 enum CodeUnitFlags { 80 kNoCodeUnitFlag = 0x00, 81 kPartOfWhiteSpaceBreak = 0x01, 82 kGraphemeStart = 0x02, 83 kSoftLineBreakBefore = 0x04, 84 kHardLineBreakBefore = 0x08, 85 kPartOfIntraWordBreak = 0x10, 86 kControl = 0x20, 87 kTabulation = 0x40, 88 kGlyphClusterStart = 0x80, 89 kIdeographic = 0x100, 90 #ifdef OHOS_SUPPORT 91 kCombine = 0x200, 92 kPunctuation = 0x400, 93 kEllipsis = 0x800, 94 #endif 95 }; 96 enum class TextDirection { 97 kLTR, 98 kRTL, 99 }; 100 typedef size_t Position; 101 typedef uint8_t BidiLevel; 102 struct BidiRegion { BidiRegionBidiRegion103 BidiRegion(Position start, Position end, BidiLevel level) 104 : start(start), end(end), level(level) { } 105 Position start; 106 Position end; 107 BidiLevel level; 108 }; 109 enum class LineBreakType { 110 kSoftLineBreak = 0, 111 kHardLineBreak = 100, 112 }; 113 114 enum class BreakType { 115 kWords, 116 kGraphemes, 117 kLines 118 }; 119 struct LineBreakBefore { LineBreakBeforeLineBreakBefore120 LineBreakBefore(Position pos, LineBreakType breakType) 121 : pos(pos), breakType(breakType) { } 122 Position pos; 123 LineBreakType breakType; 124 }; 125 126 virtual ~SkUnicode() = default; 127 128 virtual SkString toUpper(const SkString&) = 0; 129 130 // Methods used in SkShaper and SkText 131 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 132 (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; 133 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 134 (const char text[], int count, SkBidiIterator::Direction) = 0; 135 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator 136 (const char locale[], BreakType breakType) = 0; 137 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0; 138 139 // Methods used in SkParagraph 140 static bool isTabulation(SkUnicode::CodeUnitFlags flags); 141 static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags); 142 static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags); 143 static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags); 144 static bool isControl(SkUnicode::CodeUnitFlags flags); 145 static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags); 146 static bool isIdeographic(SkUnichar utf8); 147 #ifdef OHOS_SUPPORT 148 static bool isPunctuation(SkUnichar utf8); 149 static bool isEllipsis(SkUnichar utf8); 150 #endif 151 static bool extractBidi(const char utf8[], 152 int utf8Units, 153 TextDirection dir, 154 std::vector<BidiRegion>* bidiRegions); 155 virtual bool getBidiRegions(const char utf8[], 156 int utf8Units, 157 TextDirection dir, 158 std::vector<BidiRegion>* results) = 0; 159 virtual bool getWords(const char utf8[], int utf8Units, const char* locale, 160 std::vector<Position>* results) = 0; 161 #ifdef OHOS_SUPPORT 162 virtual bool computeCodeUnitFlags( 163 char utf8[], int utf8Units, bool replaceTabs, const char locale[], 164 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 165 virtual bool computeCodeUnitFlags( 166 char16_t utf16[], int utf16Units, bool replaceTabs, const char locale[], 167 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 168 #else 169 virtual bool computeCodeUnitFlags( 170 char utf8[], int utf8Units, bool replaceTabs, 171 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 172 virtual bool computeCodeUnitFlags( 173 char16_t utf16[], int utf16Units, bool replaceTabs, 174 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 175 #endif 176 177 static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); 178 static SkString convertUtf16ToUtf8(const std::u16string& utf16); 179 static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); 180 static std::u16string convertUtf8ToUtf16(const SkString& utf8); 181 182 template <typename Appender8, typename Appender16> extractUtfConversionMapping(SkSpan<const char> utf8,Appender8 && appender8,Appender16 && appender16)183 static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) { 184 size_t size8 = 0; 185 size_t size16 = 0; 186 auto ptr = utf8.begin(); 187 auto end = utf8.end(); 188 while (ptr < end) { 189 190 size_t index = SkToSizeT(ptr - utf8.begin()); 191 SkUnichar u = SkUTF::NextUTF8(&ptr, end); 192 193 // All UTF8 code units refer to the same codepoint 194 size_t next = SkToSizeT(ptr - utf8.begin()); 195 for (auto i = index; i < next; ++i) { 196 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 197 appender16(size8); 198 ++size16; 199 } 200 //SkASSERT(fUTF16IndexForUTF8Index.size() == next); 201 SkASSERT(size16 == next); 202 if (size16 != next) { 203 return false; 204 } 205 206 // One or two UTF16 code units refer to the same codepoint 207 uint16_t buffer[2]; 208 size_t count = SkUTF::ToUTF16(u, buffer); 209 //fUTF8IndexForUTF16Index.emplace_back(index); 210 appender8(index); 211 ++size8; 212 if (count > 1) { 213 //fUTF8IndexForUTF16Index.emplace_back(index); 214 appender8(index); 215 ++size8; 216 } 217 } 218 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 219 appender16(size8); 220 ++size16; 221 //fUTF8IndexForUTF16Index.emplace_back(fText.size()); 222 appender8(utf8.size()); 223 ++size8; 224 225 return true; 226 } 227 228 template <typename Callback> forEachCodepoint(const char * utf8,int32_t utf8Units,Callback && callback)229 void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { 230 const char* current = utf8; 231 const char* end = utf8 + utf8Units; 232 while (current < end) { 233 auto before = current - utf8; 234 SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 235 if (unichar < 0) unichar = 0xFFFD; 236 auto after = current - utf8; 237 uint16_t buffer[2]; 238 size_t count = SkUTF::ToUTF16(unichar, buffer); 239 callback(unichar, before, after, count); 240 } 241 } 242 243 template <typename Callback> forEachCodepoint(const char16_t * utf16,int32_t utf16Units,Callback && callback)244 void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { 245 const char16_t* current = utf16; 246 const char16_t* end = utf16 + utf16Units; 247 while (current < end) { 248 auto before = current - utf16; 249 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)¤t, (const uint16_t*)end); 250 auto after = current - utf16; 251 callback(unichar, before, after); 252 } 253 } 254 255 template <typename Callback> forEachBidiRegion(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir,Callback && callback)256 void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { 257 auto iter = makeBidiIterator(utf16, utf16Units, dir); 258 const uint16_t* start16 = utf16; 259 const uint16_t* end16 = utf16 + utf16Units; 260 SkBidiIterator::Level currentLevel = 0; 261 262 SkBidiIterator::Position pos16 = 0; 263 while (pos16 <= iter->getLength()) { 264 auto level = iter->getLevelAt(pos16); 265 if (pos16 == 0) { 266 currentLevel = level; 267 } else if (level != currentLevel) { 268 callback(pos16, start16 - utf16, currentLevel); 269 currentLevel = level; 270 } 271 if (start16 == end16) { 272 break; 273 } 274 SkUnichar u = SkUTF::NextUTF16(&start16, end16); 275 pos16 += SkUTF::ToUTF16(u); 276 } 277 } 278 279 template <typename Callback> 280 #ifdef OHOS_SUPPORT forEachBreak(const char16_t utf16[],int utf16Units,SkUnicode::BreakType type,const char locale[],Callback && callback)281 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, 282 const char locale[], Callback&& callback) { 283 auto iter = makeBreakIterator(type); 284 #else 285 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { 286 auto iter = makeBreakIterator(type); 287 #endif 288 iter->setText(utf16, utf16Units); 289 auto pos = iter->first(); 290 do { 291 callback(pos, iter->status()); 292 pos = iter->next(); 293 } while (!iter->isDone()); 294 } 295 296 virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; 297 298 virtual std::unique_ptr<SkUnicode> copy() = 0; 299 300 static std::unique_ptr<SkUnicode> Make(); 301 302 static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode(); 303 304 static std::unique_ptr<SkUnicode> MakeClientBasedUnicode( 305 SkSpan<char> text, 306 std::vector<SkUnicode::Position> words, 307 std::vector<SkUnicode::Position> graphemeBreaks, 308 std::vector<SkUnicode::LineBreakBefore> lineBreaks); 309 }; 310 311 namespace sknonstd { 312 template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {}; 313 } // namespace sknonstd 314 #endif // SkUnicode_DEFINED 315