1 /*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8
9 #include "SkUtils.h"
10
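/*  UTF-8 lead-byte patterns and the total length of the sequence they start:

        0xxxxxxx    1 total
        10xxxxxx    // never a leading byte
        110xxxxx    2 total
        1110xxxx    3 total
        11110xxx    4 total

    The length can be looked up in a 32-bit table that holds one 2-bit entry
    (length - 1) per value of the lead byte's high nibble, with the entry for
    nibble 0xF in the top two bits and "xx" marking the nibbles (0x8..0xB)
    that never start a sequence:

        11 10 01 01 xx xx xx xx 0...
        0xE5XX0000
        0xE5 << 24
*/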
21
/** Returns true iff c is a byte value that can occur anywhere in UTF-8 text. */
static bool utf8_byte_is_valid(uint8_t c) {
    // 0xC0 and 0xC1 are excluded, as is everything from 0xF5 upwards.
    if (c == 0xC0 || c == 0xC1) {
        return false;
    }
    return c <= 0xF4;
}
/** Returns true iff c has the form 10xxxxxx, i.e. is a UTF-8 continuation byte. */
static bool utf8_byte_is_continuation(uint8_t c) {
    // The two most significant bits must be exactly "10".
    return (c >> 6) == 0x2;
}
utf8_byte_is_leading_byte(uint8_t c)28 static bool utf8_byte_is_leading_byte(uint8_t c) {
29 return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
30 }
31
#ifdef SK_DEBUG
    /** Debug-only check that c is a byte that may begin a UTF-8 sequence. */
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
    }

    /**
     *  Returns the total number of bytes (1..4) in the UTF-8 sequence whose
     *  leading byte is c. Asserts that c is a legal leading byte.
     *
     *  NOTE(review): this definition only exists in SK_DEBUG builds;
     *  presumably other builds get an equivalent from the header - confirm.
     */
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);
        // 0xE5 << 24 is a table of 2-bit (length - 1) entries indexed by the
        // high nibble of c. The literal is unsigned because 0xE5 << 24 does
        // not fit in a signed int; the unsigned shift is fully defined and
        // yields the same low two bits.
        return (int)(((0xE5u << 24) >> (c >> 4 << 1)) & 3) + 1;
    }
#else
    // No checking outside debug builds.
    #define assert_utf8_leadingbyte(c)
#endif
44
/**
 *  Classifies a single byte of UTF-8 text.
 *
 *  @returns -1  iff invalid UTF8 byte,
 *            0  iff UTF8 continuation byte,
 *            1  iff ASCII byte,
 *            2  iff leading byte of 2-byte sequence,
 *            3  iff leading byte of 3-byte sequence, and
 *            4  iff leading byte of 4-byte sequence.
 *
 *  I.e.: if return value > 0, then gives length of sequence.
 */
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;
    }
    if (c < 0xC0) {
        return 0;
    }
    if (c >= 0xF5 || (c & 0xFE) == 0xC0) {  // "octet values C0, C1, F5 to FF never appear"
        return -1;
    }
    // 0xE5 << 24 is a table of 2-bit (length - 1) entries indexed by the high
    // nibble of c. The literal is unsigned because 0xE5 << 24 does not fit in
    // a signed int; the unsigned shift is fully defined and yields the same
    // low two bits.
    return (int)(((0xE5u << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
}
/**
 *  Returns true iff type, a value produced by utf8_byte_type(), denotes a
 *  byte that starts a sequence (ASCII or a multi-byte leading byte).
 */
static bool utf8_type_is_valid_leading_byte(int type) {
    return type >= 1;
}
67
SkUTF8_CountUnichars(const char utf8[])68 int SkUTF8_CountUnichars(const char utf8[]) {
69 SkASSERT(utf8);
70
71 int count = 0;
72
73 for (;;) {
74 int c = *(const uint8_t*)utf8;
75 if (c == 0) {
76 break;
77 }
78 utf8 += SkUTF8_LeadByteToCount(c);
79 count += 1;
80 }
81 return count;
82 }
83
84 // SAFE: returns -1 if invalid UTF-8
SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)85 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
86 SkASSERT(utf8 || 0 == byteLength);
87
88 int count = 0;
89 const char* stop = utf8 + byteLength;
90
91 while (utf8 < stop) {
92 int type = utf8_byte_type(*(const uint8_t*)utf8);
93 SkASSERT(type >= -1 && type <= 4);
94 if (!utf8_type_is_valid_leading_byte(type) ||
95 utf8 + type > stop) { // Sequence extends beyond end.
96 return -1;
97 }
98 while(type-- > 1) {
99 ++utf8;
100 if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
101 return -1;
102 }
103 }
104 ++utf8;
105 ++count;
106 }
107 return count;
108 }
109
SkUTF8_ToUnichar(const char utf8[])110 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
111 SkASSERT(utf8);
112
113 const uint8_t* p = (const uint8_t*)utf8;
114 int c = *p;
115 int hic = c << 24;
116
117 assert_utf8_leadingbyte(c);
118
119 if (hic < 0) {
120 uint32_t mask = (uint32_t)~0x3F;
121 hic = SkLeftShift(hic, 1);
122 do {
123 c = (c << 6) | (*++p & 0x3F);
124 mask <<= 5;
125 } while ((hic = SkLeftShift(hic, 1)) < 0);
126 c &= ~mask;
127 }
128 return c;
129 }
130
131 // SAFE: returns -1 on invalid UTF-8 sequence.
SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)132 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
133 SkASSERT(ptr && *ptr);
134 SkASSERT(*ptr < end);
135 const uint8_t* p = (const uint8_t*)*ptr;
136 int c = *p;
137 int hic = c << 24;
138
139 if (!utf8_byte_is_leading_byte(c)) {
140 return -1;
141 }
142 if (hic < 0) {
143 uint32_t mask = (uint32_t)~0x3F;
144 hic = SkLeftShift(hic, 1);
145 do {
146 ++p;
147 if (p >= (const uint8_t*)end) {
148 return -1;
149 }
150 // check before reading off end of array.
151 uint8_t nextByte = *p;
152 if (!utf8_byte_is_continuation(nextByte)) {
153 return -1;
154 }
155 c = (c << 6) | (nextByte & 0x3F);
156 mask <<= 5;
157 } while ((hic = SkLeftShift(hic, 1)) < 0);
158 c &= ~mask;
159 }
160 *ptr = (char*)p + 1;
161 return c;
162 }
163
SkUTF8_NextUnichar(const char ** ptr)164 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
165 SkASSERT(ptr && *ptr);
166
167 const uint8_t* p = (const uint8_t*)*ptr;
168 int c = *p;
169 int hic = c << 24;
170
171 assert_utf8_leadingbyte(c);
172
173 if (hic < 0) {
174 uint32_t mask = (uint32_t)~0x3F;
175 hic = SkLeftShift(hic, 1);
176 do {
177 c = (c << 6) | (*++p & 0x3F);
178 mask <<= 5;
179 } while ((hic = SkLeftShift(hic, 1)) < 0);
180 c &= ~mask;
181 }
182 *ptr = (char*)p + 1;
183 return c;
184 }
185
SkUTF8_PrevUnichar(const char ** ptr)186 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
187 SkASSERT(ptr && *ptr);
188
189 const char* p = *ptr;
190
191 if (*--p & 0x80) {
192 while (*--p & 0x40) {
193 ;
194 }
195 }
196
197 *ptr = (char*)p;
198 return SkUTF8_NextUnichar(&p);
199 }
200
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])201 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
202 if ((uint32_t)uni > 0x10FFFF) {
203 SkDEBUGFAIL("bad unichar");
204 return 0;
205 }
206
207 if (uni <= 127) {
208 if (utf8) {
209 *utf8 = (char)uni;
210 }
211 return 1;
212 }
213
214 char tmp[4];
215 char* p = tmp;
216 size_t count = 1;
217
218 SkDEBUGCODE(SkUnichar orig = uni;)
219
220 while (uni > 0x7F >> count) {
221 *p++ = (char)(0x80 | (uni & 0x3F));
222 uni >>= 6;
223 count += 1;
224 }
225
226 if (utf8) {
227 p = tmp;
228 utf8 += count;
229 while (p < tmp + count - 1) {
230 *--utf8 = *p++;
231 }
232 *--utf8 = (char)(~(0xFF >> count) | uni);
233 }
234
235 SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
236 return count;
237 }
238
239 ///////////////////////////////////////////////////////////////////////////////
240
SkUTF16_CountUnichars(const uint16_t src[])241 int SkUTF16_CountUnichars(const uint16_t src[]) {
242 SkASSERT(src);
243
244 int count = 0;
245 unsigned c;
246 while ((c = *src++) != 0) {
247 SkASSERT(!SkUTF16_IsLowSurrogate(c));
248 if (SkUTF16_IsHighSurrogate(c)) {
249 c = *src++;
250 SkASSERT(SkUTF16_IsLowSurrogate(c));
251 }
252 count += 1;
253 }
254 return count;
255 }
256
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)257 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
258 SkASSERT(src);
259
260 const uint16_t* stop = src + numberOf16BitValues;
261 int count = 0;
262 while (src < stop) {
263 unsigned c = *src++;
264 SkASSERT(!SkUTF16_IsLowSurrogate(c));
265 if (SkUTF16_IsHighSurrogate(c)) {
266 SkASSERT(src < stop);
267 c = *src++;
268 SkASSERT(SkUTF16_IsLowSurrogate(c));
269 }
270 count += 1;
271 }
272 return count;
273 }
274
SkUTF16_NextUnichar(const uint16_t ** srcPtr)275 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
276 SkASSERT(srcPtr && *srcPtr);
277
278 const uint16_t* src = *srcPtr;
279 SkUnichar c = *src++;
280
281 SkASSERT(!SkUTF16_IsLowSurrogate(c));
282 if (SkUTF16_IsHighSurrogate(c)) {
283 unsigned c2 = *src++;
284 SkASSERT(SkUTF16_IsLowSurrogate(c2));
285
286 // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
287 // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
288 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
289 }
290 *srcPtr = src;
291 return c;
292 }
293
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)294 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
295 SkASSERT(srcPtr && *srcPtr);
296
297 const uint16_t* src = *srcPtr;
298 SkUnichar c = *--src;
299
300 SkASSERT(!SkUTF16_IsHighSurrogate(c));
301 if (SkUTF16_IsLowSurrogate(c)) {
302 unsigned c2 = *--src;
303 SkASSERT(SkUTF16_IsHighSurrogate(c2));
304 c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
305 }
306 *srcPtr = src;
307 return c;
308 }
309
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])310 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
311 SkASSERT((unsigned)uni <= 0x10FFFF);
312
313 int extra = (uni > 0xFFFF);
314
315 if (dst) {
316 if (extra) {
317 // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
318 // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
319 dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
320 dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
321
322 SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
323 SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
324 } else {
325 dst[0] = SkToU16(uni);
326 SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
327 SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
328 }
329 }
330 return 1 + extra;
331 }
332
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])333 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
334 char utf8[]) {
335 SkASSERT(numberOf16BitValues >= 0);
336 if (numberOf16BitValues <= 0) {
337 return 0;
338 }
339
340 SkASSERT(utf16 != nullptr);
341
342 const uint16_t* stop = utf16 + numberOf16BitValues;
343 size_t size = 0;
344
345 if (utf8 == nullptr) { // just count
346 while (utf16 < stop) {
347 size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
348 }
349 } else {
350 char* start = utf8;
351 while (utf16 < stop) {
352 utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
353 }
354 size = utf8 - start;
355 }
356 return size;
357 }
358
359 const char SkHexadecimalDigits::gUpper[16] =
360 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
361 const char SkHexadecimalDigits::gLower[16] =
362 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
363
364