• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 
9 #include "SkUtils.h"
10 
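/*  UTF-8 lead-byte bit patterns and the total length of the sequence each
    one starts:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    The length is looked up in a table packed into one 32-bit constant: two
    bits per possible high nibble of the lead byte, each entry holding
    (length - 1). Reading from the most significant bits down, the entries
    are for nibbles 0xF, 0xE, 0xD, 0xC, then 0xB..0x8 (continuation bytes,
    unused), then 0x7..0x0 (ASCII, all zero):

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24

    The entry for a byte c is therefore ((0xE5 << 24) >> ((c >> 4) * 2)) & 3.
*/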
21 
// Returns true unless c is one of the octet values rejected here:
// 0xC0, 0xC1, or anything from 0xF5 through 0xFF.
static bool utf8_byte_is_valid(uint8_t c) {
    if (c >= 0xF5) {
        return false;
    }
    return c != 0xC0 && c != 0xC1;
}
// Returns true when c has the bit pattern 10xxxxxx, i.e. lies in 0x80..0xBF.
static bool utf8_byte_is_continuation(uint8_t c) {
    return c >= 0x80 && c <= 0xBF;
}
utf8_byte_is_leading_byte(uint8_t c)28 static bool utf8_byte_is_leading_byte(uint8_t c) {
29     return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
30 }
31 
#ifdef SK_DEBUG
    // Debug-only check that c (narrowed with SkToU8) is a byte that may
    // start a UTF-8 sequence.
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
    }

    // Returns the total number of bytes (1..4) in the UTF-8 sequence whose
    // leading byte is c. This out-of-line definition exists only when
    // SK_DEBUG is defined.
    // NOTE(review): presumably non-debug builds get SkUTF8_LeadByteToCount
    // from a header - confirm.
    //
    // The length is read from a table packed two bits per high nibble into
    // the constant 0xE5 << 24; each entry stores (length - 1). The constant
    // is unsigned so that neither the left shift nor the following right
    // shift depends on signed-integer shift behavior; the selected two bits
    // are the same either way.
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);
        return (int)(((0xE5u << 24) >> (c >> 4 << 1)) & 3) + 1;
    }
#else
    // Assertions compile away when SK_DEBUG is not defined.
    #define assert_utf8_leadingbyte(c)
#endif
44 
/**
 * Classifies a single byte of a UTF-8 stream.
 *
 * @returns -1  iff invalid UTF8 byte,
 *           0  iff UTF8 continuation byte,
 *           1  iff ASCII byte,
 *           2  iff leading byte of 2-byte sequence,
 *           3  iff leading byte of 3-byte sequence, and
 *           4  iff leading byte of 4-byte sequence.
 *
 * I.e.: if return value > 0, then gives length of sequence.
 *
 * The multi-byte lengths come from a table packed two bits per high nibble
 * into the constant 0xE5 << 24, each entry holding (length - 1). The
 * constant is unsigned so the shifts never operate on a negative or
 * overflowing signed value.
*/
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;
    } else if (c < 0xC0) {
        return 0;
    } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
        return (int)(((0xE5u << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
    } else {
        return -1;
    }
}
// Interprets a utf8_byte_type() result: only strictly positive values
// (which are sequence lengths) denote a byte that can start a sequence.
static bool utf8_type_is_valid_leading_byte(int type) {
    return 0 < type;
}
67 
SkUTF8_CountUnichars(const char utf8[])68 int SkUTF8_CountUnichars(const char utf8[]) {
69     SkASSERT(utf8);
70 
71     int count = 0;
72 
73     for (;;) {
74         int c = *(const uint8_t*)utf8;
75         if (c == 0) {
76             break;
77         }
78         utf8 += SkUTF8_LeadByteToCount(c);
79         count += 1;
80     }
81     return count;
82 }
83 
84 // SAFE: returns -1 if invalid UTF-8
SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)85 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
86     SkASSERT(utf8 || 0 == byteLength);
87 
88     int         count = 0;
89     const char* stop = utf8 + byteLength;
90 
91     while (utf8 < stop) {
92         int type = utf8_byte_type(*(const uint8_t*)utf8);
93         SkASSERT(type >= -1 && type <= 4);
94         if (!utf8_type_is_valid_leading_byte(type) ||
95             utf8 + type > stop) {  // Sequence extends beyond end.
96             return -1;
97         }
98         while(type-- > 1) {
99             ++utf8;
100             if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
101                 return -1;
102             }
103         }
104         ++utf8;
105         ++count;
106     }
107     return count;
108 }
109 
SkUTF8_ToUnichar(const char utf8[])110 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
111     SkASSERT(utf8);
112 
113     const uint8_t*  p = (const uint8_t*)utf8;
114     int             c = *p;
115     int             hic = c << 24;
116 
117     assert_utf8_leadingbyte(c);
118 
119     if (hic < 0) {
120         uint32_t mask = (uint32_t)~0x3F;
121         hic = SkLeftShift(hic, 1);
122         do {
123             c = (c << 6) | (*++p & 0x3F);
124             mask <<= 5;
125         } while ((hic = SkLeftShift(hic, 1)) < 0);
126         c &= ~mask;
127     }
128     return c;
129 }
130 
131 // SAFE: returns -1 on invalid UTF-8 sequence.
SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)132 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
133     SkASSERT(ptr && *ptr);
134     SkASSERT(*ptr < end);
135     const uint8_t*  p = (const uint8_t*)*ptr;
136     int             c = *p;
137     int             hic = c << 24;
138 
139     if (!utf8_byte_is_leading_byte(c)) {
140         return -1;
141     }
142     if (hic < 0) {
143         uint32_t mask = (uint32_t)~0x3F;
144         hic = SkLeftShift(hic, 1);
145         do {
146             ++p;
147             if (p >= (const uint8_t*)end) {
148                 return -1;
149             }
150             // check before reading off end of array.
151             uint8_t nextByte = *p;
152             if (!utf8_byte_is_continuation(nextByte)) {
153                 return -1;
154             }
155             c = (c << 6) | (nextByte & 0x3F);
156             mask <<= 5;
157         } while ((hic = SkLeftShift(hic, 1)) < 0);
158         c &= ~mask;
159     }
160     *ptr = (char*)p + 1;
161     return c;
162 }
163 
SkUTF8_NextUnichar(const char ** ptr)164 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
165     SkASSERT(ptr && *ptr);
166 
167     const uint8_t*  p = (const uint8_t*)*ptr;
168     int             c = *p;
169     int             hic = c << 24;
170 
171     assert_utf8_leadingbyte(c);
172 
173     if (hic < 0) {
174         uint32_t mask = (uint32_t)~0x3F;
175         hic = SkLeftShift(hic, 1);
176         do {
177             c = (c << 6) | (*++p & 0x3F);
178             mask <<= 5;
179         } while ((hic = SkLeftShift(hic, 1)) < 0);
180         c &= ~mask;
181     }
182     *ptr = (char*)p + 1;
183     return c;
184 }
185 
SkUTF8_PrevUnichar(const char ** ptr)186 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
187     SkASSERT(ptr && *ptr);
188 
189     const char* p = *ptr;
190 
191     if (*--p & 0x80) {
192         while (*--p & 0x40) {
193             ;
194         }
195     }
196 
197     *ptr = (char*)p;
198     return SkUTF8_NextUnichar(&p);
199 }
200 
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])201 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
202     if ((uint32_t)uni > 0x10FFFF) {
203         SkDEBUGFAIL("bad unichar");
204         return 0;
205     }
206 
207     if (uni <= 127) {
208         if (utf8) {
209             *utf8 = (char)uni;
210         }
211         return 1;
212     }
213 
214     char    tmp[4];
215     char*   p = tmp;
216     size_t  count = 1;
217 
218     SkDEBUGCODE(SkUnichar orig = uni;)
219 
220     while (uni > 0x7F >> count) {
221         *p++ = (char)(0x80 | (uni & 0x3F));
222         uni >>= 6;
223         count += 1;
224     }
225 
226     if (utf8) {
227         p = tmp;
228         utf8 += count;
229         while (p < tmp + count - 1) {
230             *--utf8 = *p++;
231         }
232         *--utf8 = (char)(~(0xFF >> count) | uni);
233     }
234 
235     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
236     return count;
237 }
238 
239 ///////////////////////////////////////////////////////////////////////////////
240 
SkUTF16_CountUnichars(const uint16_t src[])241 int SkUTF16_CountUnichars(const uint16_t src[]) {
242     SkASSERT(src);
243 
244     int count = 0;
245     unsigned c;
246     while ((c = *src++) != 0) {
247         SkASSERT(!SkUTF16_IsLowSurrogate(c));
248         if (SkUTF16_IsHighSurrogate(c)) {
249             c = *src++;
250             SkASSERT(SkUTF16_IsLowSurrogate(c));
251         }
252         count += 1;
253     }
254     return count;
255 }
256 
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)257 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
258     SkASSERT(src);
259 
260     const uint16_t* stop = src + numberOf16BitValues;
261     int count = 0;
262     while (src < stop) {
263         unsigned c = *src++;
264         SkASSERT(!SkUTF16_IsLowSurrogate(c));
265         if (SkUTF16_IsHighSurrogate(c)) {
266             SkASSERT(src < stop);
267             c = *src++;
268             SkASSERT(SkUTF16_IsLowSurrogate(c));
269         }
270         count += 1;
271     }
272     return count;
273 }
274 
SkUTF16_NextUnichar(const uint16_t ** srcPtr)275 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
276     SkASSERT(srcPtr && *srcPtr);
277 
278     const uint16_t* src = *srcPtr;
279     SkUnichar       c = *src++;
280 
281     SkASSERT(!SkUTF16_IsLowSurrogate(c));
282     if (SkUTF16_IsHighSurrogate(c)) {
283         unsigned c2 = *src++;
284         SkASSERT(SkUTF16_IsLowSurrogate(c2));
285 
286         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
287         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
288         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
289     }
290     *srcPtr = src;
291     return c;
292 }
293 
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)294 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
295     SkASSERT(srcPtr && *srcPtr);
296 
297     const uint16_t* src = *srcPtr;
298     SkUnichar       c = *--src;
299 
300     SkASSERT(!SkUTF16_IsHighSurrogate(c));
301     if (SkUTF16_IsLowSurrogate(c)) {
302         unsigned c2 = *--src;
303         SkASSERT(SkUTF16_IsHighSurrogate(c2));
304         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
305     }
306     *srcPtr = src;
307     return c;
308 }
309 
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])310 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
311     SkASSERT((unsigned)uni <= 0x10FFFF);
312 
313     int extra = (uni > 0xFFFF);
314 
315     if (dst) {
316         if (extra) {
317             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
318             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
319             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
320             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
321 
322             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
323             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
324         } else {
325             dst[0] = SkToU16(uni);
326             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
327             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
328         }
329     }
330     return 1 + extra;
331 }
332 
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])333 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
334                       char utf8[]) {
335     SkASSERT(numberOf16BitValues >= 0);
336     if (numberOf16BitValues <= 0) {
337         return 0;
338     }
339 
340     SkASSERT(utf16 != nullptr);
341 
342     const uint16_t* stop = utf16 + numberOf16BitValues;
343     size_t          size = 0;
344 
345     if (utf8 == nullptr) {    // just count
346         while (utf16 < stop) {
347             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
348         }
349     } else {
350         char* start = utf8;
351         while (utf16 < stop) {
352             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
353         }
354         size = utf8 - start;
355     }
356     return size;
357 }
358 
359 const char SkHexadecimalDigits::gUpper[16] =
360            { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
361 const char SkHexadecimalDigits::gLower[16] =
362            { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
363 
364