1
2 /*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
9
10 #include "SkUtils.h"
11
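/*  UTF-8 leading byte -> total length of the sequence:

        0xxxxxxx    1 total
        10xxxxxx    // never a leading byte
        110xxxxx    2 total
        1110xxxx    3 total
        11110xxx    4 total

    SkUTF8_LeadByteToCount() stores (total - 1) as a 2-bit field for each of
    the 16 possible high nibbles of the leading byte, with nibble 0xF in the
    two topmost bits of a 32-bit word and nibble 0x0 in the two lowest:

        nibble:  F  E  D  C  B  A  9  8  7..0
        field:   11 10 01 01 xx xx xx xx 0...

    The "xx" fields belong to 10xxxxxx bytes, which are never leading bytes,
    so their value does not matter; they are stored as 0. That gives

        0xE5XX0000  ->  0xE5000000  ==  0xE5 << 24
*/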
22
23 #ifdef SK_DEBUG
assert_utf8_leadingbyte(unsigned c)24 static void assert_utf8_leadingbyte(unsigned c) {
25 SkASSERT(c <= 0xF7); // otherwise leading byte is too big (more than 4 bytes)
26 SkASSERT((c & 0xC0) != 0x80); // can't begin with a middle char
27 }
28
SkUTF8_LeadByteToCount(unsigned c)29 int SkUTF8_LeadByteToCount(unsigned c) {
30 assert_utf8_leadingbyte(c);
31 return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
32 }
33 #else
34 #define assert_utf8_leadingbyte(c)
35 #endif
36
SkUTF8_CountUnichars(const char utf8[])37 int SkUTF8_CountUnichars(const char utf8[]) {
38 SkASSERT(utf8);
39
40 int count = 0;
41
42 for (;;) {
43 int c = *(const uint8_t*)utf8;
44 if (c == 0) {
45 break;
46 }
47 utf8 += SkUTF8_LeadByteToCount(c);
48 count += 1;
49 }
50 return count;
51 }
52
SkUTF8_CountUnichars(const char utf8[],size_t byteLength)53 int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) {
54 SkASSERT(utf8 || 0 == byteLength);
55
56 int count = 0;
57 const char* stop = utf8 + byteLength;
58
59 while (utf8 < stop) {
60 utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8);
61 count += 1;
62 }
63 return count;
64 }
65
SkUTF8_ToUnichar(const char utf8[])66 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
67 SkASSERT(utf8);
68
69 const uint8_t* p = (const uint8_t*)utf8;
70 int c = *p;
71 int hic = c << 24;
72
73 assert_utf8_leadingbyte(c);
74
75 if (hic < 0) {
76 uint32_t mask = (uint32_t)~0x3F;
77 hic = SkLeftShift(hic, 1);
78 do {
79 c = (c << 6) | (*++p & 0x3F);
80 mask <<= 5;
81 } while ((hic = SkLeftShift(hic, 1)) < 0);
82 c &= ~mask;
83 }
84 return c;
85 }
86
SkUTF8_NextUnichar(const char ** ptr)87 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
88 SkASSERT(ptr && *ptr);
89
90 const uint8_t* p = (const uint8_t*)*ptr;
91 int c = *p;
92 int hic = c << 24;
93
94 assert_utf8_leadingbyte(c);
95
96 if (hic < 0) {
97 uint32_t mask = (uint32_t)~0x3F;
98 hic = SkLeftShift(hic, 1);
99 do {
100 c = (c << 6) | (*++p & 0x3F);
101 mask <<= 5;
102 } while ((hic = SkLeftShift(hic, 1)) < 0);
103 c &= ~mask;
104 }
105 *ptr = (char*)p + 1;
106 return c;
107 }
108
SkUTF8_PrevUnichar(const char ** ptr)109 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
110 SkASSERT(ptr && *ptr);
111
112 const char* p = *ptr;
113
114 if (*--p & 0x80) {
115 while (*--p & 0x40) {
116 ;
117 }
118 }
119
120 *ptr = (char*)p;
121 return SkUTF8_NextUnichar(&p);
122 }
123
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])124 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
125 if ((uint32_t)uni > 0x10FFFF) {
126 SkDEBUGFAIL("bad unichar");
127 return 0;
128 }
129
130 if (uni <= 127) {
131 if (utf8) {
132 *utf8 = (char)uni;
133 }
134 return 1;
135 }
136
137 char tmp[4];
138 char* p = tmp;
139 size_t count = 1;
140
141 SkDEBUGCODE(SkUnichar orig = uni;)
142
143 while (uni > 0x7F >> count) {
144 *p++ = (char)(0x80 | (uni & 0x3F));
145 uni >>= 6;
146 count += 1;
147 }
148
149 if (utf8) {
150 p = tmp;
151 utf8 += count;
152 while (p < tmp + count - 1) {
153 *--utf8 = *p++;
154 }
155 *--utf8 = (char)(~(0xFF >> count) | uni);
156 }
157
158 SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
159 return count;
160 }
161
162 ///////////////////////////////////////////////////////////////////////////////
163
SkUTF16_CountUnichars(const uint16_t src[])164 int SkUTF16_CountUnichars(const uint16_t src[]) {
165 SkASSERT(src);
166
167 int count = 0;
168 unsigned c;
169 while ((c = *src++) != 0) {
170 SkASSERT(!SkUTF16_IsLowSurrogate(c));
171 if (SkUTF16_IsHighSurrogate(c)) {
172 c = *src++;
173 SkASSERT(SkUTF16_IsLowSurrogate(c));
174 }
175 count += 1;
176 }
177 return count;
178 }
179
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)180 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
181 SkASSERT(src);
182
183 const uint16_t* stop = src + numberOf16BitValues;
184 int count = 0;
185 while (src < stop) {
186 unsigned c = *src++;
187 SkASSERT(!SkUTF16_IsLowSurrogate(c));
188 if (SkUTF16_IsHighSurrogate(c)) {
189 SkASSERT(src < stop);
190 c = *src++;
191 SkASSERT(SkUTF16_IsLowSurrogate(c));
192 }
193 count += 1;
194 }
195 return count;
196 }
197
SkUTF16_NextUnichar(const uint16_t ** srcPtr)198 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
199 SkASSERT(srcPtr && *srcPtr);
200
201 const uint16_t* src = *srcPtr;
202 SkUnichar c = *src++;
203
204 SkASSERT(!SkUTF16_IsLowSurrogate(c));
205 if (SkUTF16_IsHighSurrogate(c)) {
206 unsigned c2 = *src++;
207 SkASSERT(SkUTF16_IsLowSurrogate(c2));
208
209 // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
210 // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
211 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
212 }
213 *srcPtr = src;
214 return c;
215 }
216
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)217 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
218 SkASSERT(srcPtr && *srcPtr);
219
220 const uint16_t* src = *srcPtr;
221 SkUnichar c = *--src;
222
223 SkASSERT(!SkUTF16_IsHighSurrogate(c));
224 if (SkUTF16_IsLowSurrogate(c)) {
225 unsigned c2 = *--src;
226 SkASSERT(SkUTF16_IsHighSurrogate(c2));
227 c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
228 }
229 *srcPtr = src;
230 return c;
231 }
232
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])233 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
234 SkASSERT((unsigned)uni <= 0x10FFFF);
235
236 int extra = (uni > 0xFFFF);
237
238 if (dst) {
239 if (extra) {
240 // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
241 // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
242 dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
243 dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
244
245 SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
246 SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
247 } else {
248 dst[0] = SkToU16(uni);
249 SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
250 SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
251 }
252 }
253 return 1 + extra;
254 }
255
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])256 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
257 char utf8[]) {
258 SkASSERT(numberOf16BitValues >= 0);
259 if (numberOf16BitValues <= 0) {
260 return 0;
261 }
262
263 SkASSERT(utf16 != nullptr);
264
265 const uint16_t* stop = utf16 + numberOf16BitValues;
266 size_t size = 0;
267
268 if (utf8 == nullptr) { // just count
269 while (utf16 < stop) {
270 size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
271 }
272 } else {
273 char* start = utf8;
274 while (utf16 < stop) {
275 utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
276 }
277 size = utf8 - start;
278 }
279 return size;
280 }
281