1 #include <stdint.h>
2 #include <stdlib.h>
3
4 #include <harfbuzz-external.h>
5 #include <harfbuzz-impl.h>
6 #include <harfbuzz-shaper.h>
7 #include "harfbuzz-unicode.h"
8
9 #include "tables/grapheme-break-properties.h"
10 #include "tables/mirroring-properties.h"
11 #include "tables/script-properties.h"
12
13 uint32_t
utf16_to_code_point(const uint16_t * chars,size_t len,ssize_t * iter)14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
15 const uint16_t v = chars[(*iter)++];
16 if (HB_IsHighSurrogate(v)) {
17 // surrogate pair
18 if (*iter >= len) {
19 // the surrogate is incomplete.
20 return HB_InvalidCodePoint;
21 }
22 const uint16_t v2 = chars[(*iter)++];
23 if (!HB_IsLowSurrogate(v2)) {
24 // invalidate surrogate pair.
25 return HB_InvalidCodePoint;
26 }
27
28 return HB_SurrogateToUcs4(v, v2);
29 }
30
31 if (HB_IsLowSurrogate(v)) {
32 // this isn't a valid code point
33 return HB_InvalidCodePoint;
34 }
35
36 return v;
37 }
38
39 uint32_t
utf16_to_code_point_prev(const uint16_t * chars,size_t len,ssize_t * iter)40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
41 const uint16_t v = chars[(*iter)--];
42 if (HB_IsLowSurrogate(v)) {
43 // surrogate pair
44 if (*iter < 0) {
45 // the surrogate is incomplete.
46 return HB_InvalidCodePoint;
47 }
48 const uint16_t v2 = chars[(*iter)--];
49 if (!HB_IsHighSurrogate(v2)) {
50 // invalidate surrogate pair.
51 return HB_InvalidCodePoint;
52 }
53
54 return HB_SurrogateToUcs4(v2, v);
55 }
56
57 if (HB_IsHighSurrogate(v)) {
58 // this isn't a valid code point
59 return HB_InvalidCodePoint;
60 }
61
62 return v;
63 }
64
65 static int
script_property_cmp(const void * vkey,const void * vcandidate)66 script_property_cmp(const void *vkey, const void *vcandidate) {
67 const uint32_t key = (uint32_t) (intptr_t) vkey;
68 const struct script_property *candidate = vcandidate;
69
70 if (key < candidate->range_start) {
71 return -1;
72 } else if (key > candidate->range_end) {
73 return 1;
74 } else {
75 return 0;
76 }
77 }
78
79 HB_Script
code_point_to_script(uint32_t cp)80 code_point_to_script(uint32_t cp) {
81 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
82 script_properties_count,
83 sizeof(struct script_property),
84 script_property_cmp);
85 if (!vprop)
86 return HB_Script_Common;
87
88 return ((const struct script_property *) vprop)->script;
89 }
90
91 char
hb_utf16_script_run_next(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
93 const uint16_t *chars, size_t len, ssize_t *iter) {
94 if (*iter == len)
95 return 0;
96
97 output->pos = *iter;
98 const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
99 unsigned cps = 1;
100 if (init_cp == HB_InvalidCodePoint)
101 return 0;
102 const HB_Script init_script = code_point_to_script(init_cp);
103 HB_Script current_script = init_script;
104 output->script = init_script;
105
106 for (;;) {
107 if (*iter == len)
108 break;
109 const ssize_t prev_iter = *iter;
110 const uint32_t cp = utf16_to_code_point(chars, len, iter);
111 if (cp == HB_InvalidCodePoint)
112 return 0;
113 cps++;
114 const HB_Script script = code_point_to_script(cp);
115
116 if (script != current_script) {
117 /* BEGIN android-changed
118 The condition was not correct by doing "a == b == constant"
119 END android-changed */
120 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
121 // If we started off as inherited, we take whatever we can find.
122 output->script = script;
123 current_script = script;
124 continue;
125 } else if (script == HB_Script_Inherited) {
126 continue;
127 } else {
128 *iter = prev_iter;
129 cps--;
130 break;
131 }
132 }
133 }
134
135 if (output->script == HB_Script_Inherited)
136 output->script = HB_Script_Common;
137
138 output->length = *iter - output->pos;
139 if (num_code_points)
140 *num_code_points = cps;
141 return 1;
142 }
143
144 char
hb_utf16_script_run_prev(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)145 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
146 const uint16_t *chars, size_t len, ssize_t *iter) {
147 if (*iter == (size_t) -1)
148 return 0;
149
150 const size_t ending_index = *iter;
151 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
152 unsigned cps = 1;
153 if (init_cp == HB_InvalidCodePoint)
154 return 0;
155 const HB_Script init_script = code_point_to_script(init_cp);
156 HB_Script current_script = init_script;
157 output->script = init_script;
158
159 for (;;) {
160 if (*iter < 0)
161 break;
162 const ssize_t prev_iter = *iter;
163 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
164 if (cp == HB_InvalidCodePoint)
165 return 0;
166 cps++;
167 const HB_Script script = code_point_to_script(cp);
168
169 if (script != current_script) {
170 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
171 // If we started off as inherited, we take whatever we can find.
172 output->script = script;
173 current_script = script;
174 continue;
175 } else if (script == HB_Script_Inherited) {
176 /* BEGIN android-changed
177 We apply the same fix for Chrome to Android.
178 Chrome team will talk with upsteam about it.
179 Just assume that whatever follows this combining character is within
180 the same script. This is incorrect if you had language1 + combining
181 char + language 2, but that is rare and this code is suspicious
182 anyway.
183 END android-changed */
184 continue;
185 } else {
186 *iter = prev_iter;
187 cps--;
188 break;
189 }
190 }
191 }
192
193 if (output->script == HB_Script_Inherited)
194 output->script = HB_Script_Common;
195
196 output->pos = *iter + 1;
197 output->length = ending_index - *iter;
198 if (num_code_points)
199 *num_code_points = cps;
200 return 1;
201 }
202
203 static int
grapheme_break_property_cmp(const void * vkey,const void * vcandidate)204 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
205 const uint32_t key = (uint32_t) (intptr_t) vkey;
206 const struct grapheme_break_property *candidate = vcandidate;
207
208 if (key < candidate->range_start) {
209 return -1;
210 } else if (key > candidate->range_end) {
211 return 1;
212 } else {
213 return 0;
214 }
215 }
216
217 HB_GraphemeClass
HB_GetGraphemeClass(HB_UChar32 ch)218 HB_GetGraphemeClass(HB_UChar32 ch) {
219 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
220 grapheme_break_properties_count,
221 sizeof(struct grapheme_break_property),
222 grapheme_break_property_cmp);
223 if (!vprop)
224 return HB_Grapheme_Other;
225
226 return ((const struct grapheme_break_property *) vprop)->klass;
227 }
228
229 HB_WordClass
HB_GetWordClass(HB_UChar32 ch)230 HB_GetWordClass(HB_UChar32 ch) {
231 abort();
232 return 0;
233 }
234
235 HB_SentenceClass
HB_GetSentenceClass(HB_UChar32 ch)236 HB_GetSentenceClass(HB_UChar32 ch) {
237 abort();
238 return 0;
239 }
240
241 void
HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch,HB_GraphemeClass * gclass,HB_LineBreakClass * breakclass)242 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
243 *gclass = HB_GetGraphemeClass(ch);
244 *breakclass = HB_GetLineBreakClass(ch);
245 }
246
247 static int
mirroring_property_cmp(const void * vkey,const void * vcandidate)248 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
249 const uint32_t key = (uint32_t) (intptr_t) vkey;
250 const struct mirroring_property *candidate = vcandidate;
251
252 if (key < candidate->a) {
253 return -1;
254 } else if (key > candidate->a) {
255 return 1;
256 } else {
257 return 0;
258 }
259 }
260
261 HB_UChar16
HB_GetMirroredChar(HB_UChar16 ch)262 HB_GetMirroredChar(HB_UChar16 ch) {
263 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
264 mirroring_properties_count,
265 sizeof(struct mirroring_property),
266 mirroring_property_cmp);
267 if (!mprop)
268 return ch;
269
270 return ((const struct mirroring_property *) mprop)->b;
271 }
272
// Dynamic symbol resolution is not implemented in this file: any call
// terminates the process via abort().
void *
HB_Library_Resolve(const char *library, int version, const char *symbol) {
  // None of the arguments are inspected.
  (void) library;
  (void) version;
  (void) symbol;

  abort();
  // Not reached; present so the function has a return statement.
  return NULL;
}
278