1 #include <stdint.h>
2 #include <stdlib.h>
3
4 #include <harfbuzz-external.h>
5 #include <harfbuzz-impl.h>
6 #include <harfbuzz-shaper.h>
7 #include "harfbuzz-unicode.h"
8
9 #include "tables/grapheme-break-properties.h"
10 #include "tables/mirroring-properties.h"
11 #include "tables/script-properties.h"
12
13 uint32_t
utf16_to_code_point(const uint16_t * chars,size_t len,ssize_t * iter)14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
15 const uint16_t v = chars[(*iter)++];
16 if (HB_IsHighSurrogate(v)) {
17 // surrogate pair
18 if (*iter >= len) {
19 // the surrogate is incomplete.
20 return HB_InvalidCodePoint;
21 }
22 const uint16_t v2 = chars[(*iter)++];
23 if (!HB_IsLowSurrogate(v2)) {
24 // invalidate surrogate pair.
25 return HB_InvalidCodePoint;
26 }
27
28 return HB_SurrogateToUcs4(v, v2);
29 }
30
31 if (HB_IsLowSurrogate(v)) {
32 // this isn't a valid code point
33 return HB_InvalidCodePoint;
34 }
35
36 return v;
37 }
38
39 uint32_t
utf16_to_code_point_prev(const uint16_t * chars,size_t len,ssize_t * iter)40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
41 const uint16_t v = chars[(*iter)--];
42 if (HB_IsLowSurrogate(v)) {
43 // surrogate pair
44 if (*iter < 0) {
45 // the surrogate is incomplete.
46 return HB_InvalidCodePoint;
47 }
48 const uint16_t v2 = chars[(*iter)--];
49 if (!HB_IsHighSurrogate(v2)) {
50 // invalidate surrogate pair.
51 return HB_InvalidCodePoint;
52 }
53
54 return HB_SurrogateToUcs4(v2, v);
55 }
56
57 if (HB_IsHighSurrogate(v)) {
58 // this isn't a valid code point
59 return HB_InvalidCodePoint;
60 }
61
62 return v;
63 }
64
65 static int
script_property_cmp(const void * vkey,const void * vcandidate)66 script_property_cmp(const void *vkey, const void *vcandidate) {
67 const uint32_t key = (uint32_t) (intptr_t) vkey;
68 const struct script_property *candidate = vcandidate;
69
70 if (key < candidate->range_start) {
71 return -1;
72 } else if (key > candidate->range_end) {
73 return 1;
74 } else {
75 return 0;
76 }
77 }
78
79 HB_Script
code_point_to_script(uint32_t cp)80 code_point_to_script(uint32_t cp) {
81 /* BEGIN android-changed
82 For the purpose of aggregating script runs together, we treat space
83 as belonging to the same script as surrounding characters. This is a
84 performance optimization to keep the number of runs down. */
85 if (cp == ' ') return HB_Script_Inherited;
86 /* END android-changed */
87
88 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
89 script_properties_count,
90 sizeof(struct script_property),
91 script_property_cmp);
92 if (!vprop)
93 return HB_Script_Common;
94
95 return ((const struct script_property *) vprop)->script;
96 }
97
98 char
hb_utf16_script_run_next(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)99 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
100 const uint16_t *chars, size_t len, ssize_t *iter) {
101 if (*iter == len)
102 return 0;
103
104 output->pos = *iter;
105 const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
106 unsigned cps = 1;
107 if (init_cp == HB_InvalidCodePoint)
108 return 0;
109 const HB_Script init_script = code_point_to_script(init_cp);
110 HB_Script current_script = init_script;
111 output->script = init_script;
112
113 for (;;) {
114 if (*iter == len)
115 break;
116 const ssize_t prev_iter = *iter;
117 const uint32_t cp = utf16_to_code_point(chars, len, iter);
118 if (cp == HB_InvalidCodePoint)
119 return 0;
120 cps++;
121 const HB_Script script = code_point_to_script(cp);
122
123 if (script != current_script) {
124 /* BEGIN android-changed
125 The condition was not correct by doing "a == b == constant"
126 END android-changed */
127 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
128 // If we started off as inherited, we take whatever we can find.
129 output->script = script;
130 current_script = script;
131 continue;
132 } else if (script == HB_Script_Inherited) {
133 continue;
134 } else {
135 *iter = prev_iter;
136 cps--;
137 break;
138 }
139 }
140 }
141
142 if (output->script == HB_Script_Inherited)
143 output->script = HB_Script_Common;
144
145 output->length = *iter - output->pos;
146 if (num_code_points)
147 *num_code_points = cps;
148 return 1;
149 }
150
151 char
hb_utf16_script_run_prev(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)152 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
153 const uint16_t *chars, size_t len, ssize_t *iter) {
154 if (*iter == (size_t) -1)
155 return 0;
156
157 const size_t ending_index = *iter;
158 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
159 unsigned cps = 1;
160 if (init_cp == HB_InvalidCodePoint)
161 return 0;
162 const HB_Script init_script = code_point_to_script(init_cp);
163 HB_Script current_script = init_script;
164 output->script = init_script;
165
166 for (;;) {
167 if (*iter < 0)
168 break;
169 const ssize_t prev_iter = *iter;
170 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
171 if (cp == HB_InvalidCodePoint)
172 return 0;
173 cps++;
174 const HB_Script script = code_point_to_script(cp);
175
176 if (script != current_script) {
177 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
178 // If we started off as inherited, we take whatever we can find.
179 output->script = script;
180 current_script = script;
181 continue;
182 } else if (script == HB_Script_Inherited) {
183 /* BEGIN android-changed
184 We apply the same fix for Chrome to Android.
185 Chrome team will talk with upsteam about it.
186 Just assume that whatever follows this combining character is within
187 the same script. This is incorrect if you had language1 + combining
188 char + language 2, but that is rare and this code is suspicious
189 anyway.
190 END android-changed */
191 continue;
192 } else {
193 *iter = prev_iter;
194 cps--;
195 break;
196 }
197 }
198 }
199
200 if (output->script == HB_Script_Inherited)
201 output->script = HB_Script_Common;
202
203 output->pos = *iter + 1;
204 output->length = ending_index - *iter;
205 if (num_code_points)
206 *num_code_points = cps;
207 return 1;
208 }
209
210 static int
grapheme_break_property_cmp(const void * vkey,const void * vcandidate)211 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
212 const uint32_t key = (uint32_t) (intptr_t) vkey;
213 const struct grapheme_break_property *candidate = vcandidate;
214
215 if (key < candidate->range_start) {
216 return -1;
217 } else if (key > candidate->range_end) {
218 return 1;
219 } else {
220 return 0;
221 }
222 }
223
224 HB_GraphemeClass
HB_GetGraphemeClass(HB_UChar32 ch)225 HB_GetGraphemeClass(HB_UChar32 ch) {
226 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
227 grapheme_break_properties_count,
228 sizeof(struct grapheme_break_property),
229 grapheme_break_property_cmp);
230 if (!vprop)
231 return HB_Grapheme_Other;
232
233 return ((const struct grapheme_break_property *) vprop)->klass;
234 }
235
236 HB_WordClass
HB_GetWordClass(HB_UChar32 ch)237 HB_GetWordClass(HB_UChar32 ch) {
238 abort();
239 return 0;
240 }
241
242 HB_SentenceClass
HB_GetSentenceClass(HB_UChar32 ch)243 HB_GetSentenceClass(HB_UChar32 ch) {
244 abort();
245 return 0;
246 }
247
248 void
HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch,HB_GraphemeClass * gclass,HB_LineBreakClass * breakclass)249 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
250 *gclass = HB_GetGraphemeClass(ch);
251 *breakclass = HB_GetLineBreakClass(ch);
252 }
253
254 static int
mirroring_property_cmp(const void * vkey,const void * vcandidate)255 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
256 const uint32_t key = (uint32_t) (intptr_t) vkey;
257 const struct mirroring_property *candidate = vcandidate;
258
259 if (key < candidate->a) {
260 return -1;
261 } else if (key > candidate->a) {
262 return 1;
263 } else {
264 return 0;
265 }
266 }
267
268 HB_UChar16
HB_GetMirroredChar(HB_UChar16 ch)269 HB_GetMirroredChar(HB_UChar16 ch) {
270 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
271 mirroring_properties_count,
272 sizeof(struct mirroring_property),
273 mirroring_property_cmp);
274 if (!mprop)
275 return ch;
276
277 return ((const struct mirroring_property *) mprop)->b;
278 }
279
/* Stub: no library resolution is implemented here. Calling this function
 * terminates the process through abort(); all three arguments are ignored.
 */
void *
HB_Library_Resolve(const char *library, int version, const char *symbol) {
  (void) library;
  (void) version;
  (void) symbol;
  abort();
  return NULL;  // Follows abort(); only gives the function a return value.
}
285