1 #include <stdint.h>
2 #include <stdlib.h>
3
4 #include <harfbuzz-external.h>
5 #include <harfbuzz-impl.h>
6 #include <harfbuzz-shaper.h>
7 #include "harfbuzz-unicode.h"
8
9 #include "tables/grapheme-break-properties.h"
10 #include "tables/mirroring-properties.h"
11 #include "tables/script-properties.h"
12
13 uint32_t
utf16_to_code_point(const uint16_t * chars,size_t len,ssize_t * iter)14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
15 const uint16_t v = chars[(*iter)++];
16 if (HB_IsHighSurrogate(v)) {
17 // surrogate pair
18 if (*iter >= len) {
19 // the surrogate is incomplete.
20 return HB_InvalidCodePoint;
21 }
22 const uint16_t v2 = chars[(*iter)++];
23 if (!HB_IsLowSurrogate(v2)) {
24 // invalidate surrogate pair.
25 return HB_InvalidCodePoint;
26 }
27
28 return HB_SurrogateToUcs4(v, v2);
29 }
30
31 if (HB_IsLowSurrogate(v)) {
32 // this isn't a valid code point
33 return HB_InvalidCodePoint;
34 }
35
36 return v;
37 }
38
39 uint32_t
utf16_to_code_point_prev(const uint16_t * chars,size_t len,ssize_t * iter)40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
41 const uint16_t v = chars[(*iter)--];
42 if (HB_IsLowSurrogate(v)) {
43 // surrogate pair
44 if (*iter < 0) {
45 // the surrogate is incomplete.
46 return HB_InvalidCodePoint;
47 }
48 const uint16_t v2 = chars[(*iter)--];
49 if (!HB_IsHighSurrogate(v2)) {
50 // invalidate surrogate pair.
51 return HB_InvalidCodePoint;
52 }
53
54 return HB_SurrogateToUcs4(v2, v);
55 }
56
57 if (HB_IsHighSurrogate(v)) {
58 // this isn't a valid code point
59 return HB_InvalidCodePoint;
60 }
61
62 return v;
63 }
64
65 static int
script_property_cmp(const void * vkey,const void * vcandidate)66 script_property_cmp(const void *vkey, const void *vcandidate) {
67 const uint32_t key = (uint32_t) (intptr_t) vkey;
68 const struct script_property *candidate = vcandidate;
69
70 if (key < candidate->range_start) {
71 return -1;
72 } else if (key > candidate->range_end) {
73 return 1;
74 } else {
75 return 0;
76 }
77 }
78
79 HB_Script
code_point_to_script(uint32_t cp)80 code_point_to_script(uint32_t cp) {
81 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
82 script_properties_count,
83 sizeof(struct script_property),
84 script_property_cmp);
85 if (!vprop)
86 return HB_Script_Common;
87
88 return ((const struct script_property *) vprop)->script;
89 }
90
91 char
hb_utf16_script_run_next(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
93 const uint16_t *chars, size_t len, ssize_t *iter) {
94 if (*iter == len)
95 return 0;
96
97 output->pos = *iter;
98 const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
99 unsigned cps = 1;
100 if (init_cp == HB_InvalidCodePoint)
101 return 0;
102 const HB_Script init_script = code_point_to_script(init_cp);
103 HB_Script current_script = init_script;
104 output->script = init_script;
105
106 for (;;) {
107 if (*iter == len)
108 break;
109 const ssize_t prev_iter = *iter;
110 const uint32_t cp = utf16_to_code_point(chars, len, iter);
111 if (cp == HB_InvalidCodePoint)
112 return 0;
113 cps++;
114 const HB_Script script = code_point_to_script(cp);
115
116 if (script != current_script) {
117 if (current_script == init_script == HB_Script_Inherited) {
118 // If we started off as inherited, we take whatever we can find.
119 output->script = script;
120 current_script = script;
121 continue;
122 } else if (script == HB_Script_Inherited) {
123 continue;
124 } else {
125 *iter = prev_iter;
126 cps--;
127 break;
128 }
129 }
130 }
131
132 if (output->script == HB_Script_Inherited)
133 output->script = HB_Script_Common;
134
135 output->length = *iter - output->pos;
136 if (num_code_points)
137 *num_code_points = cps;
138 return 1;
139 }
140
141 char
hb_utf16_script_run_prev(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
143 const uint16_t *chars, size_t len, ssize_t *iter) {
144 if (*iter == (size_t) -1)
145 return 0;
146
147 const size_t ending_index = *iter;
148 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
149 unsigned cps = 1;
150 if (init_cp == HB_InvalidCodePoint)
151 return 0;
152 const HB_Script init_script = code_point_to_script(init_cp);
153 HB_Script current_script = init_script;
154 output->script = init_script;
155
156 for (;;) {
157 if (*iter < 0)
158 break;
159 const ssize_t prev_iter = *iter;
160 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
161 if (cp == HB_InvalidCodePoint)
162 return 0;
163 cps++;
164 const HB_Script script = code_point_to_script(cp);
165
166 if (script != current_script) {
167 if (current_script == init_script == HB_Script_Inherited) {
168 // If we started off as inherited, we take whatever we can find.
169 output->script = script;
170 current_script = script;
171 continue;
172 } else if (script == HB_Script_Inherited) {
173 /* BEGIN android-changed
174 We apply the same fix for Chrome to Android.
175 Chrome team will talk with upsteam about it.
176 Just assume that whatever follows this combining character is within
177 the same script. This is incorrect if you had language1 + combining
178 char + language 2, but that is rare and this code is suspicious
179 anyway.
180 END android-changed */
181 continue;
182 } else {
183 *iter = prev_iter;
184 cps--;
185 break;
186 }
187 }
188 }
189
190 if (output->script == HB_Script_Inherited)
191 output->script = HB_Script_Common;
192
193 output->pos = *iter + 1;
194 output->length = ending_index - *iter;
195 if (num_code_points)
196 *num_code_points = cps;
197 return 1;
198 }
199
200 static int
grapheme_break_property_cmp(const void * vkey,const void * vcandidate)201 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
202 const uint32_t key = (uint32_t) (intptr_t) vkey;
203 const struct grapheme_break_property *candidate = vcandidate;
204
205 if (key < candidate->range_start) {
206 return -1;
207 } else if (key > candidate->range_end) {
208 return 1;
209 } else {
210 return 0;
211 }
212 }
213
214 HB_GraphemeClass
HB_GetGraphemeClass(HB_UChar32 ch)215 HB_GetGraphemeClass(HB_UChar32 ch) {
216 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
217 grapheme_break_properties_count,
218 sizeof(struct grapheme_break_property),
219 grapheme_break_property_cmp);
220 if (!vprop)
221 return HB_Grapheme_Other;
222
223 return ((const struct grapheme_break_property *) vprop)->klass;
224 }
225
226 HB_WordClass
HB_GetWordClass(HB_UChar32 ch)227 HB_GetWordClass(HB_UChar32 ch) {
228 abort();
229 return 0;
230 }
231
232 HB_SentenceClass
HB_GetSentenceClass(HB_UChar32 ch)233 HB_GetSentenceClass(HB_UChar32 ch) {
234 abort();
235 return 0;
236 }
237
238 void
HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch,HB_GraphemeClass * gclass,HB_LineBreakClass * breakclass)239 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
240 *gclass = HB_GetGraphemeClass(ch);
241 *breakclass = HB_GetLineBreakClass(ch);
242 }
243
244 static int
mirroring_property_cmp(const void * vkey,const void * vcandidate)245 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
246 const uint32_t key = (uint32_t) (intptr_t) vkey;
247 const struct mirroring_property *candidate = vcandidate;
248
249 if (key < candidate->a) {
250 return -1;
251 } else if (key > candidate->a) {
252 return 1;
253 } else {
254 return 0;
255 }
256 }
257
258 HB_UChar16
HB_GetMirroredChar(HB_UChar16 ch)259 HB_GetMirroredChar(HB_UChar16 ch) {
260 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
261 mirroring_properties_count,
262 sizeof(struct mirroring_property),
263 mirroring_property_cmp);
264 if (!mprop)
265 return ch;
266
267 return ((const struct mirroring_property *) mprop)->b;
268 }
269
/*
 * Dynamic symbol resolution is not implemented in this file: any call
 * terminates the process through abort().
 */
void *
HB_Library_Resolve(const char *library, int version, const char *symbol) {
  (void) library;  // unused
  (void) version;  // unused
  (void) symbol;   // unused
  abort();
  return NULL;  // not reached; present only because the function is non-void
}
275