• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdint.h>
2 #include <stdlib.h>
3 
4 #include <harfbuzz-external.h>
5 #include <harfbuzz-impl.h>
6 #include <harfbuzz-shaper.h>
7 #include "harfbuzz-unicode.h"
8 
9 #include "tables/grapheme-break-properties.h"
10 #include "tables/mirroring-properties.h"
11 #include "tables/script-properties.h"
12 
13 uint32_t
utf16_to_code_point(const uint16_t * chars,size_t len,ssize_t * iter)14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
15   const uint16_t v = chars[(*iter)++];
16   if (HB_IsHighSurrogate(v)) {
17     // surrogate pair
18     if (*iter >= len) {
19       // the surrogate is incomplete.
20       return HB_InvalidCodePoint;
21     }
22     const uint16_t v2 = chars[(*iter)++];
23     if (!HB_IsLowSurrogate(v2)) {
24       // invalidate surrogate pair.
25       return HB_InvalidCodePoint;
26     }
27 
28     return HB_SurrogateToUcs4(v, v2);
29   }
30 
31   if (HB_IsLowSurrogate(v)) {
32     // this isn't a valid code point
33     return HB_InvalidCodePoint;
34   }
35 
36   return v;
37 }
38 
39 uint32_t
utf16_to_code_point_prev(const uint16_t * chars,size_t len,ssize_t * iter)40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
41   const uint16_t v = chars[(*iter)--];
42   if (HB_IsLowSurrogate(v)) {
43     // surrogate pair
44     if (*iter < 0) {
45       // the surrogate is incomplete.
46       return HB_InvalidCodePoint;
47     }
48     const uint16_t v2 = chars[(*iter)--];
49     if (!HB_IsHighSurrogate(v2)) {
50       // invalidate surrogate pair.
51       return HB_InvalidCodePoint;
52     }
53 
54     return HB_SurrogateToUcs4(v2, v);
55   }
56 
57   if (HB_IsHighSurrogate(v)) {
58     // this isn't a valid code point
59     return HB_InvalidCodePoint;
60   }
61 
62   return v;
63 }
64 
65 static int
script_property_cmp(const void * vkey,const void * vcandidate)66 script_property_cmp(const void *vkey, const void *vcandidate) {
67   const uint32_t key = (uint32_t) (intptr_t) vkey;
68   const struct script_property *candidate = vcandidate;
69 
70   if (key < candidate->range_start) {
71     return -1;
72   } else if (key > candidate->range_end) {
73     return 1;
74   } else {
75     return 0;
76   }
77 }
78 
79 HB_Script
code_point_to_script(uint32_t cp)80 code_point_to_script(uint32_t cp) {
81   const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
82                               script_properties_count,
83                               sizeof(struct script_property),
84                               script_property_cmp);
85   if (!vprop)
86     return HB_Script_Common;
87 
88   return ((const struct script_property *) vprop)->script;
89 }
90 
91 char
hb_utf16_script_run_next(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
93                          const uint16_t *chars, size_t len, ssize_t *iter) {
94   if (*iter == len)
95     return 0;
96 
97   output->pos = *iter;
98   const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
99   unsigned cps = 1;
100   if (init_cp == HB_InvalidCodePoint)
101     return 0;
102   const HB_Script init_script = code_point_to_script(init_cp);
103   HB_Script current_script = init_script;
104   output->script = init_script;
105 
106   for (;;) {
107     if (*iter == len)
108       break;
109     const ssize_t prev_iter = *iter;
110     const uint32_t cp = utf16_to_code_point(chars, len, iter);
111     if (cp == HB_InvalidCodePoint)
112       return 0;
113     cps++;
114     const HB_Script script = code_point_to_script(cp);
115 
116     if (script != current_script) {
117       if (current_script == init_script == HB_Script_Inherited) {
118         // If we started off as inherited, we take whatever we can find.
119         output->script = script;
120         current_script = script;
121         continue;
122       } else if (script == HB_Script_Inherited) {
123         continue;
124       } else {
125         *iter = prev_iter;
126         cps--;
127         break;
128       }
129     }
130   }
131 
132   if (output->script == HB_Script_Inherited)
133     output->script = HB_Script_Common;
134 
135   output->length = *iter - output->pos;
136   if (num_code_points)
137     *num_code_points = cps;
138   return 1;
139 }
140 
141 char
hb_utf16_script_run_prev(unsigned * num_code_points,HB_ScriptItem * output,const uint16_t * chars,size_t len,ssize_t * iter)142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
143                          const uint16_t *chars, size_t len, ssize_t *iter) {
144   if (*iter == (size_t) -1)
145     return 0;
146 
147   const size_t ending_index = *iter;
148   const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
149   unsigned cps = 1;
150   if (init_cp == HB_InvalidCodePoint)
151     return 0;
152   const HB_Script init_script = code_point_to_script(init_cp);
153   HB_Script current_script = init_script;
154   output->script = init_script;
155 
156   for (;;) {
157     if (*iter < 0)
158       break;
159     const ssize_t prev_iter = *iter;
160     const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
161     if (cp == HB_InvalidCodePoint)
162       return 0;
163     cps++;
164     const HB_Script script = code_point_to_script(cp);
165 
166     if (script != current_script) {
167       if (current_script == init_script == HB_Script_Inherited) {
168         // If we started off as inherited, we take whatever we can find.
169         output->script = script;
170         current_script = script;
171         continue;
172       } else if (script == HB_Script_Inherited) {
173         // Just assume that whatever follows this combining character is within
174         // the same script.  This is incorrect if you had language1 + combining
175         // char + language 2, but that is rare and this code is suspicious
176         // anyway.
177         continue;
178       } else {
179         *iter = prev_iter;
180         cps--;
181         break;
182       }
183     }
184   }
185 
186   if (output->script == HB_Script_Inherited)
187     output->script = HB_Script_Common;
188 
189   output->pos = *iter + 1;
190   output->length = ending_index - *iter;
191   if (num_code_points)
192     *num_code_points = cps;
193   return 1;
194 }
195 
196 static int
grapheme_break_property_cmp(const void * vkey,const void * vcandidate)197 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
198   const uint32_t key = (uint32_t) (intptr_t) vkey;
199   const struct grapheme_break_property *candidate = vcandidate;
200 
201   if (key < candidate->range_start) {
202     return -1;
203   } else if (key > candidate->range_end) {
204     return 1;
205   } else {
206     return 0;
207   }
208 }
209 
210 HB_GraphemeClass
HB_GetGraphemeClass(HB_UChar32 ch)211 HB_GetGraphemeClass(HB_UChar32 ch) {
212   const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
213                               grapheme_break_properties_count,
214                               sizeof(struct grapheme_break_property),
215                               grapheme_break_property_cmp);
216   if (!vprop)
217     return HB_Grapheme_Other;
218 
219   return ((const struct grapheme_break_property *) vprop)->klass;
220 }
221 
222 HB_WordClass
HB_GetWordClass(HB_UChar32 ch)223 HB_GetWordClass(HB_UChar32 ch) {
224   abort();
225   return 0;
226 }
227 
228 HB_SentenceClass
HB_GetSentenceClass(HB_UChar32 ch)229 HB_GetSentenceClass(HB_UChar32 ch) {
230   abort();
231   return 0;
232 }
233 
234 void
HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch,HB_GraphemeClass * gclass,HB_LineBreakClass * breakclass)235 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
236   *gclass = HB_GetGraphemeClass(ch);
237   *breakclass = HB_GetLineBreakClass(ch);
238 }
239 
240 static int
mirroring_property_cmp(const void * vkey,const void * vcandidate)241 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
242   const uint32_t key = (uint32_t) (intptr_t) vkey;
243   const struct mirroring_property *candidate = vcandidate;
244 
245   if (key < candidate->a) {
246     return -1;
247   } else if (key > candidate->a) {
248     return 1;
249   } else {
250     return 0;
251   }
252 }
253 
254 HB_UChar16
HB_GetMirroredChar(HB_UChar16 ch)255 HB_GetMirroredChar(HB_UChar16 ch) {
256   const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
257                               mirroring_properties_count,
258                               sizeof(struct mirroring_property),
259                               mirroring_property_cmp);
260   if (!mprop)
261     return ch;
262 
263   return ((const struct mirroring_property *) mprop)->b;
264 }
265 
// Not implemented in this file: any call terminates the process via abort().
void *
HB_Library_Resolve(const char *library, const char *symbol) {
  (void) library;  // unused
  (void) symbol;   // unused
  abort();
  return NULL;  // not reached; abort() does not return
}
271 
// Not implemented in this file: any call terminates the process via abort().
void *
HB_TextCodecForMib(int mib) {
  (void) mib;  // unused
  abort();
  return NULL;  // not reached; abort() does not return
}
277 
278 char *
HB_TextCodec_ConvertFromUnicode(void * codec,const HB_UChar16 * unicode,hb_uint32 length,hb_uint32 * outputLength)279 HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) {
280   abort();
281   return NULL;
282 }
283 
// Not implemented in this file: any call terminates the process via abort().
void
HB_TextCodec_FreeResult(char *v) {
  (void) v;  // unused
  abort();
}
288