1 /*
2 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include "hb.hh"
18 #include "hb-unicode.hh"
19 #include "hb-machinery.hh"
20
21 #include "hb-ucd-table.hh"
22
23 static hb_unicode_combining_class_t
hb_ucd_combining_class(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)24 hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
25 hb_codepoint_t unicode,
26 void *user_data HB_UNUSED)
27 {
28 return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
29 }
30
31 static hb_unicode_general_category_t
hb_ucd_general_category(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)32 hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
33 hb_codepoint_t unicode,
34 void *user_data HB_UNUSED)
35 {
36 return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
37 }
38
39 static hb_codepoint_t
hb_ucd_mirroring(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)40 hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
41 hb_codepoint_t unicode,
42 void *user_data HB_UNUSED)
43 {
44 return unicode + _hb_ucd_bmg (unicode);
45 }
46
47 static hb_script_t
hb_ucd_script(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)48 hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
49 hb_codepoint_t unicode,
50 void *user_data HB_UNUSED)
51 {
52 return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
53 }
54
55
/* Constants for the arithmetic Hangul (de)composition helpers below.
 *
 * *BASE values are the code points the helpers add indices to or subtract
 * from; *COUNT values are the sizes of the corresponding index ranges.
 * TBASE is one below the first trailing consonant, so a trailing index of
 * zero stands for "no trailing consonant". */
#define SBASE 0xAC00u /* first precomposed syllable */
#define LBASE 0x1100u /* first leading consonant */
#define VBASE 0x1161u /* first vowel */
#define TBASE 0x11A7u /* one before the first trailing consonant */
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
#define NCOUNT (VCOUNT * TCOUNT) /* syllables per leading consonant */
#define SCOUNT (LCOUNT * NCOUNT) /* total precomposed syllables: 11172 */
65
66 static inline bool
_hb_ucd_decompose_hangul(hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)67 _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
68 {
69 unsigned si = ab - SBASE;
70
71 if (si >= SCOUNT)
72 return false;
73
74 if (si % TCOUNT)
75 {
76 /* LV,T */
77 *a = SBASE + (si / TCOUNT) * TCOUNT;
78 *b = TBASE + (si % TCOUNT);
79 return true;
80 } else {
81 /* L,V */
82 *a = LBASE + (si / NCOUNT);
83 *b = VBASE + (si % NCOUNT) / TCOUNT;
84 return true;
85 }
86 }
87
88 static inline bool
_hb_ucd_compose_hangul(hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)89 _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
90 {
91 if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
92 !((a - SBASE) % TCOUNT))
93 {
94 /* LV,T */
95 *ab = a + (b - TBASE);
96 return true;
97 }
98 else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
99 {
100 /* L,V */
101 int li = a - LBASE;
102 int vi = b - VBASE;
103 *ab = SBASE + li * NCOUNT + vi * TCOUNT;
104 return true;
105 }
106 else
107 return false;
108 }
109
110 static int
_cmp_pair(const void * _key,const void * _item)111 _cmp_pair (const void *_key, const void *_item)
112 {
113 uint64_t& a = * (uint64_t*) _key;
114 uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
115
116 return a < b ? -1 : a > b ? +1 : 0;
117 }
118 static int
_cmp_pair_11_7_14(const void * _key,const void * _item)119 _cmp_pair_11_7_14 (const void *_key, const void *_item)
120 {
121 uint32_t& a = * (uint32_t*) _key;
122 uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
123
124 return a < b ? -1 : a > b ? +1 : 0;
125 }
126
127 static hb_bool_t
hb_ucd_compose(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab,void * user_data HB_UNUSED)128 hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
129 hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
130 void *user_data HB_UNUSED)
131 {
132 if (_hb_ucd_compose_hangul (a, b, ab)) return true;
133
134 hb_codepoint_t u = 0;
135
136 if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
137 {
138 uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
139 uint32_t *v = (uint32_t*) hb_bsearch (&k, _hb_ucd_dm2_u32_map,
140 ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
141 sizeof (*_hb_ucd_dm2_u32_map),
142 _cmp_pair_11_7_14);
143 if (likely (!v)) return false;
144 u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
145 }
146 else
147 {
148 uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
149 uint64_t *v = (uint64_t*) hb_bsearch (&k, _hb_ucd_dm2_u64_map,
150 ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
151 sizeof (*_hb_ucd_dm2_u64_map),
152 _cmp_pair);
153 if (likely (!v)) return false;
154 u = HB_CODEPOINT_DECODE3_3 (*v);
155 }
156
157 if (unlikely (!u)) return false;
158 *ab = u;
159 return true;
160 }
161
162 static hb_bool_t
hb_ucd_decompose(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b,void * user_data HB_UNUSED)163 hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
164 hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
165 void *user_data HB_UNUSED)
166 {
167 if (_hb_ucd_decompose_hangul (ab, a, b)) return true;
168
169 unsigned i = _hb_ucd_dm (ab);
170
171 if (likely (!i)) return false;
172 i--;
173
174 if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
175 {
176 if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
177 *a = _hb_ucd_dm1_p0_map[i];
178 else
179 {
180 i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
181 *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
182 }
183 *b = 0;
184 return true;
185 }
186 i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
187
188 if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
189 {
190 uint32_t v = _hb_ucd_dm2_u32_map[i];
191 *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
192 *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
193 return true;
194 }
195 i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
196
197 uint64_t v = _hb_ucd_dm2_u64_map[i];
198 *a = HB_CODEPOINT_DECODE3_1 (v);
199 *b = HB_CODEPOINT_DECODE3_2 (v);
200 return true;
201 }
202
203
204 #if HB_USE_ATEXIT
205 static void free_static_ucd_funcs ();
206 #endif
207
208 static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
209 {
createhb_ucd_unicode_funcs_lazy_loader_t210 static hb_unicode_funcs_t *create ()
211 {
212 hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
213
214 hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
215 hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
216 hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
217 hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
218 hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
219 hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
220
221 hb_unicode_funcs_make_immutable (funcs);
222
223 #if HB_USE_ATEXIT
224 atexit (free_static_ucd_funcs);
225 #endif
226
227 return funcs;
228 }
229 } static_ucd_funcs;
230
#if HB_USE_ATEXIT
/* atexit () handler registered by the loader's create (): releases the
 * instance held by static_ucd_funcs. */
static void
free_static_ucd_funcs ()
{
  static_ucd_funcs.free_instance ();
}
#endif
238
239 hb_unicode_funcs_t *
hb_ucd_get_unicode_funcs()240 hb_ucd_get_unicode_funcs ()
241 {
242 #ifdef HB_NO_UCD
243 return hb_unicode_funcs_get_empty ();
244 #endif
245 return static_ucd_funcs.get_unconst ();
246 }
247