• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2009  Red Hat, Inc.
3  * Copyright © 2011  Codethink Limited
4  * Copyright © 2011,2012  Google, Inc.
5  *
6  *  This is part of HarfBuzz, a text shaping library.
7  *
8  * Permission is hereby granted, without written agreement and without
9  * license or royalty fees, to use, copy, modify, and distribute this
10  * software and its documentation for any purpose, provided that the
11  * above copyright notice and the following two paragraphs appear in
12  * all copies of this software.
13  *
14  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18  * DAMAGE.
19  *
20  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
23  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25  *
26  * Red Hat Author(s): Behdad Esfahbod
27  * Codethink Author(s): Ryan Lortie
28  * Google Author(s): Behdad Esfahbod
29  */
30 
31 #ifndef HB_H_IN
32 #error "Include <hb.h> instead."
33 #endif
34 
35 #ifndef HB_UNICODE_H
36 #define HB_UNICODE_H
37 
38 #include "hb-common.h"
39 
40 HB_BEGIN_DECLS
41 
42 
/* hb_unicode_general_category_t */

/**
 * hb_unicode_general_category_t:
 *
 * Unicode Character Database property: General_Category (gc).
 *
 * The trailing comment on each enumerator is the two-letter UCD
 * abbreviation of the category.  No explicit values are given, so the
 * enumerators are numbered consecutively from 0 (Cc) to 29 (Zs) in the
 * order written here; reordering them would change their numeric values.
 */
typedef enum
{
  HB_UNICODE_GENERAL_CATEGORY_CONTROL,			/* Cc */
  HB_UNICODE_GENERAL_CATEGORY_FORMAT,			/* Cf */
  HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,		/* Cn */
  HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,		/* Co */
  HB_UNICODE_GENERAL_CATEGORY_SURROGATE,		/* Cs */
  HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,		/* Ll */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,		/* Lm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,		/* Lo */
  HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,		/* Lt */
  HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,		/* Lu */
  HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,		/* Mc */
  HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,		/* Me */
  HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,		/* Mn */
  HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,		/* Nd */
  HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,		/* Nl */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,		/* No */
  HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,	/* Pc */
  HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,		/* Pd */
  HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,	/* Pe */
  HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,	/* Pf */
  HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,	/* Pi */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,	/* Po */
  HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,		/* Ps */
  HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,		/* Sc */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,		/* Sk */
  HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,		/* Sm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,		/* So */
  HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,		/* Zl */
  HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,	/* Zp */
  HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR		/* Zs */
} hb_unicode_general_category_t;
79 
/* hb_unicode_combining_class_t */

/**
 * hb_unicode_combining_class_t:
 *
 * Unicode Character Database property: Canonical_Combining_Class (ccc).
 *
 * Each enumerator's value is the numeric ccc value itself.  The CCC<n>
 * names cover fixed-position classes that have no descriptive name; the
 * comment above each group names the script the group is listed under.
 *
 * Note: newer versions of Unicode may add new values.  Clients should be
 * ready to handle any value in the 0..254 range being returned from
 * hb_unicode_combining_class().
 */
typedef enum
{
  HB_UNICODE_COMBINING_CLASS_NOT_REORDERED	= 0,
  HB_UNICODE_COMBINING_CLASS_OVERLAY		= 1,
  HB_UNICODE_COMBINING_CLASS_NUKTA		= 7,
  HB_UNICODE_COMBINING_CLASS_KANA_VOICING	= 8,
  HB_UNICODE_COMBINING_CLASS_VIRAMA		= 9,

  /* Hebrew */
  HB_UNICODE_COMBINING_CLASS_CCC10	=  10,
  HB_UNICODE_COMBINING_CLASS_CCC11	=  11,
  HB_UNICODE_COMBINING_CLASS_CCC12	=  12,
  HB_UNICODE_COMBINING_CLASS_CCC13	=  13,
  HB_UNICODE_COMBINING_CLASS_CCC14	=  14,
  HB_UNICODE_COMBINING_CLASS_CCC15	=  15,
  HB_UNICODE_COMBINING_CLASS_CCC16	=  16,
  HB_UNICODE_COMBINING_CLASS_CCC17	=  17,
  HB_UNICODE_COMBINING_CLASS_CCC18	=  18,
  HB_UNICODE_COMBINING_CLASS_CCC19	=  19,
  HB_UNICODE_COMBINING_CLASS_CCC20	=  20,
  HB_UNICODE_COMBINING_CLASS_CCC21	=  21,
  HB_UNICODE_COMBINING_CLASS_CCC22	=  22,
  HB_UNICODE_COMBINING_CLASS_CCC23	=  23,
  HB_UNICODE_COMBINING_CLASS_CCC24	=  24,
  HB_UNICODE_COMBINING_CLASS_CCC25	=  25,
  HB_UNICODE_COMBINING_CLASS_CCC26	=  26,

  /* Arabic */
  HB_UNICODE_COMBINING_CLASS_CCC27	=  27,
  HB_UNICODE_COMBINING_CLASS_CCC28	=  28,
  HB_UNICODE_COMBINING_CLASS_CCC29	=  29,
  HB_UNICODE_COMBINING_CLASS_CCC30	=  30,
  HB_UNICODE_COMBINING_CLASS_CCC31	=  31,
  HB_UNICODE_COMBINING_CLASS_CCC32	=  32,
  HB_UNICODE_COMBINING_CLASS_CCC33	=  33,
  HB_UNICODE_COMBINING_CLASS_CCC34	=  34,
  HB_UNICODE_COMBINING_CLASS_CCC35	=  35,

  /* Syriac */
  HB_UNICODE_COMBINING_CLASS_CCC36	=  36,

  /* Telugu */
  HB_UNICODE_COMBINING_CLASS_CCC84	=  84,
  HB_UNICODE_COMBINING_CLASS_CCC91	=  91,

  /* Thai */
  HB_UNICODE_COMBINING_CLASS_CCC103	= 103,
  HB_UNICODE_COMBINING_CLASS_CCC107	= 107,

  /* Lao */
  HB_UNICODE_COMBINING_CLASS_CCC118	= 118,
  HB_UNICODE_COMBINING_CLASS_CCC122	= 122,

  /* Tibetan */
  HB_UNICODE_COMBINING_CLASS_CCC129	= 129,
  HB_UNICODE_COMBINING_CLASS_CCC130	= 130,
  HB_UNICODE_COMBINING_CLASS_CCC132	= 132,
  /* [sic] Misnamed: the value is 132, not 133.  Kept with its original
   * value so existing callers keep compiling and behaving the same; new
   * code should use HB_UNICODE_COMBINING_CLASS_CCC132. */
  HB_UNICODE_COMBINING_CLASS_CCC133	= 132,


  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT	= 200,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW		= 202,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE		= 214,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT	= 216,
  HB_UNICODE_COMBINING_CLASS_BELOW_LEFT			= 218,
  HB_UNICODE_COMBINING_CLASS_BELOW			= 220,
  HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT		= 222,
  HB_UNICODE_COMBINING_CLASS_LEFT			= 224,
  HB_UNICODE_COMBINING_CLASS_RIGHT			= 226,
  HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT			= 228,
  HB_UNICODE_COMBINING_CLASS_ABOVE			= 230,
  HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT		= 232,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW		= 233,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE		= 234,

  HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT		= 240,

  /* Lies outside the 0..254 range clients are told to expect. */
  HB_UNICODE_COMBINING_CLASS_INVALID	= 255
} hb_unicode_combining_class_t;
165 
166 
/*
 * hb_unicode_funcs_t
 */

/* Opaque handle: only the struct tag is declared in this header, so
 * clients can hold and pass pointers but cannot access any members. */
typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;


/*
 * Just give me the best implementation you've got there.
 *
 * Takes no arguments and returns a pointer to a function table.
 */
hb_unicode_funcs_t *
hb_unicode_funcs_get_default (void);


/*
 * Returns a pointer to a function table, given a @parent table.
 * NOTE(review): presumably callbacks that are not set fall back to
 * @parent -- confirm against the implementation.
 */
hb_unicode_funcs_t *
hb_unicode_funcs_create (hb_unicode_funcs_t *parent);

/*
 * Takes no arguments and returns a pointer to a function table.
 * NOTE(review): presumably a shared "empty" instance -- confirm.
 */
hb_unicode_funcs_t *
hb_unicode_funcs_get_empty (void);

/*
 * Takes @ufuncs and returns a function-table pointer.
 * NOTE(review): presumably takes a reference and returns @ufuncs itself
 * -- confirm.
 */
hb_unicode_funcs_t *
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);

/*
 * Takes @ufuncs; returns nothing.
 * NOTE(review): presumably drops a reference taken by create/reference
 * -- confirm.
 */
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
192 
193 hb_bool_t
194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
195 			        hb_user_data_key_t *key,
196 			        void *              data,
197 			        hb_destroy_func_t   destroy,
198 				hb_bool_t           replace);
199 
200 
201 void *
202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
203 			        hb_user_data_key_t *key);
204 
205 
206 void
207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
208 
209 hb_bool_t
210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
211 
212 hb_unicode_funcs_t *
213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
214 
215 
216 /*
217  * funcs
218  */
219 
220 /* typedefs */
221 
222 typedef hb_unicode_combining_class_t	(*hb_unicode_combining_class_func_t)	(hb_unicode_funcs_t *ufuncs,
223 										 hb_codepoint_t      unicode,
224 										 void               *user_data);
225 typedef unsigned int			(*hb_unicode_eastasian_width_func_t)	(hb_unicode_funcs_t *ufuncs,
226 										 hb_codepoint_t      unicode,
227 										 void               *user_data);
228 typedef hb_unicode_general_category_t	(*hb_unicode_general_category_func_t)	(hb_unicode_funcs_t *ufuncs,
229 										 hb_codepoint_t      unicode,
230 										 void               *user_data);
231 typedef hb_codepoint_t			(*hb_unicode_mirroring_func_t)		(hb_unicode_funcs_t *ufuncs,
232 										 hb_codepoint_t      unicode,
233 										 void               *user_data);
234 typedef hb_script_t			(*hb_unicode_script_func_t)		(hb_unicode_funcs_t *ufuncs,
235 										 hb_codepoint_t      unicode,
236 										 void               *user_data);
237 
238 typedef hb_bool_t			(*hb_unicode_compose_func_t)		(hb_unicode_funcs_t *ufuncs,
239 										 hb_codepoint_t      a,
240 										 hb_codepoint_t      b,
241 										 hb_codepoint_t     *ab,
242 										 void               *user_data);
243 typedef hb_bool_t			(*hb_unicode_decompose_func_t)		(hb_unicode_funcs_t *ufuncs,
244 										 hb_codepoint_t      ab,
245 										 hb_codepoint_t     *a,
246 										 hb_codepoint_t     *b,
247 										 void               *user_data);
248 
249 /**
250  * hb_unicode_decompose_compatibility_func_t:
251  * @ufuncs: Unicode function structure
252  * @u: codepoint to decompose
253  * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
254  * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
255  *
256  * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
257  * The complete length of the decomposition will be returned.
258  *
259  * If @u has no compatibility decomposition, zero should be returned.
260  *
261  * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
262  * compatibility decomposition plus an terminating value of 0.  Consequently, @decompose must be allocated by the caller to be at least this length.  Implementations
263  * of this function type must ensure that they do not write past the provided array.
264  *
265  * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
266  */
267 typedef unsigned int			(*hb_unicode_decompose_compatibility_func_t)	(hb_unicode_funcs_t *ufuncs,
268 											 hb_codepoint_t      u,
269 											 hb_codepoint_t     *decomposed,
270 											 void               *user_data);
271 
/* See Unicode 6.1 for details on the maximum decomposition length.
 * Written as 18+1: 18 codepoints plus one slot for the terminating 0
 * mentioned in the hb_unicode_decompose_compatibility_func_t docs.
 * Parenthesized so the macro expands safely inside larger expressions. */
#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
274 
275 /* setters */
276 
277 void
278 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
279 					   hb_unicode_combining_class_func_t combining_class_func,
280 					   void *user_data, hb_destroy_func_t destroy);
281 
282 void
283 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
284 					   hb_unicode_eastasian_width_func_t eastasian_width_func,
285 					   void *user_data, hb_destroy_func_t destroy);
286 
287 void
288 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
289 					    hb_unicode_general_category_func_t general_category_func,
290 					    void *user_data, hb_destroy_func_t destroy);
291 
292 void
293 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
294 				     hb_unicode_mirroring_func_t mirroring_func,
295 				     void *user_data, hb_destroy_func_t destroy);
296 
297 void
298 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
299 				  hb_unicode_script_func_t script_func,
300 				  void *user_data, hb_destroy_func_t destroy);
301 
302 void
303 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
304 				   hb_unicode_compose_func_t compose_func,
305 				   void *user_data, hb_destroy_func_t destroy);
306 
307 void
308 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
309 				     hb_unicode_decompose_func_t decompose_func,
310 				     void *user_data, hb_destroy_func_t destroy);
311 
312 void
313 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
314 						   hb_unicode_decompose_compatibility_func_t decompose_compatibility_func,
315 						   void *user_data, hb_destroy_func_t destroy);
316 
317 /* accessors */
318 
319 hb_unicode_combining_class_t
320 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
321 			    hb_codepoint_t unicode);
322 
323 unsigned int
324 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
325 			    hb_codepoint_t unicode);
326 
327 hb_unicode_general_category_t
328 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
329 			     hb_codepoint_t unicode);
330 
331 hb_codepoint_t
332 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
333 		      hb_codepoint_t unicode);
334 
335 hb_script_t
336 hb_unicode_script (hb_unicode_funcs_t *ufuncs,
337 		   hb_codepoint_t unicode);
338 
339 hb_bool_t
340 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
341 		    hb_codepoint_t      a,
342 		    hb_codepoint_t      b,
343 		    hb_codepoint_t     *ab);
344 hb_bool_t
345 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
346 		      hb_codepoint_t      ab,
347 		      hb_codepoint_t     *a,
348 		      hb_codepoint_t     *b);
349 
350 unsigned int
351 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
352 				    hb_codepoint_t      u,
353 				    hb_codepoint_t     *decomposed);
354 
355 HB_END_DECLS
356 
357 #endif /* HB_UNICODE_H */
358