1 /*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
27
28 #include <assert.h>
29 #include <stdio.h>
30
// Yields an int with only bit number `bit` set, so enum values can be tested
// against a set of values with a single bitwise AND.
#define FLAG(bit) (1 << (bit))
32
isLetter(HB_UChar16 ucs)33 static HB_Bool isLetter(HB_UChar16 ucs)
34 {
35 const int test = FLAG(HB_Letter_Uppercase) |
36 FLAG(HB_Letter_Lowercase) |
37 FLAG(HB_Letter_Titlecase) |
38 FLAG(HB_Letter_Modifier) |
39 FLAG(HB_Letter_Other);
40 return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
41 }
42
isMark(HB_UChar16 ucs)43 static HB_Bool isMark(HB_UChar16 ucs)
44 {
45 const int test = FLAG(HB_Mark_NonSpacing) |
46 FLAG(HB_Mark_SpacingCombining) |
47 FLAG(HB_Mark_Enclosing);
48 return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
49 }
50
// Character class assigned to a code point. The values are stored as bytes in
// the indicForms table and returned by form(); Invalid and UnknownForm share
// the value 0.
enum Form {
    Invalid          = 0,
    UnknownForm      = Invalid,
    Consonant        = 1,
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,
    Other            = 10
};
65
66 static const unsigned char indicForms[0xe00-0x900] = {
67 // Devangari
68 Invalid, VowelMark, VowelMark, VowelMark,
69 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
70 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
71 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
72
73 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
74 IndependentVowel, Consonant, Consonant, Consonant,
75 Consonant, Consonant, Consonant, Consonant,
76 Consonant, Consonant, Consonant, Consonant,
77
78 Consonant, Consonant, Consonant, Consonant,
79 Consonant, Consonant, Consonant, Consonant,
80 Consonant, Consonant, Consonant, Consonant,
81 Consonant, Consonant, Consonant, Consonant,
82
83 Consonant, Consonant, Consonant, Consonant,
84 Consonant, Consonant, Consonant, Consonant,
85 Consonant, Consonant, UnknownForm, UnknownForm,
86 Nukta, Other, Matra, Matra,
87
88 Matra, Matra, Matra, Matra,
89 Matra, Matra, Matra, Matra,
90 Matra, Matra, Matra, Matra,
91 Matra, Halant, UnknownForm, UnknownForm,
92
93 Other, StressMark, StressMark, StressMark,
94 StressMark, UnknownForm, UnknownForm, UnknownForm,
95 Consonant, Consonant, Consonant, Consonant,
96 Consonant, Consonant, Consonant, Consonant,
97
98 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
99 Other, Other, Other, Other,
100 Other, Other, Other, Other,
101 Other, Other, Other, Other,
102
103 Other, Other, Other, Other,
104 Other, Other, Other, Other,
105 Other, Other, Other, Consonant,
106 Consonant, Consonant /* ??? */, Consonant, Consonant,
107
108 // Bengali
109 Invalid, VowelMark, VowelMark, VowelMark,
110 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
111 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
112 IndependentVowel, Invalid, Invalid, IndependentVowel,
113
114 IndependentVowel, Invalid, Invalid, IndependentVowel,
115 IndependentVowel, Consonant, Consonant, Consonant,
116 Consonant, Consonant, Consonant, Consonant,
117 Consonant, Consonant, Consonant, Consonant,
118
119 Consonant, Consonant, Consonant, Consonant,
120 Consonant, Consonant, Consonant, Consonant,
121 Consonant, Invalid, Consonant, Consonant,
122 Consonant, Consonant, Consonant, Consonant,
123
124 Consonant, Invalid, Consonant, Invalid,
125 Invalid, Invalid, Consonant, Consonant,
126 Consonant, Consonant, UnknownForm, UnknownForm,
127 Nukta, Other, Matra, Matra,
128
129 Matra, Matra, Matra, Matra,
130 Matra, Invalid, Invalid, Matra,
131 Matra, Invalid, Invalid, Matra,
132 Matra, Halant, Consonant, UnknownForm,
133
134 Invalid, Invalid, Invalid, Invalid,
135 Invalid, Invalid, Invalid, VowelMark,
136 Invalid, Invalid, Invalid, Invalid,
137 Consonant, Consonant, Invalid, Consonant,
138
139 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
140 Other, Other, Other, Other,
141 Other, Other, Other, Other,
142 Other, Other, Other, Other,
143
144 Consonant, Consonant, Other, Other,
145 Other, Other, Other, Other,
146 Other, Other, Other, Other,
147 Other, Other, Other, Other,
148
149 // Gurmukhi
150 Invalid, VowelMark, VowelMark, VowelMark,
151 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
152 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
153 Invalid, Invalid, Invalid, IndependentVowel,
154
155 IndependentVowel, Invalid, Invalid, IndependentVowel,
156 IndependentVowel, Consonant, Consonant, Consonant,
157 Consonant, Consonant, Consonant, Consonant,
158 Consonant, Consonant, Consonant, Consonant,
159
160 Consonant, Consonant, Consonant, Consonant,
161 Consonant, Consonant, Consonant, Consonant,
162 Consonant, Invalid, Consonant, Consonant,
163 Consonant, Consonant, Consonant, Consonant,
164
165 Consonant, Invalid, Consonant, Consonant,
166 Invalid, Consonant, Consonant, Invalid,
167 Consonant, Consonant, UnknownForm, UnknownForm,
168 Nukta, Other, Matra, Matra,
169
170 Matra, Matra, Matra, Invalid,
171 Invalid, Invalid, Invalid, Matra,
172 Matra, Invalid, Invalid, Matra,
173 Matra, Halant, UnknownForm, UnknownForm,
174
175 Invalid, Invalid, Invalid, Invalid,
176 Invalid, UnknownForm, UnknownForm, UnknownForm,
177 Invalid, Consonant, Consonant, Consonant,
178 Consonant, Invalid, Consonant, Invalid,
179
180 Other, Other, Invalid, Invalid,
181 Other, Other, Other, Other,
182 Other, Other, Other, Other,
183 Other, Other, Other, Other,
184
185 StressMark, StressMark, Consonant, Consonant,
186 Other, Other, Other, Other,
187 Other, Other, Other, Other,
188 Other, Other, Other, Other,
189
190 // Gujarati
191 Invalid, VowelMark, VowelMark, VowelMark,
192 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
193 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
194 IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
195
196 IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
197 IndependentVowel, Consonant, Consonant, Consonant,
198 Consonant, Consonant, Consonant, Consonant,
199 Consonant, Consonant, Consonant, Consonant,
200
201 Consonant, Consonant, Consonant, Consonant,
202 Consonant, Consonant, Consonant, Consonant,
203 Consonant, Invalid, Consonant, Consonant,
204 Consonant, Consonant, Consonant, Consonant,
205
206 Consonant, Invalid, Consonant, Consonant,
207 Invalid, Consonant, Consonant, Consonant,
208 Consonant, Consonant, UnknownForm, UnknownForm,
209 Nukta, Other, Matra, Matra,
210
211 Matra, Matra, Matra, Matra,
212 Matra, Matra, Invalid, Matra,
213 Matra, Matra, Invalid, Matra,
214 Matra, Halant, UnknownForm, UnknownForm,
215
216 Other, UnknownForm, UnknownForm, UnknownForm,
217 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
218 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
219 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
220
221 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
222 Other, Other, Other, Other,
223 Other, Other, Other, Other,
224 Other, Other, Other, Other,
225
226 Other, Other, Other, Other,
227 Other, Other, Other, Other,
228 Other, Other, Other, Other,
229 Other, Other, Other, Other,
230
231 // Oriya
232 Invalid, VowelMark, VowelMark, VowelMark,
233 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
234 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
235 IndependentVowel, Invalid, Invalid, IndependentVowel,
236
237 IndependentVowel, Invalid, Invalid, IndependentVowel,
238 IndependentVowel, Consonant, Consonant, Consonant,
239 Consonant, Consonant, Consonant, Consonant,
240 Consonant, Consonant, Consonant, Consonant,
241
242 Consonant, Consonant, Consonant, Consonant,
243 Consonant, Consonant, Consonant, Consonant,
244 Consonant, Invalid, Consonant, Consonant,
245 Consonant, Consonant, Consonant, Consonant,
246
247 Consonant, Invalid, Consonant, Consonant,
248 Invalid, Consonant, Consonant, Consonant,
249 Consonant, Consonant, UnknownForm, UnknownForm,
250 Nukta, Other, Matra, Matra,
251
252 Matra, Matra, Matra, Matra,
253 Invalid, Invalid, Invalid, Matra,
254 Matra, Invalid, Invalid, Matra,
255 Matra, Halant, UnknownForm, UnknownForm,
256
257 Other, Invalid, Invalid, Invalid,
258 Invalid, UnknownForm, LengthMark, LengthMark,
259 Invalid, Invalid, Invalid, Invalid,
260 Consonant, Consonant, Invalid, Consonant,
261
262 IndependentVowel, IndependentVowel, Invalid, Invalid,
263 Invalid, Invalid, Other, Other,
264 Other, Other, Other, Other,
265 Other, Other, Other, Other,
266
267 Other, Consonant, Other, Other,
268 Other, Other, Other, Other,
269 Other, Other, Other, Other,
270 Other, Other, Other, Other,
271
272 //Tamil
273 Invalid, Invalid, VowelMark, Other,
274 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
275 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
276 Invalid, Invalid, IndependentVowel, IndependentVowel,
277
278 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
279 IndependentVowel, Consonant, Invalid, Invalid,
280 Invalid, Consonant, Consonant, Invalid,
281 Consonant, Invalid, Consonant, Consonant,
282
283 Invalid, Invalid, Invalid, Consonant,
284 Consonant, Invalid, Invalid, Invalid,
285 Consonant, Consonant, Consonant, Invalid,
286 Invalid, Invalid, Consonant, Consonant,
287
288 Consonant, Consonant, Consonant, Consonant,
289 Consonant, Consonant, Consonant, Consonant,
290 Consonant, Consonant, UnknownForm, UnknownForm,
291 Invalid, Invalid, Matra, Matra,
292
293 Matra, Matra, Matra, Invalid,
294 Invalid, Invalid, Matra, Matra,
295 Matra, Invalid, Matra, Matra,
296 Matra, Halant, Invalid, Invalid,
297
298 Invalid, Invalid, Invalid, Invalid,
299 Invalid, Invalid, Invalid, LengthMark,
300 Invalid, Invalid, Invalid, Invalid,
301 Invalid, Invalid, Invalid, Invalid,
302
303 Invalid, Invalid, Invalid, Invalid,
304 Invalid, Invalid, Other, Other,
305 Other, Other, Other, Other,
306 Other, Other, Other, Other,
307
308 Other, Other, Other, Other,
309 Other, Other, Other, Other,
310 Other, Other, Other, Other,
311 Other, Other, Other, Other,
312
313 // Telugu
314 Invalid, VowelMark, VowelMark, VowelMark,
315 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
316 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
317 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
318
319 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
320 IndependentVowel, Consonant, Consonant, Consonant,
321 Consonant, Consonant, Consonant, Consonant,
322 Consonant, Consonant, Consonant, Consonant,
323
324 Consonant, Consonant, Consonant, Consonant,
325 Consonant, Consonant, Consonant, Consonant,
326 Consonant, Invalid, Consonant, Consonant,
327 Consonant, Consonant, Consonant, Consonant,
328
329 Consonant, Consonant, Consonant, Consonant,
330 Invalid, Consonant, Consonant, Consonant,
331 Consonant, Consonant, UnknownForm, UnknownForm,
332 Invalid, Invalid, Matra, Matra,
333
334 Matra, Matra, Matra, Matra,
335 Matra, Invalid, Matra, Matra,
336 Matra, Invalid, Matra, Matra,
337 Matra, Halant, Invalid, Invalid,
338
339 Invalid, Invalid, Invalid, Invalid,
340 Invalid, LengthMark, Matra, Invalid,
341 Invalid, Invalid, Invalid, Invalid,
342 Invalid, Invalid, Invalid, Invalid,
343
344 IndependentVowel, IndependentVowel, Invalid, Invalid,
345 Invalid, Invalid, Other, Other,
346 Other, Other, Other, Other,
347 Other, Other, Other, Other,
348
349 Other, Other, Other, Other,
350 Other, Other, Other, Other,
351 Other, Other, Other, Other,
352 Other, Other, Other, Other,
353
354 // Kannada
355 Invalid, Invalid, VowelMark, VowelMark,
356 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
357 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
358 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
359
360 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
361 IndependentVowel, Consonant, Consonant, Consonant,
362 Consonant, Consonant, Consonant, Consonant,
363 Consonant, Consonant, Consonant, Consonant,
364
365 Consonant, Consonant, Consonant, Consonant,
366 Consonant, Consonant, Consonant, Consonant,
367 Consonant, Invalid, Consonant, Consonant,
368 Consonant, Consonant, Consonant, Consonant,
369
370 Consonant, Consonant, Consonant, Consonant,
371 Invalid, Consonant, Consonant, Consonant,
372 Consonant, Consonant, UnknownForm, UnknownForm,
373 Nukta, Other, Matra, Matra,
374
375 Matra, Matra, Matra, Matra,
376 Matra, Invalid, Matra, Matra,
377 Matra, Invalid, Matra, Matra,
378 Matra, Halant, Invalid, Invalid,
379
380 Invalid, Invalid, Invalid, Invalid,
381 Invalid, LengthMark, LengthMark, Invalid,
382 Invalid, Invalid, Invalid, Invalid,
383 Invalid, Invalid, Consonant, Invalid,
384
385 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
386 Invalid, Invalid, Other, Other,
387 Other, Other, Other, Other,
388 Other, Other, Other, Other,
389
390 Other, Other, Other, Other,
391 Other, Other, Other, Other,
392 Other, Other, Other, Other,
393 Other, Other, Other, Other,
394
395 // Malayalam
396 Invalid, Invalid, VowelMark, VowelMark,
397 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
398 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
399 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
400
401 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
402 IndependentVowel, Consonant, Consonant, Consonant,
403 Consonant, Consonant, Consonant, Consonant,
404 Consonant, Consonant, Consonant, Consonant,
405
406 Consonant, Consonant, Consonant, Consonant,
407 Consonant, Consonant, Consonant, Consonant,
408 Consonant, Invalid, Consonant, Consonant,
409 Consonant, Consonant, Consonant, Consonant,
410
411 Consonant, Consonant, Consonant, Consonant,
412 Consonant, Consonant, Consonant, Consonant,
413 Consonant, Consonant, UnknownForm, UnknownForm,
414 Invalid, Invalid, Matra, Matra,
415
416 Matra, Matra, Matra, Matra,
417 Invalid, Invalid, Matra, Matra,
418 Matra, Invalid, Matra, Matra,
419 Matra, Halant, Invalid, Invalid,
420
421 Invalid, Invalid, Invalid, Invalid,
422 Invalid, Invalid, Invalid, Matra,
423 Invalid, Invalid, Invalid, Invalid,
424 Invalid, Invalid, Invalid, Invalid,
425
426 IndependentVowel, IndependentVowel, Invalid, Invalid,
427 Invalid, Invalid, Other, Other,
428 Other, Other, Other, Other,
429 Other, Other, Other, Other,
430
431 Other, Other, Other, Other,
432 Other, Other, Other, Other,
433 Other, Other, Other, Other,
434 Other, Other, Other, Other,
435
436 // Sinhala
437 Invalid, Invalid, VowelMark, VowelMark,
438 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
439 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
440 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
441
442 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
443 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
444 Invalid, Invalid, Consonant, Consonant,
445 Consonant, Consonant, Consonant, Consonant,
446
447 Consonant, Consonant, Consonant, Consonant,
448 Consonant, Consonant, Consonant, Consonant,
449 Consonant, Consonant, Consonant, Consonant,
450 Consonant, Consonant, Consonant, Consonant,
451
452 Consonant, Consonant, Invalid, Consonant,
453 Consonant, Consonant, Consonant, Consonant,
454 Consonant, Consonant, Consonant, Consonant,
455 Invalid, Consonant, Invalid, Invalid,
456
457 Consonant, Consonant, Consonant, Consonant,
458 Consonant, Consonant, Consonant, Invalid,
459 Invalid, Invalid, Halant, Invalid,
460 Invalid, Invalid, Invalid, Matra,
461
462 Matra, Matra, Matra, Matra,
463 Matra, Invalid, Matra, Invalid,
464 Matra, Matra, Matra, Matra,
465 Matra, Matra, Matra, Matra,
466
467 Invalid, Invalid, Invalid, Invalid,
468 Invalid, Invalid, Invalid, Invalid,
469 Invalid, Invalid, Invalid, Invalid,
470 Invalid, Invalid, Invalid, Invalid,
471
472 Invalid, Invalid, Matra, Matra,
473 Other, Other, Other, Other,
474 Other, Other, Other, Other,
475 Other, Other, Other, Other,
476 };
477
// Placement class of a character. The values are stored as bytes in the
// indicPosition table and returned by indic_position().
enum Position {
    None    = 0,
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
490
491 static const unsigned char indicPosition[0xe00-0x900] = {
492 // Devanagari
493 None, Above, Above, Post,
494 None, None, None, None,
495 None, None, None, None,
496 None, None, None, None,
497
498 None, None, None, None,
499 None, None, None, None,
500 None, None, None, None,
501 None, None, None, None,
502
503 None, None, None, None,
504 None, None, None, None,
505 None, None, None, None,
506 None, None, None, None,
507
508 Below, None, None, None,
509 None, None, None, None,
510 None, None, None, None,
511 None, None, Post, Pre,
512
513 Post, Below, Below, Below,
514 Below, Above, Above, Above,
515 Above, Post, Post, Post,
516 Post, None, None, None,
517
518 None, Above, Below, Above,
519 Above, None, None, None,
520 None, None, None, None,
521 None, None, None, None,
522
523 None, None, Below, Below,
524 None, None, None, None,
525 None, None, None, None,
526 None, None, None, None,
527
528 None, None, None, None,
529 None, None, None, None,
530 None, None, None, None,
531 None, None, None, None,
532
533 // Bengali
534 None, Above, Post, Post,
535 None, None, None, None,
536 None, None, None, None,
537 None, None, None, None,
538
539 None, None, None, None,
540 None, None, None, None,
541 None, None, None, None,
542 None, None, None, None,
543
544 None, None, None, None,
545 None, None, None, None,
546 None, None, None, None,
547 Below, None, None, Post,
548
549 Below, None, None, None,
550 None, None, None, None,
551 None, None, None, None,
552 Below, None, Post, Pre,
553
554 Post, Below, Below, Below,
555 Below, None, None, Pre,
556 Pre, None, None, Split,
557 Split, Below, None, None,
558
559 None, None, None, None,
560 None, None, None, Post,
561 None, None, None, None,
562 None, None, None, None,
563
564 None, None, Below, Below,
565 None, None, None, None,
566 None, None, None, None,
567 None, None, None, None,
568
569 Below, None, None, None,
570 None, None, None, None,
571 None, None, None, None,
572 None, None, None, None,
573
574 // Gurmukhi
575 None, Above, Above, Post,
576 None, None, None, None,
577 None, None, None, None,
578 None, None, None, None,
579
580 None, None, None, None,
581 None, None, None, None,
582 None, None, None, None,
583 None, None, None, None,
584
585 None, None, None, None,
586 None, None, None, None,
587 None, None, None, None,
588 None, None, None, Post,
589
590 Below, None, None, None,
591 None, Below, None, None,
592 None, Below, None, None,
593 Below, None, Post, Pre,
594
595 Post, Below, Below, None,
596 None, None, None, Above,
597 Above, None, None, Above,
598 Above, None, None, None,
599
600 None, None, None, None,
601 None, None, None, None,
602 None, None, None, None,
603 None, None, None, None,
604
605 None, None, None, None,
606 None, None, None, None,
607 None, None, None, None,
608 None, None, None, None,
609
610 Above, Above, None, None,
611 None, None, None, None,
612 None, None, None, None,
613 None, None, None, None,
614
615 // Gujarati
616 None, Above, Above, Post,
617 None, None, None, None,
618 None, None, None, None,
619 None, None, None, None,
620
621 None, None, None, None,
622 None, None, None, None,
623 None, None, None, None,
624 None, None, None, None,
625
626 None, None, None, None,
627 None, None, None, None,
628 None, None, None, None,
629 None, None, None, None,
630
631 Below, None, None, None,
632 None, None, None, None,
633 None, None, None, None,
634 None, None, Post, Pre,
635
636 Post, Below, Below, Below,
637 Below, Above, None, Above,
638 Above, Post, None, Post,
639 Post, None, None, None,
640
641 None, None, None, None,
642 None, None, None, None,
643 None, None, None, None,
644 None, None, None, None,
645
646 None, None, Below, Below,
647 None, None, None, None,
648 None, None, None, None,
649 None, None, None, None,
650
651 None, None, None, None,
652 None, None, None, None,
653 None, None, None, None,
654 None, None, None, None,
655
656 // Oriya
657 None, Above, Post, Post,
658 None, None, None, None,
659 None, None, None, None,
660 None, None, None, None,
661
662 None, None, None, None,
663 None, None, None, None,
664 None, None, None, None,
665 None, None, None, None,
666
667 None, None, None, None,
668 Below, None, None, None,
669 Below, None, None, None,
670 Below, Below, Below, Post,
671
672 Below, None, Below, Below,
673 None, None, None, None,
674 None, None, None, None,
675 None, None, Post, Above,
676
677 Post, Below, Below, Below,
678 None, None, None, Pre,
679 Split, None, None, Split,
680 Split, None, None, None,
681
682 None, None, None, None,
683 None, None, Above, Post,
684 None, None, None, None,
685 None, None, None, Post,
686
687 None, None, None, None,
688 None, None, None, None,
689 None, None, None, None,
690 None, None, None, None,
691
692 None, Below, None, None,
693 None, None, None, None,
694 None, None, None, None,
695 None, None, None, None,
696
697 // Tamil
698 None, None, Above, None,
699 None, None, None, None,
700 None, None, None, None,
701 None, None, None, None,
702
703 None, None, None, None,
704 None, None, None, None,
705 None, None, None, None,
706 None, None, None, None,
707
708 None, None, None, None,
709 None, None, None, None,
710 None, None, None, None,
711 None, None, None, None,
712
713 None, None, None, None,
714 None, None, None, None,
715 None, None, None, None,
716 None, None, Post, Post,
717
718 Above, Below, Below, None,
719 None, None, Pre, Pre,
720 Pre, None, Split, Split,
721 Split, Halant, None, None,
722
723 None, None, None, None,
724 None, None, None, Post,
725 None, None, None, None,
726 None, None, None, None,
727
728 None, None, None, None,
729 None, None, None, None,
730 None, None, None, None,
731 None, None, None, None,
732
733 None, None, None, None,
734 None, None, None, None,
735 None, None, None, None,
736 None, None, None, None,
737
738 // Telugu
739 None, Post, Post, Post,
740 None, None, None, None,
741 None, None, None, None,
742 None, None, None, None,
743
744 None, None, None, None,
745 None, Below, Below, Below,
746 Below, Below, Below, Below,
747 Below, Below, Below, Below,
748
749 Below, Below, Below, Below,
750 Below, Below, Below, Below,
751 Below, None, Below, Below,
752 Below, Below, Below, Below,
753
754 Below, None, Below, Below,
755 None, Below, Below, Below,
756 Below, Below, None, None,
757 None, None, Post, Above,
758
759 Above, Post, Post, Post,
760 Post, None, Above, Above,
761 Split, None, Post, Above,
762 Above, Halant, None, None,
763
764 None, None, None, None,
765 None, Above, Below, None,
766 None, None, None, None,
767 None, None, None, None,
768
769 None, None, None, None,
770 None, None, None, None,
771 None, None, None, None,
772 None, None, None, None,
773
774 None, None, None, None,
775 None, None, None, None,
776 None, None, None, None,
777 None, None, None, None,
778
779 // Kannada
780 None, None, Post, Post,
781 None, None, None, None,
782 None, None, None, None,
783 None, None, None, None,
784
785 None, None, None, None,
786 None, Below, Below, Below,
787 Below, Below, Below, Below,
788 Below, Below, Below, Below,
789
790 Below, Below, Below, Below,
791 Below, Below, Below, Below,
792 Below, Below, Below, Below,
793 Below, Below, Below, Below,
794
795 Below, None, Below, Below,
796 None, Below, Below, Below,
797 Below, Below, None, None,
798 None, None, Post, Above,
799
800 Split, Post, Post, Post,
801 Post, None, Above, Split,
802 Split, None, Split, Split,
803 Above, Halant, None, None,
804
805 None, None, None, None,
806 None, Post, Post, None,
807 None, None, None, None,
808 None, None, Below, None,
809
810 None, None, Below, Below,
811 None, None, None, None,
812 None, None, None, None,
813 None, None, None, None,
814
815 None, None, None, None,
816 None, None, None, None,
817 None, None, None, None,
818 None, None, None, None,
819
820 // Malayalam
821 None, None, Post, Post,
822 None, None, None, None,
823 None, None, None, None,
824 None, None, None, None,
825
826 None, None, None, None,
827 None, None, None, None,
828 None, None, None, None,
829 None, None, None, None,
830
831 None, None, None, None,
832 None, None, None, None,
833 None, None, None, None,
834 None, None, None, Post,
835
836 Post, None, Below, None,
837 None, Post, None, None,
838 None, None, None, None,
839 None, None, Post, Post,
840
841 Post, Post, Post, Post,
842 None, None, Pre, Pre,
843 Pre, None, Split, Split,
844 Split, Halant, None, None,
845
846 None, None, None, None,
847 None, None, None, Post,
848 None, None, None, None,
849 None, None, None, None,
850
851 None, None, None, None,
852 None, None, None, None,
853 None, None, None, None,
854 None, None, None, None,
855
856 None, None, None, None,
857 None, None, None, None,
858 None, None, None, None,
859 None, None, None, None,
860
861 // Sinhala
862 None, None, Post, Post,
863 None, None, None, None,
864 None, None, None, None,
865 None, None, None, None,
866
867 None, None, None, None,
868 None, None, None, None,
869 None, None, None, None,
870 None, None, None, None,
871
872 None, None, None, None,
873 None, None, None, None,
874 None, None, None, None,
875 None, None, None, None,
876
877 None, None, None, None,
878 None, None, None, None,
879 None, None, None, None,
880 None, None, None, None,
881
882 None, None, None, None,
883 None, None, None, None,
884 None, None, None, None,
885 None, None, None, Post,
886
887 Post, Post, Above, Above,
888 Below, None, Below, None,
889 Post, Pre, Split, Pre,
890 Split, Split, Split, Post,
891
892 None, None, None, None,
893 None, None, None, None,
894 None, None, None, None,
895 None, None, None, None,
896
897 None, None, Post, Post,
898 None, None, None, None,
899 None, None, None, None,
900 None, None, None, None
901 };
902
form(unsigned short uc)903 static inline Form form(unsigned short uc) {
904 if (uc < 0x900 || uc > 0xdff) {
905 if (uc == 0x25cc)
906 return Consonant;
907 if (uc == 0x200c || uc == 0x200d)
908 return Control;
909 return Other;
910 }
911 return (Form)indicForms[uc-0x900];
912 }
913
indic_position(unsigned short uc)914 static inline Position indic_position(unsigned short uc) {
915 if (uc < 0x900 || uc > 0xdff)
916 return None;
917 return (Position) indicPosition[uc-0x900];
918 }
919
920
// Bit flags combined per script in the scriptProperties table.
enum IndicScriptProperties {
    HasReph  = 1 << 0,
    HasSplit = 1 << 1
};
925
926 const hb_uint8 scriptProperties[10] = {
927 // Devanagari,
928 HasReph,
929 // Bengali,
930 HasReph|HasSplit,
931 // Gurmukhi,
932 0,
933 // Gujarati,
934 HasReph,
935 // Oriya,
936 HasReph|HasSplit,
937 // Tamil,
938 HasSplit,
939 // Telugu,
940 HasSplit,
941 // Kannada,
942 HasSplit|HasReph,
943 // Malayalam,
944 HasSplit,
945 // Sinhala,
946 HasSplit
947 };
948
949 struct IndicOrdering {
950 Form form;
951 Position position;
952 };
953
954 static const IndicOrdering devanagari_order [] = {
955 { Consonant, Below },
956 { Matra, Below },
957 { VowelMark, Below },
958 { StressMark, Below },
959 { Matra, Above },
960 { Matra, Post },
961 { Consonant, Reph },
962 { VowelMark, Above },
963 { StressMark, Above },
964 { VowelMark, Post },
965 { (Form)0, None }
966 };
967
968 static const IndicOrdering bengali_order [] = {
969 { Consonant, Below },
970 { Matra, Below },
971 { Matra, Above },
972 { Consonant, Reph },
973 { VowelMark, Above },
974 { Consonant, Post },
975 { Matra, Post },
976 { VowelMark, Post },
977 { (Form)0, None }
978 };
979
980 static const IndicOrdering gurmukhi_order [] = {
981 { Consonant, Below },
982 { Matra, Below },
983 { Matra, Above },
984 { Consonant, Post },
985 { Matra, Post },
986 { VowelMark, Above },
987 { (Form)0, None }
988 };
989
990 static const IndicOrdering tamil_order [] = {
991 { Matra, Above },
992 { Matra, Post },
993 { VowelMark, Post },
994 { (Form)0, None }
995 };
996
997 static const IndicOrdering telugu_order [] = {
998 { Matra, Above },
999 { Matra, Below },
1000 { Matra, Post },
1001 { Consonant, Below },
1002 { Consonant, Post },
1003 { VowelMark, Post },
1004 { (Form)0, None }
1005 };
1006
1007 static const IndicOrdering kannada_order [] = {
1008 { Matra, Above },
1009 { Matra, Post },
1010 { Consonant, Below },
1011 { Consonant, Post },
1012 { LengthMark, Post },
1013 { Consonant, Reph },
1014 { VowelMark, Post },
1015 { (Form)0, None }
1016 };
1017
1018 static const IndicOrdering malayalam_order [] = {
1019 { Consonant, Below },
1020 { Matra, Below },
1021 { Consonant, Reph },
1022 { Consonant, Post },
1023 { Matra, Post },
1024 { VowelMark, Post },
1025 { (Form)0, None }
1026 };
1027
1028 static const IndicOrdering sinhala_order [] = {
1029 { Matra, Below },
1030 { Matra, Above },
1031 { Matra, Post },
1032 { VowelMark, Post },
1033 { (Form)0, None }
1034 };
1035
1036 static const IndicOrdering * const indic_order[] = {
1037 devanagari_order, // Devanagari
1038 bengali_order, // Bengali
1039 gurmukhi_order, // Gurmukhi
1040 devanagari_order, // Gujarati
1041 bengali_order, // Oriya
1042 tamil_order, // Tamil
1043 telugu_order, // Telugu
1044 kannada_order, // Kannada
1045 malayalam_order, // Malayalam
1046 sinhala_order // Sinhala
1047 };
1048
1049
1050
// Vowel matras that have to be split into two or three parts.
//
// Every record is four values: the matra followed by its parts, with 0 in the
// last slot when there are only two parts. Records are sorted by matra in
// ascending order and the table is closed by a single 0xffff, which is what
// the linear scan in splitMatra() relies on.
static const unsigned short split_matras[] = {
    // matra,  part 1, part 2, part 3

    // Bengali
    0x09cb, 0x09c7, 0x09be, 0,
    0x09cc, 0x09c7, 0x09d7, 0,
    // Oriya
    0x0b48, 0x0b47, 0x0b56, 0,
    0x0b4b, 0x0b47, 0x0b3e, 0,
    0x0b4c, 0x0b47, 0x0b57, 0,
    // Tamil
    0x0bca, 0x0bc6, 0x0bbe, 0,
    0x0bcb, 0x0bc7, 0x0bbe, 0,
    0x0bcc, 0x0bc6, 0x0bd7, 0,
    // Telugu
    0x0c48, 0x0c46, 0x0c56, 0,
    // Kannada
    0x0cc0, 0x0cbf, 0x0cd5, 0,
    0x0cc7, 0x0cc6, 0x0cd5, 0,
    0x0cc8, 0x0cc6, 0x0cd6, 0,
    0x0cca, 0x0cc6, 0x0cc2, 0,
    0x0ccb, 0x0cc6, 0x0cc2, 0x0cd5,
    // Malayalam
    0x0d4a, 0x0d46, 0x0d3e, 0,
    0x0d4b, 0x0d47, 0x0d3e, 0,
    0x0d4c, 0x0d46, 0x0d57, 0,
    // Sinhala
    0x0dda, 0x0dd9, 0x0dca, 0,
    0x0ddc, 0x0dd9, 0x0dcf, 0,
    0x0ddd, 0x0dd9, 0x0dcf, 0x0dca,
    0x0dde, 0x0dd9, 0x0ddf, 0,
    // end marker
    0xffff
};
1085
splitMatra(unsigned short * reordered,int matra,int & len)1086 static inline void splitMatra(unsigned short *reordered, int matra, int &len)
1087 {
1088 unsigned short matra_uc = reordered[matra];
1089 //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);
1090
1091 const unsigned short *split = split_matras;
1092 while (split[0] < matra_uc)
1093 split += 4;
1094
1095 assert(*split == matra_uc);
1096 ++split;
1097
1098 int added_chars = split[2] == 0x0 ? 1 : 2;
1099
1100 memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short));
1101 reordered[matra] = split[0];
1102 reordered[matra+1] = split[1];
1103 if(added_chars == 2)
1104 reordered[matra+2] = split[2];
1105 len += added_chars;
1106 }
1107
1108 #ifndef NO_OPENTYPE
// OpenType features used by the Indic shaper. Each entry pairs a four-letter
// feature tag with the property bit that indic_shape_syllable() uses to
// enable or disable that feature per glyph. The list ends with a { 0, 0 }
// entry and is handed to HB_SelectScript() by HB_IndicShape().
static const HB_OpenTypeFeature indic_features[] = {
    { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
    { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
    { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
    { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
    { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
    { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
    { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
    { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
    { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
    { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
    { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
    { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
    { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
    { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
    { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
    { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
    { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
    // terminator
    { 0, 0 }
};
1129 #endif
1130
1131 // #define INDIC_DEBUG
1132 #ifdef INDIC_DEBUG
1133 #define IDEBUG hb_debug
1134 #include <stdarg.h>
1135
/*
 * printf-style debug sink behind the IDEBUG macro when INDIC_DEBUG is
 * defined: formats msg with the supplied arguments to stderr and appends a
 * newline.
 */
static void hb_debug(const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputs("\n", stderr);
}
1144
1145 #else
1146 #define IDEBUG if(0) printf
1147 #endif
1148
1149 #if 0 //def INDIC_DEBUG
1150 static QString propertiesToString(int properties)
1151 {
1152 QString res;
1153 properties = ~properties;
1154 if (properties & LocaProperty)
1155 res += "Loca ";
1156 if (properties & CcmpProperty)
1157 res += "Ccmp ";
1158 if (properties & InitProperty)
1159 res += "Init ";
1160 if (properties & NuktaProperty)
1161 res += "Nukta ";
1162 if (properties & AkhantProperty)
1163 res += "Akhant ";
1164 if (properties & RephProperty)
1165 res += "Reph ";
1166 if (properties & PreFormProperty)
1167 res += "PreForm ";
1168 if (properties & BelowFormProperty)
1169 res += "BelowForm ";
1170 if (properties & AboveFormProperty)
1171 res += "AboveForm ";
1172 if (properties & HalfFormProperty)
1173 res += "HalfForm ";
1174 if (properties & PostFormProperty)
1175 res += "PostForm ";
1176 if (properties & ConjunctFormProperty)
1177 res += "PostForm ";
1178 if (properties & VattuProperty)
1179 res += "Vattu ";
1180 if (properties & PreSubstProperty)
1181 res += "PreSubst ";
1182 if (properties & BelowSubstProperty)
1183 res += "BelowSubst ";
1184 if (properties & AboveSubstProperty)
1185 res += "AboveSubst ";
1186 if (properties & PostSubstProperty)
1187 res += "PostSubst ";
1188 if (properties & HalantProperty)
1189 res += "Halant ";
1190 if (properties & CligProperty)
1191 res += "Clig ";
1192 if (properties & IndicCaltProperty)
1193 res += "Calt ";
1194 return res;
1195 }
1196 #endif
1197
/*
 * Shapes one Indic syllable: the item->item.length characters of item->string
 * starting at item->item.pos.
 *
 * The characters are copied into a scratch buffer, reordered according to the
 * rules described in the comments below, converted to glyph indices through
 * the font class, and then either run through the OpenType engine (openType
 * true) or positioned heuristically.
 *
 *   openType  whether OpenType shaping should be used for this item
 *   item      shaper item describing the syllable; its glyphs, attributes,
 *             offsets, advances and num_glyphs are written
 *   invalid   if true, a dotted circle (U+25CC) is prepended to the syllable
 *
 * Returns true on success. Returns false if item->num_glyphs is smaller than
 * len+4 (in which case it is set to len+4 so the caller can retry with a
 * larger buffer), if glyph conversion fails, or if OpenType positioning fails.
 */
static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
{
    HB_Script script = item->item.script;
    assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
    // The script's base code point is computed as 0x0900 plus 0x80 per script
    // index; ra, halant and nukta are taken at fixed offsets from that base.
    const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
    const unsigned short ra = script_base + 0x30;
    const unsigned short halant = script_base + 0x4d;
    const unsigned short nukta = script_base + 0x3c;
    bool control = false;

    int len = (int)item->item.length;
    IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);

    // Require headroom in the output: the dotted circle adds one character
    // and splitMatra() can add up to two more.
    if ((int)item->num_glyphs < len+4) {
        item->num_glyphs = len+4;
        return false;
    }

    // Scratch copies: the characters being reordered and, per character,
    // first its Form and later its Position class.
    HB_STACKARRAY(HB_UChar16, reordered, len + 4);
    HB_STACKARRAY(hb_uint8, position, len + 4);

    unsigned char properties = scriptProperties[script-HB_Script_Devanagari];

    if (invalid) {
        // invalid syllables are rendered on a dotted circle
        *reordered = 0x25cc;
        memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
        len++;
    } else {
        memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
    }
    if (reordered[len-1] == 0x200c) // zero width non joiner
        len--;

    int i;
    int base = 0;
    int reph = -1;

#ifdef INDIC_DEBUG
    IDEBUG("original:");
    for (i = 0; i < len; i++) {
        IDEBUG("    %d: %4x", i, reordered[i]);
    }
#endif

    // a single character needs no reordering
    if (len != 1) {
        HB_UChar16 *uc = reordered;
        bool beginsWithRa = false;

        // Rule 1: find base consonant
        //
        // The shaping engine finds the base consonant of the
        // syllable, using the following algorithm: starting from the
        // end of the syllable, move backwards until a consonant is
        // found that does not have a below-base or post-base form
        // (post-base forms have to follow below-base forms), or
        // arrive at the first consonant. The consonant stopped at
        // will be the base.
        //
        // * If the syllable starts with Ra + H (in a script that has
        // 'Reph'), Ra is excluded from candidates for base
        // consonants.
        //
        // * In Kannada and Telugu, the base consonant cannot be
        // farther than 3 consonants from the end of the syllable.
        // #### replace the HasReph property by testing if the feature exists in the font!
        if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
            // 0x9f0 is accepted here in addition to the script's own ra
            if ((properties & HasReph) && (len > 2) &&
                (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
                beginsWithRa = true;

            // a control character right after Ra + H cancels the reph
            if (beginsWithRa && form(*(uc+2)) == Control)
                beginsWithRa = false;

            base = (beginsWithRa ? 2 : 0);
            IDEBUG("    length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);

            int lastConsonant = 0;
            int matra = -1;
            // we remember:
            // * the last consonant since we need it for rule 2
            // * the matras position for rule 3 and 4

            // figure out possible base glyphs
            memset(position, 0, len);
            if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
                bool vattu = false;
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant) {
                        lastConsonant = i;
                        // a ra is marked as vattu unless the previous
                        // consonant was itself marked as vattu
                        vattu = (!vattu && uc[i] == ra);
                        if (vattu) {
                            IDEBUG("excluding vattu glyph at %d from base candidates", i);
                            position[i] = Vattu;
                        }
                    } else if (position[i] == Matra) {
                        // remembers the last matra in the syllable
                        matra = i;
                    }
                }
            } else {
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant)
                        lastConsonant = i;
                    // remembers the first matra in the syllable
                    else if (matra < 0 && position[i] == Matra)
                        matra = i;
                }
            }
            // walk backwards from the end of the syllable looking for the base
            int skipped = 0;
            Position pos = Post;
            for (i = len-1; i >= base; i--) {
                // only consonants are considered, plus control characters
                // in every script except Kannada
                if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
                    continue;

                // a consonant directly following a control character becomes the base
                if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
                    base = i+1;
                    break;
                }

                Position charPosition = indic_position(uc[i]);
                if (pos == Post && charPosition == Post) {
                    pos = Post;
                } else if ((pos == Post || pos == Below) && charPosition == Below) {
                    if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
                        base = i;
                    pos = Below;
                } else {
                    base = i;
                    break;
                }
                // Kannada/Telugu: stop at the third consonant from the end
                if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
                    base = i;
                    break;
                }
                ++skipped;
            }

            IDEBUG("    base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);

            // Rule 2:
            //
            // If the base consonant is not the last one, Uniscribe
            // moves the halant from the base consonant to the last
            // one.
            if (lastConsonant > base) {
                int halantPos = 0;
                if (uc[base+1] == halant)
                    halantPos = base + 1;
                else if (uc[base+1] == nukta && uc[base+2] == halant)
                    halantPos = base + 2;
                if (halantPos > 0) {
                    IDEBUG("    moving halant from %d to %d!", base+1, lastConsonant);
                    // shift everything after the halant one slot to the left
                    // and put the halant where the last consonant was
                    for (i = halantPos; i < lastConsonant; i++)
                        uc[i] = uc[i+1];
                    uc[lastConsonant] = halant;
                }
            }

            // Rule 3:
            //
            // If the syllable starts with Ra + H, Uniscribe moves
            // this combination so that it follows either:

            // * the post-base 'matra' (if any) or the base consonant
            // (in scripts that show similarity to Devanagari, i.e.,
            // Devanagari, Gujarati, Bengali)
            // * the base consonant (other scripts)
            // * the end of the syllable (Kannada)

            Position matra_position = None;
            if (matra > 0)
                matra_position = indic_position(uc[matra]);
            IDEBUG("    matra at %d with form %d, base=%d", matra, matra_position, base);

            if (beginsWithRa && base != 0) {
                // default target: right after the base and any nukta,
                // halant, ZWJ and Ra + H that follow it
                int toPos = base+1;
                if (toPos < len && uc[toPos] == nukta)
                    toPos++;
                if (toPos < len && uc[toPos] == halant)
                    toPos++;
                if (toPos < len && uc[toPos] == 0x200d)
                    toPos++;
                if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
                    toPos += 2;
                if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
                    if (matra_position == Post || matra_position == Split) {
                        toPos = matra+1;
                        // the matra is shifted left by the two slots freed below
                        matra -= 2;
                    }
                } else if (script == HB_Script_Kannada) {
                    toPos = len;
                    matra -= 2;
                }

                IDEBUG("moving leading ra+halant to position %d", toPos);
                // close the two-slot gap at the front, then write Ra + H
                // immediately before toPos
                for (i = 2; i < toPos; i++)
                    uc[i-2] = uc[i];
                uc[toPos-2] = ra;
                uc[toPos-1] = halant;
                base -= 2;
                if (properties & HasReph)
                    reph = toPos-2;
            }

            // Rule 4:

            // Uniscribe splits two- or three-part matras into their
            // parts. This splitting is a character-to-character
            // operation).
            //
            // Uniscribe describes some moving operations for these
            // matras here. For shaping however all pre matras need
            // to be at the beginning of the syllable, so we just move
            // them there now.
            if (matra_position == Split) {
                splitMatra(uc, matra, len);
                // Handle three-part matras (0xccb in Kannada)
                matra_position = indic_position(uc[matra]);
            }

            if (matra_position == Pre) {
                // rotate the pre-base matra to the very front of the syllable
                unsigned short m = uc[matra];
                while (matra--)
                    uc[matra+1] = uc[matra];
                uc[0] = m;
                base++;
            }
        }

        // Rule 5:
        //
        // Uniscribe classifies consonants and 'matra' parts as
        // pre-base, above-base (Reph), below-base or post-base. This
        // classification exists on the character code level and is
        // language-dependent, not font-dependent.
        for (i = 0; i < base; ++i)
            position[i] = Pre;
        position[base] = Base;
        for (i = base+1; i < len; ++i) {
            position[i] = indic_position(uc[i]);
            // #### replace by adjusting table
            if (uc[i] == nukta || uc[i] == halant)
                position[i] = Inherit;
        }
        if (reph > 0) {
            // recalculate reph, it might have changed.
            // (this picks the last ra found after the base)
            for (i = base+1; i < len; ++i)
                if (uc[i] == ra)
                    reph = i;
            position[reph] = Reph;
            position[reph+1] = Inherit;
        }

        // all reordering happens now to the chars after the base
        int fixed = base+1;
        if (fixed < len && uc[fixed] == nukta)
            fixed++;
        if (fixed < len && uc[fixed] == halant)
            fixed++;
        if (fixed < len && uc[fixed] == 0x200d)
            fixed++;

#ifdef INDIC_DEBUG
        for (i = fixed; i < len; ++i)
            IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
#endif
        // we continuously position the matras and vowel marks and increase the fixed
        // until we reached the end.
        const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];

        IDEBUG("    reordering pass:");
        IDEBUG("        base=%d fixed=%d", base, fixed);
        // For each (form, position) pair of the script's ordering table, in
        // table order, pull every matching character forward to 'fixed'.
        int toMove = 0;
        while (finalOrder[toMove].form && fixed < len-1) {
            IDEBUG("        fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
            for (i = fixed; i < len; i++) {
                // IDEBUG() << "           i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i])
                //          << "position=" << position[i];
                if (form(uc[i]) == finalOrder[toMove].form &&
                    position[i] == finalOrder[toMove].position) {
                    // need to move this glyph
                    int to = fixed;
                    if (i < len-1 && position[i+1] == Inherit) {
                        // the character drags the Inherit character that follows it along
                        IDEBUG("         moving two chars from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned short ch2 = uc[i+1];
                        unsigned char pos = position[i];
                        for (int j = i+1; j > to+1; j--) {
                            uc[j] = uc[j-2];
                            position[j] = position[j-2];
                        }
                        uc[to] = ch;
                        uc[to+1] = ch2;
                        position[to] = pos;
                        position[to+1] = pos;
                        fixed += 2;
                    } else {
                        IDEBUG("         moving one char from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned char pos = position[i];
                        for (int j = i; j > to; j--) {
                            uc[j] = uc[j-1];
                            position[j] = position[j-1];
                        }
                        uc[to] = ch;
                        position[to] = pos;
                        fixed++;
                    }
                }
            }
            toMove++;
        }

    }

    if (reph > 0) {
        // recalculate reph, it might have changed.
        for (i = base+1; i < len; ++i)
            if (reordered[i] == ra)
                reph = i;
    }

#ifndef NO_OPENTYPE
    // remember the caller's glyph capacity; num_glyphs is overwritten below
    const int availableGlyphs = item->num_glyphs;
#endif
    if (!item->font->klass->convertStringToGlyphIndices(item->font,
                                                        reordered, len,
                                                        item->glyphs, &item->num_glyphs,
                                                        item->item.bidiLevel % 2))
        goto error;


    IDEBUG("  base=%d, reph=%d", base, reph);
    IDEBUG("reordered:");
    // reset the per-glyph attributes for the whole syllable
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = false;
        item->attributes[i].clusterStart = false;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = false;
        IDEBUG("    %d: %4x", i, reordered[i]);
    }

    // now we have the syllable in the right order, and can start running it through open type.

    // note whether the syllable contains any control character
    for (i = 0; i < len; ++i)
        control |= (form(reordered[i]) == Control);

#ifndef NO_OPENTYPE
    if (openType) {

        // we need to keep track of where the base glyph is for some
        // scripts and use the cluster feature for this. This
        // also means we have to correct the logCluster output from
        // the open type engine manually afterwards. for indic this
        // is rather simple, as all chars just point to the first
        // glyph in the syllable.
        HB_STACKARRAY(unsigned short, clusters, len);
        // NOTE: this per-glyph mask array shadows the outer script
        // 'properties' byte for the rest of this block.
        HB_STACKARRAY(unsigned int, properties, len);

        for (i = 0; i < len; ++i)
            clusters[i] = i;

        // features we should always apply
        // (a feature's bit is cleared in the mask when it should be applied)
        for (i = 0; i < len; ++i)
            properties[i] = ~(LocaProperty
                              | CcmpProperty
                              | NuktaProperty
                              | VattuProperty
                              | ConjunctFormProperty
                              | PreSubstProperty
                              | BelowSubstProperty
                              | AboveSubstProperty
                              | PostSubstProperty
                              | HalantProperty
                              | IndicCaltProperty
                              | PositioningProperties);

        // Loca always applies
        // Ccmp always applies
        // Init
        // (only when the syllable is not preceded by a letter or a mark)
        if (item->item.pos == 0
            || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
            properties[0] &= ~InitProperty;

        // Nukta always applies
        // Akhant
        for (i = 0; i <= base; ++i)
            properties[i] &= ~AkhantProperty;
        // Reph
        if (reph >= 0) {
            properties[reph] &= ~RephProperty;
            properties[reph+1] &= ~RephProperty;
        }
        // BelowForm
        for (i = base+1; i < len; ++i)
            properties[i] &= ~BelowFormProperty;

        if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
            // vattu glyphs need this as well
            bool vattu = false;
            for (i = base-2; i > 1; --i) {
                if (form(reordered[i]) == Consonant) {
                    vattu = (!vattu && reordered[i] == ra);
                    if (vattu) {
                        IDEBUG("forming vattu ligature at %d", i);
                        properties[i] &= ~BelowFormProperty;
                        properties[i+1] &= ~BelowFormProperty;
                    }
                }
            }
        }
        // HalfFormProperty
        for (i = 0; i < base; ++i)
            properties[i] &= ~HalfFormProperty;
        if (control) {
            // the two characters preceding a ZWJ or ZWNJ get the half form too
            for (i = 2; i < len; ++i) {
                if (reordered[i] == 0x200d /* ZWJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                } else if (reordered[i] == 0x200c /* ZWNJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                }
            }
        }
        // PostFormProperty
        for (i = base+1; i < len; ++i)
            properties[i] &= ~PostFormProperty;
        // vattu always applies
        // pres always applies
        // blws always applies
        // abvs always applies
        // psts always applies
        // halant always applies
        // calt always applies

#ifdef INDIC_DEBUG
        //        {
        //            IDEBUG("OT properties:");
        //            for (int i = 0; i < len; ++i)
        //                qDebug("    i: %s", ::propertiesToString(properties[i]).toLatin1().data());
        //        }
#endif

        // initialize
        // NOTE(review): item->log_clusters is pointed at the temporary
        // 'clusters' array, which is released below; the field is not
        // restored afterwards. HB_IndicShape() works on its own saved
        // pointer, but confirm no other caller reads it after this returns.
        item->log_clusters = clusters;
        HB_OpenTypeShape(item, properties);

        int newLen = item->face->buffer->in_length;
        HB_GlyphItem otl_glyphs = item->face->buffer->in_string;

        // move the left matra back to its correct position in malayalam and tamil
        if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
            //             qDebug("reordering matra, len=%d", newLen);
            // need to find the base in the shaped string and move the matra there
            int basePos = 0;
            while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
                basePos++;
            --basePos;
            if (basePos < newLen && basePos > 1) {
                // qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
                HB_GlyphItemRec m = otl_glyphs[0];
                --basePos;
                for (i = 0; i < basePos; ++i)
                    otl_glyphs[i] = otl_glyphs[i+1];
                otl_glyphs[basePos] = m;
            }
        }

        HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);

        HB_FREE_STACKARRAY(clusters);
        HB_FREE_STACKARRAY(properties);

        if (!positioned)
            goto error;

        if (control) {
            IDEBUG("found a control char in the syllable");
            // compact the output in place, dropping glyphs whose cluster maps
            // to a control character (at most one per kept glyph)
            hb_uint32 i = 0, j = 0;
            while (i < item->num_glyphs) {
                if (form(reordered[otl_glyphs[i].cluster]) == Control) {
                    ++i;
                    if (i >= item->num_glyphs)
                        break;
                }
                item->glyphs[j] = item->glyphs[i];
                item->attributes[j] = item->attributes[i];
                // BEGIN android-added
                item->offsets[j] = item->offsets[i];
                item->advances[j] = item->advances[i];
                // END android-added
                ++i;
                ++j;
            }
            item->num_glyphs = j;
        }

    } else {
        HB_HeuristicPosition(item);
    }
#endif // NO_OPENTYPE
    // the first glyph of the syllable starts the cluster
    item->attributes[0].clusterStart = true;

    HB_FREE_STACKARRAY(reordered);
    HB_FREE_STACKARRAY(position);

    IDEBUG("<<<<<<");
    return true;

error:
    // shared failure exit: release the scratch buffers
    HB_FREE_STACKARRAY(reordered);
    HB_FREE_STACKARRAY(position);
    return false;
}
1713
1714 /* syllables are of the form:
1715
1716 (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
1717 (Consonant Nukta? Halant)* Consonant Halant
1718 IndependentVowel VowelMark? StressMark?
1719
   We return syllable boundaries on invalid combinations as well
1721 */
indic_nextSyllableBoundary(HB_Script script,const HB_UChar16 * s,int start,int end,bool * invalid)1722 static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
1723 {
1724 *invalid = false;
1725 IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
1726 const HB_UChar16 *uc = s+start;
1727
1728 int pos = 0;
1729 Form state = form(uc[pos]);
1730 IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
1731 pos++;
1732
1733 if (state != Consonant && state != IndependentVowel) {
1734 if (state != Other)
1735 *invalid = true;
1736 goto finish;
1737 }
1738
1739 while (pos < end - start) {
1740 Form newState = form(uc[pos]);
1741 IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
1742 switch(newState) {
1743 case Control:
1744 newState = state;
1745 if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
1746 break;
1747 // the control character should be the last char in the item
1748 ++pos;
1749 goto finish;
1750 case Consonant:
1751 if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
1752 break;
1753 goto finish;
1754 case Halant:
1755 if (state == Nukta || state == Consonant)
1756 break;
1757 // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
1758 if (script == HB_Script_Bengali && pos == 1 &&
1759 (uc[0] == 0x0985 || uc[0] == 0x098f))
1760 break;
1761 // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
1762 if (script == HB_Script_Sinhala && state == Matra) {
1763 ++pos;
1764 continue;
1765 }
1766 if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
1767 ++pos;
1768 continue;
1769 }
1770 goto finish;
1771 case Nukta:
1772 if (state == Consonant)
1773 break;
1774 goto finish;
1775 case StressMark:
1776 if (state == VowelMark)
1777 break;
1778 // fall through
1779 case VowelMark:
1780 if (state == Matra || state == LengthMark || state == IndependentVowel)
1781 break;
1782 // fall through
1783 case Matra:
1784 if (state == Consonant || state == Nukta)
1785 break;
1786 if (state == Matra) {
1787 // ### needs proper testing for correct two/three part matras
1788 break;
1789 }
1790 // ### not sure if this is correct. If it is, does it apply only to Bengali or should
1791 // it work for all Indic languages?
1792 // the combination Independent_A + Vowel Sign AA is allowed.
1793 if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
1794 break;
1795 if (script == HB_Script_Tamil && state == Matra) {
1796 if (uc[pos-1] == 0x0bc6 &&
1797 (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
1798 break;
1799 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
1800 break;
1801 }
1802 goto finish;
1803
1804 case LengthMark:
1805 if (state == Matra) {
1806 // ### needs proper testing for correct two/three part matras
1807 break;
1808 }
1809 case IndependentVowel:
1810 case Invalid:
1811 case Other:
1812 goto finish;
1813 }
1814 state = newState;
1815 pos++;
1816 }
1817 finish:
1818 return pos+start;
1819 }
1820
HB_IndicShape(HB_ShaperItem * item)1821 HB_Bool HB_IndicShape(HB_ShaperItem *item)
1822 {
1823 assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
1824
1825 HB_Bool openType = false;
1826 #ifndef NO_OPENTYPE
1827 openType = HB_SelectScript(item, indic_features);
1828 #endif
1829 unsigned short *logClusters = item->log_clusters;
1830
1831 HB_ShaperItem syllable = *item;
1832 int first_glyph = 0;
1833
1834 int sstart = item->item.pos;
1835 int end = sstart + item->item.length;
1836 IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
1837 while (sstart < end) {
1838 bool invalid;
1839 int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
1840 IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
1841 invalid ? "true" : "false");
1842 syllable.item.pos = sstart;
1843 syllable.item.length = send-sstart;
1844 syllable.glyphs = item->glyphs + first_glyph;
1845 syllable.attributes = item->attributes + first_glyph;
1846 syllable.offsets = item->offsets + first_glyph;
1847 syllable.advances = item->advances + first_glyph;
1848 syllable.num_glyphs = item->num_glyphs - first_glyph;
1849 if (!indic_shape_syllable(openType, &syllable, invalid)) {
1850 IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
1851 item->num_glyphs += syllable.num_glyphs;
1852 return false;
1853 }
1854 // fix logcluster array
1855 IDEBUG("syllable:");
1856 hb_uint32 g;
1857 for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
1858 IDEBUG(" %d -> glyph %x", g, item->glyphs[g]);
1859 IDEBUG(" logclusters:");
1860 int i;
1861 for (i = sstart; i < send; ++i) {
1862 IDEBUG(" %d -> glyph %d", i, first_glyph);
1863 logClusters[i-item->item.pos] = first_glyph;
1864 }
1865 sstart = send;
1866 first_glyph += syllable.num_glyphs;
1867 }
1868 item->num_glyphs = first_glyph;
1869 return true;
1870 }
1871
HB_IndicAttributes(HB_Script script,const HB_UChar16 * text,hb_uint32 from,hb_uint32 len,HB_CharAttributes * attributes)1872 void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
1873 {
1874 int end = from + len;
1875 const HB_UChar16 *uc = text + from;
1876 attributes += from;
1877 hb_uint32 i = 0;
1878 while (i < len) {
1879 bool invalid;
1880 hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
1881 attributes[i].charStop = true;
1882
1883 if (boundary > len-1) boundary = len;
1884 i++;
1885 while (i < boundary) {
1886 attributes[i].charStop = false;
1887 ++uc;
1888 ++i;
1889 }
1890 assert(i == boundary);
1891 }
1892
1893
1894 }
1895
1896
1897