• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
4  *
5  */
6 
7 #ifndef __INDICREORDERING_H
8 #define __INDICREORDERING_H
9 
10 /**
11  * \file
12  * \internal
13  */
14 
15 #include "LETypes.h"
16 #include "OpenTypeTables.h"
17 
18 U_NAMESPACE_BEGIN
19 
// Characters that get referred to by name...
#define C_SIGN_ZWNJ           0x200C  // U+200C ZERO WIDTH NON-JOINER
#define C_SIGN_ZWJ            0x200D  // U+200D ZERO WIDTH JOINER
23 
// Character class values.
//
// These are the values compared against (charClass & CF_CLASS_MASK) by the
// IndicClassTable predicates. They are consecutive, starting at zero, so
// CC_COUNT is one past the highest class value.
#define CC_RESERVED               0U
#define CC_VOWEL_MODIFIER         1U
#define CC_STRESS_MARK            2U
#define CC_INDEPENDENT_VOWEL      3U
#define CC_INDEPENDENT_VOWEL_2    4U
#define CC_INDEPENDENT_VOWEL_3    5U
#define CC_CONSONANT              6U
#define CC_CONSONANT_WITH_NUKTA   7U
#define CC_NUKTA                  8U
// CC_DEPENDENT_VOWEL .. CC_SPLIT_VOWEL_PIECE_3 must stay contiguous:
// IndicClassTable::isMatra() tests for membership with a range check.
#define CC_DEPENDENT_VOWEL        9U
#define CC_SPLIT_VOWEL_PIECE_1   10U
#define CC_SPLIT_VOWEL_PIECE_2   11U
#define CC_SPLIT_VOWEL_PIECE_3   12U
#define CC_VIRAMA                13U
#define CC_ZERO_WIDTH_MARK       14U
#define CC_COUNT                 15U
41 
// Character class flags.
//
// A CharClass word carries a CC_* class value in its low 16 bits and the
// flag bits / bit fields below in its high 16 bits.

// Selects the CC_* class value.
#define CF_CLASS_MASK    0x0000FFFFU

// Tested by IndicClassTable::isConsonant().
#define CF_CONSONANT     0x80000000U

// Single-bit properties, each tested by the IndicClassTable predicate of the
// corresponding name (isReph, isVattu, hasBelowBaseForm, hasPostBaseForm,
// isLengthMark).
#define CF_REPH          0x40000000U
#define CF_VATTU         0x20000000U
#define CF_BELOW_BASE    0x10000000U
#define CF_POST_BASE     0x08000000U
#define CF_LENGTH_MARK   0x04000000U

// Two-bit position field; the four CF_POS_* values are the possible contents
// of (charClass & CF_POS_MASK).
// NOTE(review): presumably the placement of a mark relative to its base
// consonant - confirm against the reordering implementation.
#define CF_POS_BEFORE    0x00300000U
#define CF_POS_BELOW     0x00200000U
#define CF_POS_ABOVE     0x00100000U
#define CF_POS_AFTER     0x00000000U
#define CF_POS_MASK      0x00300000U

// Four-bit index field. IndicClassTable::getSplitMatra() extracts it with
// CF_INDEX_SHIFT and uses (index - 1) to address the split matra table;
// IndicClassTable::isSplitMatra() treats a zero field as "not a split matra".
#define CF_INDEX_MASK    0x000F0000U
#define CF_INDEX_SHIFT   16
61 
// Script flag bits (values for IndicClassTable::scriptFlags).
//
// Only SF_FILTER_ZERO_WIDTH is consumed in this header, by
// IndicClassTable::getFilterZeroWidth(); the other bits are interpreted by
// code outside this file.
#define SF_MATRAS_AFTER_BASE     0x80000000U
#define SF_REPH_AFTER_BELOW      0x40000000U
#define SF_EYELASH_RA            0x20000000U
#define SF_MPRE_FIXUP            0x10000000U
#define SF_FILTER_ZERO_WIDTH     0x08000000U

// The low 16 bits of the script flags form a numeric field.
// NOTE(review): presumably a limit on post-base consonants, with
// SF_NO_POST_BASE_LIMIT as the "unlimited" value - confirm in the
// reordering implementation.
#define SF_POST_BASE_LIMIT_MASK  0x0000FFFFU
#define SF_NO_POST_BASE_LIMIT    0x00007FFFU
71 
72 typedef LEUnicode SplitMatra[3];
73 
74 class MPreFixups;
75 class LEGlyphStorage;
76 
77 struct IndicClassTable
78 {
79     typedef le_uint32 CharClass;
80     typedef le_uint32 ScriptFlags;
81 
82     LEUnicode firstChar;
83     LEUnicode lastChar;
84     le_int32 worstCaseExpansion;
85     ScriptFlags scriptFlags;
86     const CharClass *classTable;
87     const SplitMatra *splitMatraTable;
88 
89     inline le_int32 getWorstCaseExpansion() const;
90     inline le_bool getFilterZeroWidth() const;
91 
92     CharClass getCharClass(LEUnicode ch) const;
93 
94     inline const SplitMatra *getSplitMatra(CharClass charClass) const;
95 
96     inline le_bool isVowelModifier(LEUnicode ch) const;
97     inline le_bool isStressMark(LEUnicode ch) const;
98     inline le_bool isConsonant(LEUnicode ch) const;
99     inline le_bool isReph(LEUnicode ch) const;
100     inline le_bool isVirama(LEUnicode ch) const;
101     inline le_bool isNukta(LEUnicode ch) const;
102     inline le_bool isVattu(LEUnicode ch) const;
103     inline le_bool isMatra(LEUnicode ch) const;
104     inline le_bool isSplitMatra(LEUnicode ch) const;
105     inline le_bool isLengthMark(LEUnicode ch) const;
106     inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
107     inline le_bool hasPostBaseForm(LEUnicode ch) const;
108     inline le_bool hasBelowBaseForm(LEUnicode ch) const;
109 
110     inline static le_bool isVowelModifier(CharClass charClass);
111     inline static le_bool isStressMark(CharClass charClass);
112     inline static le_bool isConsonant(CharClass charClass);
113     inline static le_bool isReph(CharClass charClass);
114     inline static le_bool isVirama(CharClass charClass);
115     inline static le_bool isNukta(CharClass charClass);
116     inline static le_bool isVattu(CharClass charClass);
117     inline static le_bool isMatra(CharClass charClass);
118     inline static le_bool isSplitMatra(CharClass charClass);
119     inline static le_bool isLengthMark(CharClass charClass);
120     inline static le_bool hasPostOrBelowBaseForm(CharClass charClass);
121     inline static le_bool hasPostBaseForm(CharClass charClass);
122     inline static le_bool hasBelowBaseForm(CharClass charClass);
123 
124     static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
125 };
126 
127 class IndicReordering /* not : public UObject because all methods are static */ {
128 public:
129     static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
130 
131     static le_bool getFilterZeroWidth(le_int32 scriptCode);
132 
133     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
134         LEUnicode *outChars, LEGlyphStorage &glyphStorage,
135         MPreFixups **outMPreFixups);
136 
137     static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage);
138 
139     static const FeatureMap *getFeatureMap(le_int32 &count);
140 
141 private:
142     // do not instantiate
143     IndicReordering();
144 
145     static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
146 
147 };
148 
getWorstCaseExpansion()149 inline le_int32 IndicClassTable::getWorstCaseExpansion() const
150 {
151     return worstCaseExpansion;
152 }
153 
getFilterZeroWidth()154 inline le_bool IndicClassTable::getFilterZeroWidth() const
155 {
156     return (scriptFlags & SF_FILTER_ZERO_WIDTH) != 0;
157 }
158 
getSplitMatra(CharClass charClass)159 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
160 {
161     le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
162 
163     return &splitMatraTable[index - 1];
164 }
165 
isVowelModifier(CharClass charClass)166 inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
167 {
168     return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER;
169 }
170 
isStressMark(CharClass charClass)171 inline le_bool IndicClassTable::isStressMark(CharClass charClass)
172 {
173     return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK;
174 }
175 
isConsonant(CharClass charClass)176 inline le_bool IndicClassTable::isConsonant(CharClass charClass)
177 {
178     return (charClass & CF_CONSONANT) != 0;
179 }
180 
isReph(CharClass charClass)181 inline le_bool IndicClassTable::isReph(CharClass charClass)
182 {
183     return (charClass & CF_REPH) != 0;
184 }
185 
isNukta(CharClass charClass)186 inline le_bool IndicClassTable::isNukta(CharClass charClass)
187 {
188     return (charClass & CF_CLASS_MASK) == CC_NUKTA;
189 }
190 
isVirama(CharClass charClass)191 inline le_bool IndicClassTable::isVirama(CharClass charClass)
192 {
193     return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
194 }
195 
isVattu(CharClass charClass)196 inline le_bool IndicClassTable::isVattu(CharClass charClass)
197 {
198     return (charClass & CF_VATTU) != 0;
199 }
200 
isMatra(CharClass charClass)201 inline le_bool IndicClassTable::isMatra(CharClass charClass)
202 {
203     charClass &= CF_CLASS_MASK;
204 
205     return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3;
206 }
207 
isSplitMatra(CharClass charClass)208 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
209 {
210     return (charClass & CF_INDEX_MASK) != 0;
211 }
212 
isLengthMark(CharClass charClass)213 inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
214 {
215     return (charClass & CF_LENGTH_MARK) != 0;
216 }
217 
hasPostOrBelowBaseForm(CharClass charClass)218 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
219 {
220     return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
221 }
222 
hasPostBaseForm(CharClass charClass)223 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
224 {
225     return (charClass & CF_POST_BASE) != 0;
226 }
227 
hasBelowBaseForm(CharClass charClass)228 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
229 {
230     return (charClass & CF_BELOW_BASE) != 0;
231 }
232 
isVowelModifier(LEUnicode ch)233 inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
234 {
235     return isVowelModifier(getCharClass(ch));
236 }
237 
isStressMark(LEUnicode ch)238 inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
239 {
240     return isStressMark(getCharClass(ch));
241 }
242 
isConsonant(LEUnicode ch)243 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
244 {
245     return isConsonant(getCharClass(ch));
246 }
247 
isReph(LEUnicode ch)248 inline le_bool IndicClassTable::isReph(LEUnicode ch) const
249 {
250     return isReph(getCharClass(ch));
251 }
252 
isVirama(LEUnicode ch)253 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
254 {
255     return isVirama(getCharClass(ch));
256 }
257 
isNukta(LEUnicode ch)258 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
259 {
260     return isNukta(getCharClass(ch));
261 }
262 
isVattu(LEUnicode ch)263 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
264 {
265     return isVattu(getCharClass(ch));
266 }
267 
isMatra(LEUnicode ch)268 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
269 {
270     return isMatra(getCharClass(ch));
271 }
272 
isSplitMatra(LEUnicode ch)273 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
274 {
275     return isSplitMatra(getCharClass(ch));
276 }
277 
isLengthMark(LEUnicode ch)278 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
279 {
280     return isLengthMark(getCharClass(ch));
281 }
282 
hasPostOrBelowBaseForm(LEUnicode ch)283 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
284 {
285     return hasPostOrBelowBaseForm(getCharClass(ch));
286 }
287 
hasPostBaseForm(LEUnicode ch)288 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
289 {
290     return hasPostBaseForm(getCharClass(ch));
291 }
292 
hasBelowBaseForm(LEUnicode ch)293 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
294 {
295     return hasBelowBaseForm(getCharClass(ch));
296 }
297 
298 U_NAMESPACE_END
299 #endif
300