1 /* 2 * 3 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved 4 * 5 */ 6 7 #ifndef __INDICREORDERING_H 8 #define __INDICREORDERING_H 9 10 /** 11 * \file 12 * \internal 13 */ 14 15 #include "LETypes.h" 16 #include "OpenTypeTables.h" 17 18 U_NAMESPACE_BEGIN 19 20 // Characters that get refered to by name... 21 #define C_SIGN_ZWNJ 0x200C 22 #define C_SIGN_ZWJ 0x200D 23 24 // Character class values 25 #define CC_RESERVED 0U 26 #define CC_VOWEL_MODIFIER 1U 27 #define CC_STRESS_MARK 2U 28 #define CC_INDEPENDENT_VOWEL 3U 29 #define CC_INDEPENDENT_VOWEL_2 4U 30 #define CC_INDEPENDENT_VOWEL_3 5U 31 #define CC_CONSONANT 6U 32 #define CC_CONSONANT_WITH_NUKTA 7U 33 #define CC_NUKTA 8U 34 #define CC_DEPENDENT_VOWEL 9U 35 #define CC_SPLIT_VOWEL_PIECE_1 10U 36 #define CC_SPLIT_VOWEL_PIECE_2 11U 37 #define CC_SPLIT_VOWEL_PIECE_3 12U 38 #define CC_VIRAMA 13U 39 #define CC_ZERO_WIDTH_MARK 14U 40 #define CC_COUNT 15U 41 42 // Character class flags 43 #define CF_CLASS_MASK 0x0000FFFFU 44 45 #define CF_CONSONANT 0x80000000U 46 47 #define CF_REPH 0x40000000U 48 #define CF_VATTU 0x20000000U 49 #define CF_BELOW_BASE 0x10000000U 50 #define CF_POST_BASE 0x08000000U 51 #define CF_LENGTH_MARK 0x04000000U 52 53 #define CF_POS_BEFORE 0x00300000U 54 #define CF_POS_BELOW 0x00200000U 55 #define CF_POS_ABOVE 0x00100000U 56 #define CF_POS_AFTER 0x00000000U 57 #define CF_POS_MASK 0x00300000U 58 59 #define CF_INDEX_MASK 0x000F0000U 60 #define CF_INDEX_SHIFT 16 61 62 // Script flag bits 63 #define SF_MATRAS_AFTER_BASE 0x80000000U 64 #define SF_REPH_AFTER_BELOW 0x40000000U 65 #define SF_EYELASH_RA 0x20000000U 66 #define SF_MPRE_FIXUP 0x10000000U 67 #define SF_FILTER_ZERO_WIDTH 0x08000000U 68 69 #define SF_POST_BASE_LIMIT_MASK 0x0000FFFFU 70 #define SF_NO_POST_BASE_LIMIT 0x00007FFFU 71 72 typedef LEUnicode SplitMatra[3]; 73 74 class MPreFixups; 75 class LEGlyphStorage; 76 77 struct IndicClassTable 78 { 79 typedef le_uint32 CharClass; 80 typedef le_uint32 ScriptFlags; 81 82 LEUnicode firstChar; 83 LEUnicode lastChar; 84 le_int32 worstCaseExpansion; 85 ScriptFlags scriptFlags; 86 const CharClass *classTable; 87 const SplitMatra *splitMatraTable; 88 89 inline le_int32 getWorstCaseExpansion() const; 90 inline le_bool getFilterZeroWidth() const; 91 92 CharClass getCharClass(LEUnicode ch) const; 93 94 inline const SplitMatra *getSplitMatra(CharClass charClass) const; 95 96 inline le_bool isVowelModifier(LEUnicode ch) const; 97 inline le_bool isStressMark(LEUnicode ch) const; 98 inline le_bool isConsonant(LEUnicode ch) const; 99 inline le_bool isReph(LEUnicode ch) const; 100 inline le_bool isVirama(LEUnicode ch) const; 101 inline le_bool isNukta(LEUnicode ch) const; 102 inline le_bool isVattu(LEUnicode ch) const; 103 inline le_bool isMatra(LEUnicode ch) const; 104 inline le_bool isSplitMatra(LEUnicode ch) const; 105 inline le_bool isLengthMark(LEUnicode ch) const; 106 inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const; 107 inline le_bool hasPostBaseForm(LEUnicode ch) const; 108 inline le_bool hasBelowBaseForm(LEUnicode ch) const; 109 110 inline static le_bool isVowelModifier(CharClass charClass); 111 inline static le_bool isStressMark(CharClass charClass); 112 inline static le_bool isConsonant(CharClass charClass); 113 inline static le_bool isReph(CharClass charClass); 114 inline static le_bool isVirama(CharClass charClass); 115 inline static le_bool isNukta(CharClass charClass); 116 inline static le_bool isVattu(CharClass charClass); 117 inline static le_bool isMatra(CharClass charClass); 118 inline static le_bool isSplitMatra(CharClass charClass); 119 inline static le_bool isLengthMark(CharClass charClass); 120 inline static le_bool hasPostOrBelowBaseForm(CharClass charClass); 121 inline static le_bool hasPostBaseForm(CharClass charClass); 122 inline static le_bool hasBelowBaseForm(CharClass charClass); 123 124 static const IndicClassTable *getScriptClassTable(le_int32 scriptCode); 125 }; 126 127 class IndicReordering /* not : public UObject because all methods are static */ { 128 public: 129 static le_int32 getWorstCaseExpansion(le_int32 scriptCode); 130 131 static le_bool getFilterZeroWidth(le_int32 scriptCode); 132 133 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, 134 LEUnicode *outChars, LEGlyphStorage &glyphStorage, 135 MPreFixups **outMPreFixups); 136 137 static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage); 138 139 static const FeatureMap *getFeatureMap(le_int32 &count); 140 141 private: 142 // do not instantiate 143 IndicReordering(); 144 145 static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); 146 147 }; 148 getWorstCaseExpansion()149inline le_int32 IndicClassTable::getWorstCaseExpansion() const 150 { 151 return worstCaseExpansion; 152 } 153 getFilterZeroWidth()154inline le_bool IndicClassTable::getFilterZeroWidth() const 155 { 156 return (scriptFlags & SF_FILTER_ZERO_WIDTH) != 0; 157 } 158 getSplitMatra(CharClass charClass)159inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const 160 { 161 le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT; 162 163 return &splitMatraTable[index - 1]; 164 } 165 isVowelModifier(CharClass charClass)166inline le_bool IndicClassTable::isVowelModifier(CharClass charClass) 167 { 168 return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER; 169 } 170 isStressMark(CharClass charClass)171inline le_bool IndicClassTable::isStressMark(CharClass charClass) 172 { 173 return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK; 174 } 175 isConsonant(CharClass charClass)176inline le_bool IndicClassTable::isConsonant(CharClass charClass) 177 { 178 return (charClass & CF_CONSONANT) != 0; 179 } 180 isReph(CharClass charClass)181inline le_bool IndicClassTable::isReph(CharClass charClass) 182 { 183 return (charClass & CF_REPH) != 0; 184 } 185 isNukta(CharClass charClass)186inline le_bool IndicClassTable::isNukta(CharClass charClass) 187 { 188 return (charClass & CF_CLASS_MASK) == CC_NUKTA; 189 } 190 isVirama(CharClass charClass)191inline le_bool IndicClassTable::isVirama(CharClass charClass) 192 { 193 return (charClass & CF_CLASS_MASK) == CC_VIRAMA; 194 } 195 isVattu(CharClass charClass)196inline le_bool IndicClassTable::isVattu(CharClass charClass) 197 { 198 return (charClass & CF_VATTU) != 0; 199 } 200 isMatra(CharClass charClass)201inline le_bool IndicClassTable::isMatra(CharClass charClass) 202 { 203 charClass &= CF_CLASS_MASK; 204 205 return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3; 206 } 207 isSplitMatra(CharClass charClass)208inline le_bool IndicClassTable::isSplitMatra(CharClass charClass) 209 { 210 return (charClass & CF_INDEX_MASK) != 0; 211 } 212 isLengthMark(CharClass charClass)213inline le_bool IndicClassTable::isLengthMark(CharClass charClass) 214 { 215 return (charClass & CF_LENGTH_MARK) != 0; 216 } 217 hasPostOrBelowBaseForm(CharClass charClass)218inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass) 219 { 220 return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0; 221 } 222 hasPostBaseForm(CharClass charClass)223inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass) 224 { 225 return (charClass & CF_POST_BASE) != 0; 226 } 227 hasBelowBaseForm(CharClass charClass)228inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass) 229 { 230 return (charClass & CF_BELOW_BASE) != 0; 231 } 232 isVowelModifier(LEUnicode ch)233inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const 234 { 235 return isVowelModifier(getCharClass(ch)); 236 } 237 isStressMark(LEUnicode ch)238inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const 239 { 240 return isStressMark(getCharClass(ch)); 241 } 242 isConsonant(LEUnicode ch)243inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const 244 { 245 return isConsonant(getCharClass(ch)); 246 } 247 isReph(LEUnicode ch)248inline le_bool IndicClassTable::isReph(LEUnicode ch) const 249 { 250 return isReph(getCharClass(ch)); 251 } 252 isVirama(LEUnicode ch)253inline le_bool IndicClassTable::isVirama(LEUnicode ch) const 254 { 255 return isVirama(getCharClass(ch)); 256 } 257 isNukta(LEUnicode ch)258inline le_bool IndicClassTable::isNukta(LEUnicode ch) const 259 { 260 return isNukta(getCharClass(ch)); 261 } 262 isVattu(LEUnicode ch)263inline le_bool IndicClassTable::isVattu(LEUnicode ch) const 264 { 265 return isVattu(getCharClass(ch)); 266 } 267 isMatra(LEUnicode ch)268inline le_bool IndicClassTable::isMatra(LEUnicode ch) const 269 { 270 return isMatra(getCharClass(ch)); 271 } 272 isSplitMatra(LEUnicode ch)273inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const 274 { 275 return isSplitMatra(getCharClass(ch)); 276 } 277 isLengthMark(LEUnicode ch)278inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const 279 { 280 return isLengthMark(getCharClass(ch)); 281 } 282 hasPostOrBelowBaseForm(LEUnicode ch)283inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const 284 { 285 return hasPostOrBelowBaseForm(getCharClass(ch)); 286 } 287 hasPostBaseForm(LEUnicode ch)288inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const 289 { 290 return hasPostBaseForm(getCharClass(ch)); 291 } 292 hasBelowBaseForm(LEUnicode ch)293inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const 294 { 295 return hasBelowBaseForm(getCharClass(ch)); 296 } 297 298 U_NAMESPACE_END 299 #endif 300