1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: ucol_elm.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created 02/22/2001 14 * created by: Vladimir Weinstein 15 * 16 * This program reads the Franctional UCA table and generates 17 * internal format for UCA table as well as inverse UCA table. 18 * It then writes binary files containing the data: ucadata.dat 19 * & invuca.dat 20 */ 21 #ifndef UCOL_UCAELEMS_H 22 #define UCOL_UCAELEMS_H 23 24 #include "unicode/utypes.h" 25 #include "unicode/uniset.h" 26 #include "ucol_tok.h" 27 28 #if !UCONFIG_NO_COLLATION 29 30 #include "ucol_imp.h" 31 32 #ifdef UCOL_DEBUG 33 #include "cmemory.h" 34 #include <stdio.h> 35 #endif 36 37 U_CDECL_BEGIN 38 39 /* This is the maximum trie capacity for the mapping trie. 40 Due to current limitations in genuca and the design of UTrie, 41 this number can't be more than 256K. 42 As of Unicode 5, it currently could safely go to 128K without 43 a problem. Normally, less than 32K are tailored. 44 */ 45 #define UCOL_ELM_TRIE_CAPACITY 0x40000 46 47 /* This is the maxmun capacity for temparay combining class 48 * table. The table will be compacted after scanning all the 49 * Unicode codepoints. 50 */ 51 #define UCOL_MAX_CM_TAB 0x10000 52 53 54 typedef struct { 55 uint32_t *CEs; 56 int32_t position; 57 int32_t size; 58 } ExpansionTable; 59 60 typedef struct { 61 UChar prefixChars[128]; 62 UChar *prefix; 63 uint32_t prefixSize; 64 UChar uchars[128]; 65 UChar *cPoints; 66 uint32_t cSize; /* Number of characters in sequence - for contraction */ 67 uint32_t noOfCEs; /* Number of collation elements */ 68 uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */ 69 uint32_t mapCE; /* This is the value element maps in original table */ 70 uint32_t sizePrim[128]; 71 uint32_t sizeSec[128]; 72 uint32_t sizeTer[128]; 73 UBool caseBit; 74 UBool isThai; 75 } UCAElements; 76 77 typedef struct { 78 uint32_t *endExpansionCE; 79 UBool *isV; 80 int32_t position; 81 int32_t size; 82 uint8_t maxLSize; 83 uint8_t maxVSize; 84 uint8_t maxTSize; 85 } MaxJamoExpansionTable; 86 87 typedef struct { 88 uint32_t *endExpansionCE; 89 uint8_t *expansionCESize; 90 int32_t position; 91 int32_t size; 92 } MaxExpansionTable; 93 94 typedef struct { 95 uint16_t index[256]; /* index of cPoints by combining class 0-255. */ 96 UChar *cPoints; /* code point array of all combining marks */ 97 uint32_t size; /* total number of combining marks */ 98 } CombinClassTable; 99 100 typedef struct { 101 /*CompactEIntArray *mapping; */ 102 UNewTrie *mapping; 103 ExpansionTable *expansions; 104 struct CntTable *contractions; 105 UCATableHeader *image; 106 UColOptionSet *options; 107 MaxExpansionTable *maxExpansions; 108 MaxJamoExpansionTable *maxJamoExpansions; 109 uint8_t *unsafeCP; 110 uint8_t *contrEndCP; 111 const UCollator *UCA; 112 UHashtable *prefixLookup; 113 CombinClassTable *cmLookup; /* combining class lookup for tailoring. */ 114 } tempUCATable; 115 116 typedef struct { 117 UChar cp; 118 uint16_t cClass; // combining class 119 }CompData; 120 121 typedef struct { 122 CompData *precomp; 123 int32_t precompLen; 124 UChar *decomp; 125 int32_t decompLen; 126 UChar *comp; 127 int32_t compLen; 128 uint16_t curClass; 129 uint16_t tailoringCM; 130 int32_t cmPos; 131 }tempTailorContext; 132 133 U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status); 134 U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t); 135 U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status); 136 U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status); 137 138 U_CAPI int32_t U_EXPORT2 139 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, 140 icu::UnicodeSet *closed, UErrorCode *status); 141 142 U_CDECL_END 143 144 #endif /* #if !UCONFIG_NO_COLLATION */ 145 146 #endif 147