1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2002-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: propsvec.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002feb22 14 * created by: Markus W. Scherer 15 * 16 * Store bits (Unicode character properties) in bit set vectors. 17 */ 18 19 #ifndef __UPROPSVEC_H__ 20 #define __UPROPSVEC_H__ 21 22 #include "unicode/utypes.h" 23 #include "utrie.h" 24 #include "utrie2.h" 25 26 U_CDECL_BEGIN 27 28 /** 29 * Unicode Properties Vectors associated with code point ranges. 30 * 31 * Rows of uint32_t integers in a contiguous array store 32 * the range limits and the properties vectors. 33 * 34 * Logically, each row has a certain number of uint32_t values, 35 * which is set via the upvec_open() "columns" parameter. 36 * 37 * Internally, two additional columns are stored. 38 * In each internal row, 39 * row[0] contains the start code point and 40 * row[1] contains the limit code point, 41 * which is the start of the next range. 42 * 43 * Initially, there is only one "normal" row for 44 * range [0..0x110000[ with values 0. 45 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. 46 * 47 * It would be possible to store only one range boundary per row, 48 * but self-contained rows allow to later sort them by contents. 49 */ 50 struct UPropsVectors; 51 typedef struct UPropsVectors UPropsVectors; 52 53 /* 54 * Special pseudo code points for storing the initialValue and the errorValue, 55 * which are used to initialize a UTrie2 or similar. 56 */ 57 #define UPVEC_FIRST_SPECIAL_CP 0x110000 58 #define UPVEC_INITIAL_VALUE_CP 0x110000 59 #define UPVEC_ERROR_VALUE_CP 0x110001 60 #define UPVEC_MAX_CP 0x110001 61 62 /* 63 * Special pseudo code point used in upvec_compact() signalling the end of 64 * delivering special values and the beginning of delivering real ones. 65 * Stable value, unlike UPVEC_MAX_CP which might grow over time. 66 */ 67 #define UPVEC_START_REAL_VALUES_CP 0x200000 68 69 /* 70 * Open a UPropsVectors object. 71 * @param columns Number of value integers (uint32_t) per row. 72 */ 73 U_CAPI UPropsVectors * U_EXPORT2 74 upvec_open(int32_t columns, UErrorCode *pErrorCode); 75 76 U_CAPI void U_EXPORT2 77 upvec_close(UPropsVectors *pv); 78 79 /* 80 * In rows for code points [start..end], select the column, 81 * reset the mask bits and set the value bits (ANDed with the mask). 82 * 83 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). 84 */ 85 U_CAPI void U_EXPORT2 86 upvec_setValue(UPropsVectors *pv, 87 UChar32 start, UChar32 end, 88 int32_t column, 89 uint32_t value, uint32_t mask, 90 UErrorCode *pErrorCode); 91 92 /* 93 * Logically const but must not be used on the same pv concurrently! 94 * Always returns 0 if called after upvec_compact(). 95 */ 96 U_CAPI uint32_t U_EXPORT2 97 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); 98 99 /* 100 * pRangeStart and pRangeEnd can be NULL. 101 * @return NULL if rowIndex out of range and for illegal arguments, 102 * or if called after upvec_compact() 103 */ 104 U_CAPI uint32_t * U_EXPORT2 105 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, 106 UChar32 *pRangeStart, UChar32 *pRangeEnd); 107 108 /* 109 * Compact the vectors: 110 * - modify the memory 111 * - keep only unique vectors 112 * - store them contiguously from the beginning of the memory 113 * - for each (non-unique) row, call the handler function 114 * 115 * The handler's rowIndex is the index of the row in the compacted 116 * memory block. 117 * (Therefore, it starts at 0 increases in increments of the columns value.) 118 * 119 * In a first phase, only special values are delivered (each exactly once), 120 * with start==end both equalling a special pseudo code point. 121 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP 122 * where rowIndex is the length of the compacted array, 123 * and the row is arbitrary (but not NULL). 124 * Then, in the second phase, the handler is called for each row of real values. 125 */ 126 typedef void U_CALLCONV 127 UPVecCompactHandler(void *context, 128 UChar32 start, UChar32 end, 129 int32_t rowIndex, uint32_t *row, int32_t columns, 130 UErrorCode *pErrorCode); 131 132 U_CAPI void U_EXPORT2 133 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); 134 135 /* 136 * Get the vectors array after calling upvec_compact(). 137 * The caller must not modify nor release the returned array. 138 * Returns NULL if called before upvec_compact(). 139 */ 140 U_CAPI const uint32_t * U_EXPORT2 141 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); 142 143 /* 144 * Get a clone of the vectors array after calling upvec_compact(). 145 * The caller owns the returned array and must uprv_free() it. 146 * Returns NULL if called before upvec_compact(). 147 */ 148 U_CAPI uint32_t * U_EXPORT2 149 upvec_cloneArray(const UPropsVectors *pv, 150 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); 151 152 /* 153 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted 154 * vectors array, and freeze the trie. 155 */ 156 U_CAPI UTrie2 * U_EXPORT2 157 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); 158 159 struct UPVecToUTrie2Context { 160 UTrie2 *trie; 161 int32_t initialValue; 162 int32_t errorValue; 163 int32_t maxValue; 164 }; 165 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; 166 167 /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ 168 U_CAPI void U_CALLCONV 169 upvec_compactToUTrie2Handler(void *context, 170 UChar32 start, UChar32 end, 171 int32_t rowIndex, uint32_t *row, int32_t columns, 172 UErrorCode *pErrorCode); 173 174 U_CDECL_END 175 176 #endif 177