1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2002-2010, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: propsvec.h 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2002feb22 16 * created by: Markus W. Scherer 17 * 18 * Store bits (Unicode character properties) in bit set vectors. 19 */ 20 21 #ifndef __UPROPSVEC_H__ 22 #define __UPROPSVEC_H__ 23 24 #include "unicode/utypes.h" 25 #include "utrie.h" 26 #include "utrie2.h" 27 28 U_CDECL_BEGIN 29 30 /** 31 * Unicode Properties Vectors associated with code point ranges. 32 * 33 * Rows of uint32_t integers in a contiguous array store 34 * the range limits and the properties vectors. 35 * 36 * Logically, each row has a certain number of uint32_t values, 37 * which is set via the upvec_open() "columns" parameter. 38 * 39 * Internally, two additional columns are stored. 40 * In each internal row, 41 * row[0] contains the start code point and 42 * row[1] contains the limit code point, 43 * which is the start of the next range. 44 * 45 * Initially, there is only one "normal" row for 46 * range [0..0x110000[ with values 0. 47 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. 48 * 49 * It would be possible to store only one range boundary per row, 50 * but self-contained rows allow to later sort them by contents. 51 */ 52 struct UPropsVectors; 53 typedef struct UPropsVectors UPropsVectors; 54 55 /* 56 * Special pseudo code points for storing the initialValue and the errorValue, 57 * which are used to initialize a UTrie2 or similar. 58 */ 59 #define UPVEC_FIRST_SPECIAL_CP 0x110000 60 #define UPVEC_INITIAL_VALUE_CP 0x110000 61 #define UPVEC_ERROR_VALUE_CP 0x110001 62 #define UPVEC_MAX_CP 0x110001 63 64 /* 65 * Special pseudo code point used in upvec_compact() signalling the end of 66 * delivering special values and the beginning of delivering real ones. 67 * Stable value, unlike UPVEC_MAX_CP which might grow over time. 68 */ 69 #define UPVEC_START_REAL_VALUES_CP 0x200000 70 71 /* 72 * Open a UPropsVectors object. 73 * @param columns Number of value integers (uint32_t) per row. 74 */ 75 U_CAPI UPropsVectors * U_EXPORT2 76 upvec_open(int32_t columns, UErrorCode *pErrorCode); 77 78 U_CAPI void U_EXPORT2 79 upvec_close(UPropsVectors *pv); 80 81 /* 82 * In rows for code points [start..end], select the column, 83 * reset the mask bits and set the value bits (ANDed with the mask). 84 * 85 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). 86 */ 87 U_CAPI void U_EXPORT2 88 upvec_setValue(UPropsVectors *pv, 89 UChar32 start, UChar32 end, 90 int32_t column, 91 uint32_t value, uint32_t mask, 92 UErrorCode *pErrorCode); 93 94 /* 95 * Logically const but must not be used on the same pv concurrently! 96 * Always returns 0 if called after upvec_compact(). 97 */ 98 U_CAPI uint32_t U_EXPORT2 99 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); 100 101 /* 102 * pRangeStart and pRangeEnd can be NULL. 103 * @return NULL if rowIndex out of range and for illegal arguments, 104 * or if called after upvec_compact() 105 */ 106 U_CAPI uint32_t * U_EXPORT2 107 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, 108 UChar32 *pRangeStart, UChar32 *pRangeEnd); 109 110 /* 111 * Compact the vectors: 112 * - modify the memory 113 * - keep only unique vectors 114 * - store them contiguously from the beginning of the memory 115 * - for each (non-unique) row, call the handler function 116 * 117 * The handler's rowIndex is the index of the row in the compacted 118 * memory block. 119 * (Therefore, it starts at 0 increases in increments of the columns value.) 120 * 121 * In a first phase, only special values are delivered (each exactly once), 122 * with start==end both equalling a special pseudo code point. 123 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP 124 * where rowIndex is the length of the compacted array, 125 * and the row is arbitrary (but not NULL). 126 * Then, in the second phase, the handler is called for each row of real values. 127 */ 128 typedef void U_CALLCONV 129 UPVecCompactHandler(void *context, 130 UChar32 start, UChar32 end, 131 int32_t rowIndex, uint32_t *row, int32_t columns, 132 UErrorCode *pErrorCode); 133 134 U_CAPI void U_EXPORT2 135 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); 136 137 /* 138 * Get the vectors array after calling upvec_compact(). 139 * The caller must not modify nor release the returned array. 140 * Returns NULL if called before upvec_compact(). 141 */ 142 U_CAPI const uint32_t * U_EXPORT2 143 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); 144 145 /* 146 * Get a clone of the vectors array after calling upvec_compact(). 147 * The caller owns the returned array and must uprv_free() it. 148 * Returns NULL if called before upvec_compact(). 149 */ 150 U_CAPI uint32_t * U_EXPORT2 151 upvec_cloneArray(const UPropsVectors *pv, 152 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); 153 154 /* 155 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted 156 * vectors array, and freeze the trie. 157 */ 158 U_CAPI UTrie2 * U_EXPORT2 159 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); 160 161 struct UPVecToUTrie2Context { 162 UTrie2 *trie; 163 int32_t initialValue; 164 int32_t errorValue; 165 int32_t maxValue; 166 }; 167 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; 168 169 /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ 170 U_CAPI void U_CALLCONV 171 upvec_compactToUTrie2Handler(void *context, 172 UChar32 start, UChar32 end, 173 int32_t rowIndex, uint32_t *row, int32_t columns, 174 UErrorCode *pErrorCode); 175 176 U_CDECL_END 177 178 #endif 179