1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: udataswp.h 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003jun05 16 * created by: Markus W. Scherer 17 * 18 * Definitions for ICU data transformations for different platforms, 19 * changing between big- and little-endian data and/or between 20 * charset families (ASCII<->EBCDIC). 21 */ 22 23 #ifndef __UDATASWP_H__ 24 #define __UDATASWP_H__ 25 26 #include <stdarg.h> 27 #include "unicode/utypes.h" 28 29 /* forward declaration */ 30 31 U_CDECL_BEGIN 32 33 struct UDataSwapper; 34 typedef struct UDataSwapper UDataSwapper; 35 36 /** 37 * Function type for data transformation. 38 * Transforms data, or just returns the length of the data if 39 * the input length is -1. 40 * Swap functions assume that their data pointers are aligned properly. 41 * 42 * Quick implementation outline: 43 * (best to copy and adapt and existing swapper implementation) 44 * check that the data looks like the expected format 45 * if(length<0) { 46 * preflight: 47 * never dereference outData 48 * read inData and determine the data size 49 * assume that inData is long enough for this 50 * } else { 51 * outData can be NULL if length==0 52 * inData==outData (in-place swapping) possible but not required! 53 * verify that length>=(actual size) 54 * if there is a chance that not every byte up to size is reached 55 * due to padding etc.: 56 * if(inData!=outData) { 57 * memcpy(outData, inData, actual size); 58 * } 59 * swap contents 60 * } 61 * return actual size 62 * 63 * Further implementation notes: 64 * - read integers from inData before swapping them 65 * because in-place swapping can make them unreadable 66 * - compareInvChars compares a local Unicode string with already-swapped 67 * output charset strings 68 * 69 * @param ds Pointer to UDataSwapper containing global data about the 70 * transformation and function pointers for handling primitive 71 * types. 72 * @param inData Pointer to the input data to be transformed or examined. 73 * @param length Length of the data, counting bytes. May be -1 for preflighting. 74 * If length>=0, then transform the data. 75 * If length==-1, then only determine the length of the data. 76 * The length cannot be determined from the data itself for all 77 * types of data (e.g., not for simple arrays of integers). 78 * @param outData Pointer to the output data buffer. 79 * If length>=0 (transformation), then the output buffer must 80 * have a capacity of at least length. 81 * If length==-1, then outData will not be used and can be NULL. 82 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must 83 * fulfill U_SUCCESS on input. 84 * @return The actual length of the data. 85 * 86 * @see UDataSwapper 87 * @internal ICU 2.8 88 */ 89 typedef int32_t U_CALLCONV 90 UDataSwapFn(const UDataSwapper *ds, 91 const void *inData, int32_t length, void *outData, 92 UErrorCode *pErrorCode); 93 94 /** 95 * Convert one uint16_t from input to platform endianness. 96 * @internal ICU 2.8 97 */ 98 typedef uint16_t U_CALLCONV 99 UDataReadUInt16(uint16_t x); 100 101 /** 102 * Convert one uint32_t from input to platform endianness. 103 * @internal ICU 2.8 104 */ 105 typedef uint32_t U_CALLCONV 106 UDataReadUInt32(uint32_t x); 107 108 /** 109 * Convert one uint16_t from platform to input endianness. 110 * @internal ICU 2.8 111 */ 112 typedef void U_CALLCONV 113 UDataWriteUInt16(uint16_t *p, uint16_t x); 114 115 /** 116 * Convert one uint32_t from platform to input endianness. 117 * @internal ICU 2.8 118 */ 119 typedef void U_CALLCONV 120 UDataWriteUInt32(uint32_t *p, uint32_t x); 121 122 /** 123 * Compare invariant-character strings, one in the output data and the 124 * other one caller-provided in Unicode. 125 * An output data string is compared because strings are usually swapped 126 * before the rest of the data, to allow for sorting of string tables 127 * according to the output charset. 128 * You can use -1 for the length parameters of NUL-terminated strings as usual. 129 * Returns Unicode code point order for invariant characters. 130 * @internal ICU 2.8 131 */ 132 typedef int32_t U_CALLCONV 133 UDataCompareInvChars(const UDataSwapper *ds, 134 const char *outString, int32_t outLength, 135 const UChar *localString, int32_t localLength); 136 137 /** 138 * Function for message output when an error occurs during data swapping. 139 * A format string and variable number of arguments are passed 140 * like for vprintf(). 141 * 142 * @param context A function-specific context pointer. 143 * @param fmt The format string. 144 * @param args The arguments for format string inserts. 145 * 146 * @internal ICU 2.8 147 */ 148 typedef void U_CALLCONV 149 UDataPrintError(void *context, const char *fmt, va_list args); 150 151 struct UDataSwapper { 152 /** Input endianness. @internal ICU 2.8 */ 153 UBool inIsBigEndian; 154 /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ 155 uint8_t inCharset; 156 /** Output endianness. @internal ICU 2.8 */ 157 UBool outIsBigEndian; 158 /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ 159 uint8_t outCharset; 160 161 /* basic functions for reading data values */ 162 163 /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */ 164 UDataReadUInt16 *readUInt16; 165 /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */ 166 UDataReadUInt32 *readUInt32; 167 /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */ 168 UDataCompareInvChars *compareInvChars; 169 170 /* basic functions for writing data values */ 171 172 /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */ 173 UDataWriteUInt16 *writeUInt16; 174 /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */ 175 UDataWriteUInt32 *writeUInt32; 176 177 /* basic functions for data transformations */ 178 179 /** Transform an array of 16-bit integers. @internal ICU 2.8 */ 180 UDataSwapFn *swapArray16; 181 /** Transform an array of 32-bit integers. @internal ICU 2.8 */ 182 UDataSwapFn *swapArray32; 183 /** Transform an array of 64-bit integers. @internal ICU 53 */ 184 UDataSwapFn *swapArray64; 185 /** Transform an invariant-character string. @internal ICU 2.8 */ 186 UDataSwapFn *swapInvChars; 187 188 /** 189 * Function for message output when an error occurs during data swapping. 190 * Can be NULL. 191 * @internal ICU 2.8 192 */ 193 UDataPrintError *printError; 194 /** Context pointer for printError. @internal ICU 2.8 */ 195 void *printErrorContext; 196 }; 197 198 U_CDECL_END 199 200 U_CAPI UDataSwapper * U_EXPORT2 201 udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, 202 UBool outIsBigEndian, uint8_t outCharset, 203 UErrorCode *pErrorCode); 204 205 /** 206 * Open a UDataSwapper for the given input data and the specified output 207 * characteristics. 208 * Values of -1 for any of the characteristics mean the local platform's 209 * characteristics. 210 * 211 * @see udata_swap 212 * @internal ICU 2.8 213 */ 214 U_CAPI UDataSwapper * U_EXPORT2 215 udata_openSwapperForInputData(const void *data, int32_t length, 216 UBool outIsBigEndian, uint8_t outCharset, 217 UErrorCode *pErrorCode); 218 219 U_CAPI void U_EXPORT2 220 udata_closeSwapper(UDataSwapper *ds); 221 222 /** 223 * Read the beginning of an ICU data piece, recognize magic bytes, 224 * swap the structure. 225 * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece. 226 * 227 * @return The size of the data header, in bytes. 228 * 229 * @internal ICU 2.8 230 */ 231 U_CAPI int32_t U_EXPORT2 232 udata_swapDataHeader(const UDataSwapper *ds, 233 const void *inData, int32_t length, void *outData, 234 UErrorCode *pErrorCode); 235 236 /** 237 * Convert one int16_t from input to platform endianness. 238 * @internal ICU 2.8 239 */ 240 U_CAPI int16_t U_EXPORT2 241 udata_readInt16(const UDataSwapper *ds, int16_t x); 242 243 /** 244 * Convert one int32_t from input to platform endianness. 245 * @internal ICU 2.8 246 */ 247 U_CAPI int32_t U_EXPORT2 248 udata_readInt32(const UDataSwapper *ds, int32_t x); 249 250 /** 251 * Swap a block of invariant, NUL-terminated strings, but not padding 252 * bytes after the last string. 253 * @internal 254 */ 255 U_CAPI int32_t U_EXPORT2 256 udata_swapInvStringBlock(const UDataSwapper *ds, 257 const void *inData, int32_t length, void *outData, 258 UErrorCode *pErrorCode); 259 260 U_CAPI void U_EXPORT2 261 udata_printError(const UDataSwapper *ds, 262 const char *fmt, 263 ...); 264 265 /* internal exports from putil.c -------------------------------------------- */ 266 267 /* declared here to keep them out of the public putil.h */ 268 269 /** 270 * Swap invariant char * strings ASCII->EBCDIC. 271 * @internal 272 */ 273 U_CAPI int32_t U_EXPORT2 274 uprv_ebcdicFromAscii(const UDataSwapper *ds, 275 const void *inData, int32_t length, void *outData, 276 UErrorCode *pErrorCode); 277 278 /** 279 * Copy invariant ASCII char * strings and verify they are invariant. 280 * @internal 281 */ 282 U_CFUNC int32_t 283 uprv_copyAscii(const UDataSwapper *ds, 284 const void *inData, int32_t length, void *outData, 285 UErrorCode *pErrorCode); 286 287 /** 288 * Swap invariant char * strings EBCDIC->ASCII. 289 * @internal 290 */ 291 U_CFUNC int32_t 292 uprv_asciiFromEbcdic(const UDataSwapper *ds, 293 const void *inData, int32_t length, void *outData, 294 UErrorCode *pErrorCode); 295 296 /** 297 * Copy invariant EBCDIC char * strings and verify they are invariant. 298 * @internal 299 */ 300 U_CFUNC int32_t 301 uprv_copyEbcdic(const UDataSwapper *ds, 302 const void *inData, int32_t length, void *outData, 303 UErrorCode *pErrorCode); 304 305 /** 306 * Compare ASCII invariant char * with Unicode invariant UChar * 307 * @internal 308 */ 309 U_CFUNC int32_t 310 uprv_compareInvAscii(const UDataSwapper *ds, 311 const char *outString, int32_t outLength, 312 const UChar *localString, int32_t localLength); 313 314 /** 315 * Compare EBCDIC invariant char * with Unicode invariant UChar * 316 * @internal 317 */ 318 U_CFUNC int32_t 319 uprv_compareInvEbcdic(const UDataSwapper *ds, 320 const char *outString, int32_t outLength, 321 const UChar *localString, int32_t localLength); 322 323 /** 324 * \def uprv_compareInvWithUChar 325 * Compare an invariant-character strings with a UChar string 326 * @internal 327 */ 328 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 329 # define uprv_compareInvWithUChar uprv_compareInvAscii 330 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 331 # define uprv_compareInvWithUChar uprv_compareInvEbcdic 332 #else 333 # error Unknown charset family! 334 #endif 335 336 337 /* material... -------------------------------------------------------------- */ 338 339 #if 0 340 341 /* udata.h */ 342 343 /** 344 * Public API function in udata.c 345 * 346 * Same as udata_openChoice() but automatically swaps the data. 347 * isAcceptable, if not NULL, may accept data with endianness and charset family 348 * different from the current platform's properties. 349 * If the data is acceptable and the platform properties do not match, then 350 * the swap function is called to swap an allocated version of the data. 351 * Preflighting may or may not be performed depending on whether the size of 352 * the loaded data item is known. 353 * 354 * @param isAcceptable Same as for udata_openChoice(). May be NULL. 355 * 356 * @internal ICU 2.8 357 */ 358 U_CAPI UDataMemory * U_EXPORT2 359 udata_openSwap(const char *path, const char *type, const char *name, 360 UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext, 361 UDataSwapFn *swap, 362 UDataPrintError *printError, void *printErrorContext, 363 UErrorCode *pErrorCode); 364 365 #endif 366 367 #endif 368