Lines Matching +full:enum +full:- +full:conversion
6 * Copyright (C) 2000-2013, International Business Machines
11 * encoding: UTF-8
31 * ICU conversion (.cnv) data file structure, following the usual UDataInfo
36 * struct UConverterStaticData -- struct containing the converter name, IBM CCSID,
40 * --------------------
42 * The static data is followed by conversionType-specific data structures.
45 * differ from those for other MBCS-style converters.
47 * _MBCSHeader.version 5 is optional and not backward-compatible
51 * - The _MBCSHeader has variable length (and is always longer than in version 4).
53 * - There is a set of flags which indicate further incompatible changes.
55 * - In particular, one of these flags indicates that most of the fromUnicode
65 * _MBCSHeader.version 5.4/4.4 supports "good one-way" mappings (|4)
69 * slightly and optionally adds a table for conversion to MBCS (non-SBCS)
80 * a UTF-8 trail byte. ASCII is allocated linearly with 128 contiguous entries.
85 * UTF-8 lead byte and middle trail byte. Unlike the older MBCS stage 2 table,
89 * conversion from UTF-16.
93 * code builds one for the code point range for which the runtime conversion
101 * U+0FFF covers UTF-8 two-byte sequences and three-byte sequences starting with
110 * (Surrogate pair assembly for UTF-16, validity checking for UTF-8.)
113 * useful especially for conversion from UTF-8 when the input can be assumed
117 * overflow because with the all-unassigned block 0 and nearly full mappings
121 * _MBCSHeader.version 4.2 adds an optional conversion extension data structure.
130 * In an extension-only file, the static data unicodeMask is 0.
133 * MBCS-style data structure following the static data.
138 * contains 32-bit fields as follows:
147 * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher
151 * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
160 * 6 MBCS_OPT_FROM_U -- if set,
175 * -- base table name for extension-only table
176 * char baseTableName[variable]; -- with NUL plus padding for 4-alignment
178 * -- all _MBCSHeader fields except for version and flags are 0
180 * -- normal base table with optional extension
189 * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
192 * -- stage 1 tables
194 * -- stage 1 table for all of Unicode
195 * uint16_t fromUTable[0x440]; (32-bit-aligned)
197 * -- BMP-only tables have a smaller stage 1 table
198 * uint16_t fromUTable[0x40]; (32-bit-aligned)
201 * -- stage 2 tables
204 * -- SBCS: pure indexes
207 * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
216 * -- stage 3 tables with byte results
218 * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
221 * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
229 * -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher
239 * -- extension table, details see ucnv_ext.h
243 /* MBCS converter data and state -------------------------------------------- */
245 enum {
253 enum {
298 /* single-byte fromUnicode: get the 16-bit result word */
301 /* single-byte fromUnicode using the sbcsIndex */
304 /* single-byte fromUTF8 using the sbcsIndex; l and t must be masked externally; can be l=0 and t<=0…
307 /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
316 /* double-byte fromUnicode using the mbcsIndex */
319 /* double-byte fromUTF8 using the mbcsIndex; l and t1 combined into lt1; lt1 and t2 must be masked …
325 * These per-converter types determine the storage method in stage 3 of the lookup table,
328 enum {
344 MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */
356 /** Constants for fast and UTF-8-friendly conversion. */
357 enum {
358 …SBCS_FAST_MAX=0x0fff, /* maximum code point with UTF-8-friendly SBCS runtime code, s…
360 …MBCS_FAST_MAX=0xd7ff, /* maximum code point with UTF-8-friendly MBCS runtime code, s…
366 * It keeps all the per-converter data and points into the loaded mapping tables.
382 …const uint16_t *mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Frien…
383 …uint16_t sbcsIndex[SBCS_FAST_LIMIT>>6]; /* for fast conversion from low BMP to SBCS (utf8Friendly …
440 enum {
455 enum {
477 uint32_t fullStage2Length; /* number of 32-bit units */
488 * It handles conversion extensions but not GB 18030.
501 * This version of _MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
503 * It does not handle conversion extensions (_extToU()).
511 * It works for single-byte, single-state codepages that only map
516 (UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)])
527 (UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
530 * This is another simple conversion function for internal use by other
531 * conversion implementations.
534 * It handles conversion extensions but not GB 18030.
537 * as one 32-bit value. The function returns the number of bytes in *pValue:
540 * -1 illegal (currently not used, *pValue undefined)
552 * This version of _MBCSFromUChar32() is optimized for single-byte codepages.
555 * It returns the codepage byte for the code point, or -1 if it is unassigned.
577 * Internal function returning a UnicodeSet for toUnicode() conversion.
578 * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
593 * Used by stateful converters which share regular conversion tables