1 * Summary: interface for the encoding conversion functions 2 * Description: interface for the encoding conversion functions needed for 3 * XML basic encoding and iconv() support. 4 * 5 * Related specs are 6 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * Copy: See Copyright for the status of this software. 17 * 18 * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A. 19 20 /if not defined(XML_CHAR_ENCODING_H__) 21 /define XML_CHAR_ENCODING_H__ 22 23 /include "libxmlrpg/xmlversion" 24 /include "libxmlrpg/xmlTypesC" 25 26 * xmlCharEncoding: 27 * 28 * Predefined values for some standard encodings. 29 * Libxml does not do beforehand translation on UTF8 and ISOLatinX. 30 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. 31 * 32 * Anything else would have to be translated to UTF8 before being 33 * given to the parser itself. The BOM for UTF16 and the encoding 34 * declaration are looked at and a converter is looked for at that 35 * point. If not found the parser stops here as asked by the XML REC. A 36 * converter can be registered by the user 37 * xmlRegisterCharEncodingHandler but the current form doesn't allow 38 * stateful transcoding (a serious problem agreed !). If iconv has been 39 * found it will be used automatically and allow stateful transcoding, 40 * the simplest is then to be sure to enable iconv and to provide iconv 41 * libs for the encoding support needed. 42 * 43 * Note that the generic "UTF-16" is not a predefined value. Instead, only 44 * the specific UTF-16LE and UTF-16BE are present. 45 46 d xmlCharEncoding... 47 d s based(######typedef######) 48 d like(xmlCenum) 49 d XML_CHAR_ENCODING_ERROR... No encoding detected 50 d c -1 51 d XML_CHAR_ENCODING_NONE... No encoding detected 52 d c 0 53 d XML_CHAR_ENCODING_UTF8... UTF-8 54 d c 1 55 d XML_CHAR_ENCODING_UTF16LE... UTF-16 little endian 56 d c 2 57 d XML_CHAR_ENCODING_UTF16BE... UTF-16 big endian 58 d c 3 59 d XML_CHAR_ENCODING_UCS4LE... UCS-4 little endian 60 d c 4 61 d XML_CHAR_ENCODING_UCS4BE... UCS-4 big endian 62 d c 5 63 d XML_CHAR_ENCODING_EBCDIC... EBCDIC uh! 64 d c 6 65 d XML_CHAR_ENCODING_UCS4_2143... UCS-4 unusual order 66 d c 7 67 d XML_CHAR_ENCODING_UCS4_3412... UCS-4 unusual order 68 d c 8 69 d XML_CHAR_ENCODING_UCS2... UCS-2 70 d c 9 71 d XML_CHAR_ENCODING_8859_1... ISO-8859-1 ISOLatin1 72 d c 10 73 d XML_CHAR_ENCODING_8859_2... ISO-8859-2 ISOLatin2 74 d c 11 75 d XML_CHAR_ENCODING_8859_3... ISO-8859-3 76 d c 12 77 d XML_CHAR_ENCODING_8859_4... ISO-8859-4 78 d c 13 79 d XML_CHAR_ENCODING_8859_5... ISO-8859-5 80 d c 14 81 d XML_CHAR_ENCODING_8859_6... ISO-8859-6 82 d c 15 83 d XML_CHAR_ENCODING_8859_7... ISO-8859-7 84 d c 16 85 d XML_CHAR_ENCODING_8859_8... ISO-8859-8 86 d c 17 87 d XML_CHAR_ENCODING_8859_9... ISO-8859-9 88 d c 18 89 d XML_CHAR_ENCODING_2022_JP... ISO-2022-JP 90 d c 19 91 d XML_CHAR_ENCODING_SHIFT_JIS... Shift_JIS 92 d c 20 93 d XML_CHAR_ENCODING_EUC_JP... EUC-JP 94 d c 21 95 d XML_CHAR_ENCODING_ASCII... Pure ASCII 96 d c 22 97 98 * xmlCharEncodingInputFunc: 99 * @out: a pointer to an array of bytes to store the UTF-8 result 100 * @outlen: the length of @out 101 * @in: a pointer to an array of chars in the original encoding 102 * @inlen: the length of @in 103 * 104 * Take a block of chars in the original encoding and try to convert 105 * it to an UTF-8 block of chars out. 106 * 107 * Returns the number of bytes written, -1 if lack of space, or -2 108 * if the transcoding failed. 109 * The value of @inlen after return is the number of octets consumed 110 * if the return value is positive, else unpredictiable. 111 * The value of @outlen after return is the number of octets consumed. 112 113 d xmlCharEncodingInputFunc... 114 d s * based(######typedef######) 115 d procptr 116 117 * xmlCharEncodingOutputFunc: 118 * @out: a pointer to an array of bytes to store the result 119 * @outlen: the length of @out 120 * @in: a pointer to an array of UTF-8 chars 121 * @inlen: the length of @in 122 * 123 * Take a block of UTF-8 chars in and try to convert it to another 124 * encoding. 125 * Note: a first call designed to produce heading info is called with 126 * in = NULL. If stateful this should also initialize the encoder state. 127 * 128 * Returns the number of bytes written, -1 if lack of space, or -2 129 * if the transcoding failed. 130 * The value of @inlen after return is the number of octets consumed 131 * if the return value is positive, else unpredictiable. 132 * The value of @outlen after return is the number of octets produced. 133 134 d xmlCharEncodingOutputFunc... 135 d s * based(######typedef######) 136 d procptr 137 138 * Block defining the handlers for non UTF-8 encodings. 139 * If iconv is supported, there are two extra fields. 140 141 /if defined(LIBXML_ICU_ENABLED) 142 d uconv_t ds based(######typedef######) 143 d align qualified 144 d uconv * UConverter * 145 d utf8 * UConverter * 146 /endif 147 148 d xmlCharEncodingHandlerPtr... 149 d s * based(######typedef######) 150 151 d xmlCharEncodingHandler... 152 d ds based(xmlCharEncodingHandlerPtr) 153 d align qualified 154 d name * char * 155 d input like(xmlCharEncodingInputFunc) 156 d output like(xmlCharEncodingOutputFunc) 157 * 158 /if defined(LIBXML_ICONV_ENABLED) 159 d iconv_in * iconv_t 160 d iconv_out * iconv_t 161 /endif LIBXML_ICONV_ENABLED 162 * 163 /if defined(LIBXML_ICU_ENABLED) 164 d uconv_in * uconv_t * 165 d uconv_out * uconv_t * 166 /endif LIBXML_ICU_ENABLED 167 168 /include "libxmlrpg/tree" 169 170 * Interfaces for encoding handlers. 171 172 d xmlInitCharEncodingHandlers... 173 d pr extproc( 174 d 'xmlInitCharEncodingHandlers') 175 176 d xmlCleanupCharEncodingHandlers... 177 d pr extproc( 178 d 'xmlCleanupCharEncodingHandlers') 179 180 d xmlRegisterCharEncodingHandler... 181 d pr extproc( 182 d 'xmlRegisterCharEncodingHandler') 183 d handler value like(xmlCharEncodingHandlerPtr) 184 185 d xmlGetCharEncodingHandler... 186 d pr extproc('xmlGetCharEncodingHandler') 187 d like(xmlCharEncodingHandlerPtr) 188 d enc value like(xmlCharEncoding) 189 190 d xmlFindCharEncodingHandler... 191 d pr extproc('xmlFindCharEncodingHandler') 192 d like(xmlCharEncodingHandlerPtr) 193 d name * value options(*string) const char * 194 195 d xmlNewCharEncodingHandler... 196 d pr extproc('xmlNewCharEncodingHandler') 197 d like(xmlCharEncodingHandlerPtr) 198 d name * value options(*string) const char * 199 d input value like(xmlCharEncodingInputFunc) 200 d output value like(xmlCharEncodingOutputFunc) 201 202 * Interfaces for encoding names and aliases. 203 204 d xmlAddEncodingAlias... 205 d pr extproc('xmlAddEncodingAlias') 206 d like(xmlCint) 207 d name * value options(*string) const char * 208 d alias * value options(*string) const char * 209 210 d xmlDelEncodingAlias... 211 d pr extproc('xmlDelEncodingAlias') 212 d like(xmlCint) 213 d alias * value options(*string) const char * 214 215 d xmlGetEncodingAlias... 216 d pr * extproc('xmlGetEncodingAlias') const char * 217 d alias * value options(*string) const char * 218 219 d xmlCleanupEncodingAliases... 220 d pr extproc('xmlCleanupEncodingAliases') 221 222 d xmlParseCharEncoding... 223 d pr extproc('xmlParseCharEncoding') 224 d like(xmlCharEncoding) 225 d name * value options(*string) const char * 226 227 d xmlGetCharEncodingName... 228 d pr * extproc('xmlGetCharEncodingName') const char * 229 d enc value like(xmlCharEncoding) 230 231 * Interfaces directly used by the parsers. 232 233 d xmlDetectCharEncoding... 234 d pr extproc('xmlDetectCharEncoding') 235 d like(xmlCharEncoding) 236 d in * value options(*string) const unsigned char* 237 d len value like(xmlCint) 238 239 d xmlCharEncOutFunc... 240 d pr extproc('xmlCharEncOutFunc') 241 d like(xmlCint) 242 d handler likeds(xmlCharEncodingHandler) 243 d out value like(xmlBufferPtr) 244 d in value like(xmlBufferPtr) 245 246 d xmlCharEncInFunc... 247 d pr extproc('xmlCharEncInFunc') 248 d like(xmlCint) 249 d handler likeds(xmlCharEncodingHandler) 250 d out value like(xmlBufferPtr) 251 d in value like(xmlBufferPtr) 252 253 d xmlCharEncFirstLine... 254 d pr extproc('xmlCharEncFirstLine') 255 d like(xmlCint) 256 d handler likeds(xmlCharEncodingHandler) 257 d out value like(xmlBufferPtr) 258 d in value like(xmlBufferPtr) 259 260 d xmlCharEncCloseFunc... 261 d pr extproc('xmlCharEncCloseFunc') 262 d like(xmlCint) 263 d handler likeds(xmlCharEncodingHandler) 264 265 * Export a few useful functions 266 267 /if defined(LIBXML_OUTPUT_ENABLED) 268 d UTF8Toisolat1 pr extproc('UTF8Toisolat1') 269 d like(xmlCint) 270 d out 65535 options(*varsize) unsigned char (*) 271 d outlen like(xmlCint) 272 d in * value options(*string) const unsigned char* 273 d inlen like(xmlCint) 274 275 /endif LIBXML_OUTPUT_ENABLD 276 277 d isolat1ToUTF8 pr extproc('isolat1ToUTF8') 278 d like(xmlCint) 279 d out 65535 options(*varsize) unsigned char (*) 280 d outlen like(xmlCint) 281 d in * value options(*string) const unsigned char* 282 d inlen like(xmlCint) 283 284 /endif XML_CHAR_ENCODING_H 285