1 * Summary: interface for the encoding conversion functions 2 * Description: interface for the encoding conversion functions needed for 3 * XML basic encoding and iconv() support. 4 * 5 * Related specs are 6 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 character codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * Copy: See Copyright for the status of this software. 17 * 18 * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A. 19 20 /if not defined(XML_CHAR_ENCODING_H__) 21 /define XML_CHAR_ENCODING_H__ 22 23 /include "libxmlrpg/xmlversion" 24 25 * xmlCharEncoding: 26 * 27 * Predefined values for some standard encodings. 28 * Libxml does not do beforehand translation on UTF8 and ISOLatinX. 29 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. 30 * 31 * Anything else would have to be translated to UTF8 before being 32 * given to the parser itself. The BOM for UTF16 and the encoding 33 * declaration are looked at and a converter is looked for at that 34 * point. If not found the parser stops here as asked by the XML REC. A 35 * converter can be registered by the user using 36 * xmlRegisterCharEncodingHandler but the current form doesn't allow 37 * stateful transcoding (a serious problem, agreed!). If iconv has been 38 * found it will be used automatically and allow stateful transcoding, 39 * the simplest is then to be sure to enable iconv and to provide iconv 40 * libs for the encoding support needed. 41 * 42 * Note that the generic "UTF-16" is not a predefined value. Instead, only 43 * the specific UTF-16LE and UTF-16BE are present. 
44 45 d xmlCharEncoding... 46 d s 10i 0 based(######typedef######) enum 47 d XML_CHAR_ENCODING_ERROR... No encoding detected 48 d c -1 49 d XML_CHAR_ENCODING_NONE... No encoding detected 50 d c 0 51 d XML_CHAR_ENCODING_UTF8... UTF-8 52 d c 1 53 d XML_CHAR_ENCODING_UTF16LE... UTF-16 little endian 54 d c 2 55 d XML_CHAR_ENCODING_UTF16BE... UTF-16 big endian 56 d c 3 57 d XML_CHAR_ENCODING_UCS4LE... UCS-4 little endian 58 d c 4 59 d XML_CHAR_ENCODING_UCS4BE... UCS-4 big endian 60 d c 5 61 d XML_CHAR_ENCODING_EBCDIC... EBCDIC uh! 62 d c 6 63 d XML_CHAR_ENCODING_UCS4_2143... UCS-4 unusual order 64 d c 7 65 d XML_CHAR_ENCODING_UCS4_3412... UCS-4 unusual order 66 d c 8 67 d XML_CHAR_ENCODING_UCS2... UCS-2 68 d c 9 69 d XML_CHAR_ENCODING_8859_1... ISO-8859-1 ISOLatin1 70 d c 10 71 d XML_CHAR_ENCODING_8859_2... ISO-8859-2 ISOLatin2 72 d c 11 73 d XML_CHAR_ENCODING_8859_3... ISO-8859-3 74 d c 12 75 d XML_CHAR_ENCODING_8859_4... ISO-8859-4 76 d c 13 77 d XML_CHAR_ENCODING_8859_5... ISO-8859-5 78 d c 14 79 d XML_CHAR_ENCODING_8859_6... ISO-8859-6 80 d c 15 81 d XML_CHAR_ENCODING_8859_7... ISO-8859-7 82 d c 16 83 d XML_CHAR_ENCODING_8859_8... ISO-8859-8 84 d c 17 85 d XML_CHAR_ENCODING_8859_9... ISO-8859-9 86 d c 18 87 d XML_CHAR_ENCODING_2022_JP... ISO-2022-JP 88 d c 19 89 d XML_CHAR_ENCODING_SHIFT_JIS... Shift_JIS 90 d c 20 91 d XML_CHAR_ENCODING_EUC_JP... EUC-JP 92 d c 21 93 d XML_CHAR_ENCODING_ASCII... Pure ASCII 94 d c 22 95 96 * xmlCharEncodingInputFunc: 97 * @out: a pointer to an array of bytes to store the UTF-8 result 98 * @outlen: the length of @out 99 * @in: a pointer to an array of chars in the original encoding 100 * @inlen: the length of @in 101 * 102 * Take a block of chars in the original encoding and try to convert 103 * it to a UTF-8 block of chars out. 104 * 105 * Returns the number of bytes written, -1 if lack of space, or -2 106 * if the transcoding failed. 
107 * The value of @inlen after return is the number of octets consumed 108 * if the return value is positive, else unpredictable. 109 * The value of @outlen after return is the number of octets produced. 110 111 d xmlCharEncodingInputFunc... 112 d s * based(######typedef######) 113 d procptr 114 115 * xmlCharEncodingOutputFunc: 116 * @out: a pointer to an array of bytes to store the result 117 * @outlen: the length of @out 118 * @in: a pointer to an array of UTF-8 chars 119 * @inlen: the length of @in 120 * 121 * Take a block of UTF-8 chars in and try to convert it to another 122 * encoding. 123 * Note: a first call designed to produce heading info is called with 124 * in = NULL. If stateful this should also initialize the encoder state. 125 * 126 * Returns the number of bytes written, -1 if lack of space, or -2 127 * if the transcoding failed. 128 * The value of @inlen after return is the number of octets consumed 129 * if the return value is positive, else unpredictable. 130 * The value of @outlen after return is the number of octets produced. 131 132 d xmlCharEncodingOutputFunc... 133 d s * based(######typedef######) 134 d procptr 135 136 * Block defining the handlers for non UTF-8 encodings. 137 * If iconv or ICU is supported, each adds two extra fields. 138 139 /if defined(LIBXML_ICU_ENABLED) 140 d uconv_t ds based(######typedef######) 141 d align qualified 142 d uconv * UConverter * 143 d utf8 * UConverter * 144 /endif LIBXML_ICU_ENABLED 145 146 d xmlCharEncodingHandlerPtr... 147 d s * based(######typedef######) 148 149 d xmlCharEncodingHandler... 
150 d ds based(xmlCharEncodingHandlerPtr) 151 d align qualified 152 d name * char * 153 d input like(xmlCharEncodingInputFunc) 154 d output like(xmlCharEncodingOutputFunc) 155 * 156 /if defined(LIBXML_ICONV_ENABLED) 157 d iconv_in * iconv_t 158 d iconv_out * iconv_t 159 /endif LIBXML_ICONV_ENABLED 160 * 161 /if defined(LIBXML_ICU_ENABLED) 162 d uconv_in * uconv_t * 163 d uconv_out * uconv_t * 164 /endif LIBXML_ICU_ENABLED 165 166 /include "libxmlrpg/tree" 167 168 * Interfaces for encoding handlers. 169 170 d xmlInitCharEncodingHandlers... 171 d pr extproc( 172 d 'xmlInitCharEncodingHandlers') 173 174 d xmlCleanupCharEncodingHandlers... 175 d pr extproc( 176 d 'xmlCleanupCharEncodingHandlers') 177 178 d xmlRegisterCharEncodingHandler... 179 d pr extproc( 180 d 'xmlRegisterCharEncodingHandler') 181 d handler value like(xmlCharEncodingHandlerPtr) 182 183 d xmlGetCharEncodingHandler... 184 d pr extproc('xmlGetCharEncodingHandler') 185 d like(xmlCharEncodingHandlerPtr) 186 d enc value like(xmlCharEncoding) 187 188 d xmlFindCharEncodingHandler... 189 d pr extproc('xmlFindCharEncodingHandler') 190 d like(xmlCharEncodingHandlerPtr) 191 d name * value options(*string) const char * 192 193 d xmlNewCharEncodingHandler... 194 d pr extproc('xmlNewCharEncodingHandler') 195 d like(xmlCharEncodingHandlerPtr) 196 d name * value options(*string) const char * 197 d input value like(xmlCharEncodingInputFunc) 198 d output value like(xmlCharEncodingOutputFunc) 199 200 * Interfaces for encoding names and aliases. 201 202 d xmlAddEncodingAlias... 203 d pr 10i 0 extproc('xmlAddEncodingAlias') 204 d name * value options(*string) const char * 205 d alias * value options(*string) const char * 206 207 d xmlDelEncodingAlias... 208 d pr 10i 0 extproc('xmlDelEncodingAlias') 209 d alias * value options(*string) const char * 210 211 d xmlGetEncodingAlias... 212 d pr * extproc('xmlGetEncodingAlias') const char * 213 d alias * value options(*string) const char * 214 215 d xmlCleanupEncodingAliases... 
216 d pr extproc('xmlCleanupEncodingAliases') 217 218 d xmlParseCharEncoding... 219 d pr extproc('xmlParseCharEncoding') 220 d like(xmlCharEncoding) 221 d name * value options(*string) const char * 222 223 d xmlGetCharEncodingName... 224 d pr * extproc('xmlGetCharEncodingName') const char * 225 d enc value like(xmlCharEncoding) 226 227 * Interfaces directly used by the parsers. 228 229 d xmlDetectCharEncoding... 230 d pr extproc('xmlDetectCharEncoding') 231 d like(xmlCharEncoding) 232 d in * value options(*string) const unsigned char* 233 d len 10i 0 value 234 235 d xmlCharEncOutFunc... 236 d pr 10i 0 extproc('xmlCharEncOutFunc') 237 d handler like(xmlCharEncodingHandler) 238 d out value like(xmlBufferPtr) 239 d in value like(xmlBufferPtr) 240 241 d xmlCharEncInFunc... 242 d pr 10i 0 extproc('xmlCharEncInFunc') 243 d handler like(xmlCharEncodingHandler) 244 d out value like(xmlBufferPtr) 245 d in value like(xmlBufferPtr) 246 247 d xmlCharEncFirstLine... 248 d pr 10i 0 extproc('xmlCharEncFirstLine') 249 d handler like(xmlCharEncodingHandler) 250 d out value like(xmlBufferPtr) 251 d in value like(xmlBufferPtr) 252 253 d xmlCharEncCloseFunc... 254 d pr 10i 0 extproc('xmlCharEncCloseFunc') 255 d handler like(xmlCharEncodingHandler) 256 257 * Export a few useful functions 258 259 /if defined(LIBXML_OUTPUT_ENABLED) 260 d UTF8Toisolat1 pr 10i 0 extproc('UTF8Toisolat1') 261 d out 65535 options(*varsize) unsigned char (*) 262 d outlen 10i 0 263 d in * value options(*string) const unsigned char* 264 d inlen 10i 0 265 266 /endif LIBXML_OUTPUT_ENABLED 267 268 d isolat1ToUTF8 pr 10i 0 extproc('isolat1ToUTF8') 269 d out 65535 options(*varsize) unsigned char (*) 270 d outlen 10i 0 271 d in * value options(*string) const unsigned char* 272 d inlen 10i 0 273 274 /endif XML_CHAR_ENCODING_H__ 275