• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1      * Summary: interface for the encoding conversion functions
2      * Description: interface for the encoding conversion functions needed for
3      *              XML basic encoding and iconv() support.
4      *
5      * Related specs are
6      * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
7      * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8      * [ISO-8859-1]   ISO Latin-1 characters codes.
9      * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10      *                Worldwide Character Encoding -- Version 1.0", Addison-
11      *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12      *                described in Unicode Technical Report #4.
13      * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14      *                Information Interchange, ANSI X3.4-1986.
15      *
16      * Copy: See Copyright for the status of this software.
17      *
18      * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
19
20      /if not defined(XML_CHAR_ENCODING_H__)
21      /define XML_CHAR_ENCODING_H__
22
23      /include "libxmlrpg/xmlversion"
24
25      * xmlCharEncoding:
26      *
27      * Predefined values for some standard encodings.
28      * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
29      * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
30      *
31      * Anything else would have to be translated to UTF8 before being
32      * given to the parser itself. The BOM for UTF16 and the encoding
33      * declaration are looked at and a converter is looked for at that
34      * point. If not found the parser stops here as asked by the XML REC. A
35      * converter can be registered by the user using
36      * xmlRegisterCharEncodingHandler but the current form doesn't allow
37      * stateful transcoding (a serious problem agreed !). If iconv has been
38      * found it will be used automatically and allow stateful transcoding,
39      * the simplest is then to be sure to enable iconv and to provide iconv
40      * libs for the encoding support needed.
41      *
42      * Note that the generic "UTF-16" is not a predefined value.  Instead, only
43      * the specific UTF-16LE and UTF-16BE are present.
44      * Declared as a 10i 0 typedef; each enumerator is a named constant.
45     d xmlCharEncoding...
46     d                 s             10i 0 based(######typedef######)           enum
47     d  XML_CHAR_ENCODING_ERROR...                                              No encoding detected
48     d                 c                   -1
49     d  XML_CHAR_ENCODING_NONE...                                               No encoding detected
50     d                 c                   0
51     d  XML_CHAR_ENCODING_UTF8...                                               UTF-8
52     d                 c                   1
53     d  XML_CHAR_ENCODING_UTF16LE...                                            UTF-16 little endian
54     d                 c                   2
55     d  XML_CHAR_ENCODING_UTF16BE...                                            UTF-16 big endian
56     d                 c                   3
57     d  XML_CHAR_ENCODING_UCS4LE...                                             UCS-4 little endian
58     d                 c                   4
59     d  XML_CHAR_ENCODING_UCS4BE...                                             UCS-4 big endian
60     d                 c                   5
61     d  XML_CHAR_ENCODING_EBCDIC...                                             EBCDIC uh!
62     d                 c                   6
63     d  XML_CHAR_ENCODING_UCS4_2143...                                          UCS-4 unusual order
64     d                 c                   7
65     d  XML_CHAR_ENCODING_UCS4_3412...                                          UCS-4 unusual order
66     d                 c                   8
67     d  XML_CHAR_ENCODING_UCS2...                                               UCS-2
68     d                 c                   9
69     d  XML_CHAR_ENCODING_8859_1...                                             ISO-8859-1 ISOLatin1
70     d                 c                   10
71     d  XML_CHAR_ENCODING_8859_2...                                             ISO-8859-2 ISOLatin2
72     d                 c                   11
73     d  XML_CHAR_ENCODING_8859_3...                                             ISO-8859-3
74     d                 c                   12
75     d  XML_CHAR_ENCODING_8859_4...                                             ISO-8859-4
76     d                 c                   13
77     d  XML_CHAR_ENCODING_8859_5...                                             ISO-8859-5
78     d                 c                   14
79     d  XML_CHAR_ENCODING_8859_6...                                             ISO-8859-6
80     d                 c                   15
81     d  XML_CHAR_ENCODING_8859_7...                                             ISO-8859-7
82     d                 c                   16
83     d  XML_CHAR_ENCODING_8859_8...                                             ISO-8859-8
84     d                 c                   17
85     d  XML_CHAR_ENCODING_8859_9...                                             ISO-8859-9
86     d                 c                   18
87     d  XML_CHAR_ENCODING_2022_JP...                                            ISO-2022-JP
88     d                 c                   19
89     d  XML_CHAR_ENCODING_SHIFT_JIS...                                          Shift_JIS
90     d                 c                   20
91     d  XML_CHAR_ENCODING_EUC_JP...                                             EUC-JP
92     d                 c                   21
93     d  XML_CHAR_ENCODING_ASCII...                                              Pure ASCII
94     d                 c                   22
95
96      * xmlCharEncodingInputFunc:
97      * @out:  a pointer to an array of bytes to store the UTF-8 result
98      * @outlen:  the length of @out
99      * @in:  a pointer to an array of chars in the original encoding
100      * @inlen:  the length of @in
101      *
102      * Take a block of chars in the original encoding and try to convert
103      * it to an UTF-8 block of chars out.
104      *
105      * Returns the number of bytes written, -1 if lack of space, or -2
106      *     if the transcoding failed.
107      * The value of @inlen after return is the number of octets consumed
108      *     if the return value is positive, else unpredictable.
109      * The value of @outlen after return is the number of octets produced.
110      * Declared as a procedure-pointer typedef; no parameter list is typed.
111     d xmlCharEncodingInputFunc...
112     d                 s               *   based(######typedef######)
113     d                                     procptr
114
115      * xmlCharEncodingOutputFunc:
116      * @out:  a pointer to an array of bytes to store the result
117      * @outlen:  the length of @out
118      * @in:  a pointer to an array of UTF-8 chars
119      * @inlen:  the length of @in
120      *
121      * Take a block of UTF-8 chars in and try to convert it to another
122      * encoding.
123      * Note: a first call designed to produce heading info is called with
124      * in = NULL. If stateful this should also initialize the encoder state.
125      *
126      * Returns the number of bytes written, -1 if lack of space, or -2
127      *     if the transcoding failed.
128      * The value of @inlen after return is the number of octets consumed
129      *     if the return value is positive, else unpredictable.
130      * The value of @outlen after return is the number of octets produced.
131      * Declared as a procedure-pointer typedef; no parameter list is typed.
132     d xmlCharEncodingOutputFunc...
133     d                 s               *   based(######typedef######)
134     d                                     procptr
135
136      * Block defining the handlers for non UTF-8 encodings.
137      * If iconv or ICU is supported, two extra fields are added for each.
138      * Holds two UConverter pointers; declared only if LIBXML_ICU_ENABLED.
139      /if defined(LIBXML_ICU_ENABLED)
140     d uconv_t         ds                  based(######typedef######)
141     d                                     align qualified
142     d  uconv                          *                                        UConverter *
143     d  utf8                           *                                        UConverter *
144      /endif
145      * Pointer typedef; also the basing pointer of xmlCharEncodingHandler.
146     d xmlCharEncodingHandlerPtr...
147     d                 s               *   based(######typedef######)
148      * Handler record: a name pointer plus input and output procedure ptrs.
149     d xmlCharEncodingHandler...
150     d                 ds                  based(xmlCharEncodingHandlerPtr)
151     d                                     align qualified
152     d  name                           *                                        char *
153     d  input                              like(xmlCharEncodingInputFunc)
154     d  output                             like(xmlCharEncodingOutputFunc)
155      * Two iconv_t fields, present only if LIBXML_ICONV_ENABLED is defined.
156      /if defined(LIBXML_ICONV_ENABLED)
157     d  iconv_in                       *                                        iconv_t
158     d  iconv_out                      *                                        iconv_t
159      /endif                                                                    LIBXML_ICONV_ENABLED
160      * Two uconv_t pointers, present only if LIBXML_ICU_ENABLED is defined.
161      /if defined(LIBXML_ICU_ENABLED)
162     d  uconv_in                       *                                        uconv_t *
163     d  uconv_out                      *                                        uconv_t *
164      /endif                                                                    LIBXML_ICU_ENABLED
165
166      /include "libxmlrpg/tree"
167
168      * Interfaces for encoding handlers.
169      * Bound to xmlInitCharEncodingHandlers; no parameters, no result.
170     d xmlInitCharEncodingHandlers...
171     d                 pr                  extproc(
172     d                                      'xmlInitCharEncodingHandlers')
173      * Bound to xmlCleanupCharEncodingHandlers; no parameters, no result.
174     d xmlCleanupCharEncodingHandlers...
175     d                 pr                  extproc(
176     d                                      'xmlCleanupCharEncodingHandlers')
177      * Takes one handler pointer by value; returns nothing.
178     d xmlRegisterCharEncodingHandler...
179     d                 pr                  extproc(
180     d                                      'xmlRegisterCharEncodingHandler')
181     d  handler                            value like(xmlCharEncodingHandlerPtr)
182      * Takes an xmlCharEncoding value; returns a handler pointer.
183     d xmlGetCharEncodingHandler...
184     d                 pr                  extproc('xmlGetCharEncodingHandler')
185     d                                     like(xmlCharEncodingHandlerPtr)
186     d  enc                                value like(xmlCharEncoding)
187      * Takes a null-terminated name string; returns a handler pointer.
188     d xmlFindCharEncodingHandler...
189     d                 pr                  extproc('xmlFindCharEncodingHandler')
190     d                                     like(xmlCharEncodingHandlerPtr)
191     d  name                           *   value options(*string)               const char *
192      * Takes a name and two procedure pointers; returns a handler pointer.
193     d xmlNewCharEncodingHandler...
194     d                 pr                  extproc('xmlNewCharEncodingHandler')
195     d                                     like(xmlCharEncodingHandlerPtr)
196     d  name                           *   value options(*string)               const char *
197     d  input                              value like(xmlCharEncodingInputFunc)
198     d  output                             value like(xmlCharEncodingOutputFunc)
199
200      * Interfaces for encoding names and aliases.
201      * Takes two null-terminated strings; returns a 10i 0 integer.
202     d xmlAddEncodingAlias...
203     d                 pr            10i 0 extproc('xmlAddEncodingAlias')
204     d  name                           *   value options(*string)               const char *
205     d  alias                          *   value options(*string)               const char *
206      * Takes a null-terminated alias string; returns a 10i 0 integer.
207     d xmlDelEncodingAlias...
208     d                 pr            10i 0 extproc('xmlDelEncodingAlias')
209     d  alias                          *   value options(*string)               const char *
210      * Takes a null-terminated alias string; returns a pointer.
211     d xmlGetEncodingAlias...
212     d                 pr              *   extproc('xmlGetEncodingAlias')       const char *
213     d  alias                          *   value options(*string)               const char *
214      * Bound to xmlCleanupEncodingAliases; no parameters, no result.
215     d xmlCleanupEncodingAliases...
216     d                 pr                  extproc('xmlCleanupEncodingAliases')
217      * Takes a null-terminated name; returns an xmlCharEncoding value.
218     d xmlParseCharEncoding...
219     d                 pr                  extproc('xmlParseCharEncoding')
220     d                                     like(xmlCharEncoding)
221     d  name                           *   value options(*string)               const char *
222      * Takes an xmlCharEncoding value; returns a pointer.
223     d xmlGetCharEncodingName...
224     d                 pr              *   extproc('xmlGetCharEncodingName')    const char *
225     d  enc                                value like(xmlCharEncoding)
226
227      * Interfaces directly used by the parsers.
228      * Takes a pointer and a 10i 0 length; returns an xmlCharEncoding.
229     d xmlDetectCharEncoding...
230     d                 pr                  extproc('xmlDetectCharEncoding')
231     d                                     like(xmlCharEncoding)
232     d  in                             *   value options(*string)               const unsigned char*
233     d  len                          10i 0 value
234      * handler is by reference; out and in are by value. Returns 10i 0.
235     d xmlCharEncOutFunc...
236     d                 pr            10i 0 extproc('xmlCharEncOutFunc')
237     d  handler                            likeds(xmlCharEncodingHandler)
238     d  out                                value like(xmlBufferPtr)
239     d  in                                 value like(xmlBufferPtr)
240      * handler is by reference; out and in are by value. Returns 10i 0.
241     d xmlCharEncInFunc...
242     d                 pr            10i 0 extproc('xmlCharEncInFunc')
243     d  handler                            likeds(xmlCharEncodingHandler)
244     d  out                                value like(xmlBufferPtr)
245     d  in                                 value like(xmlBufferPtr)
246      * handler is by reference; out and in are by value. Returns 10i 0.
247     d xmlCharEncFirstLine...
248     d                 pr            10i 0 extproc('xmlCharEncFirstLine')
249     d  handler                            likeds(xmlCharEncodingHandler)
250     d  out                                value like(xmlBufferPtr)
251     d  in                                 value like(xmlBufferPtr)
252      * handler is passed by reference; returns a 10i 0 integer.
253     d xmlCharEncCloseFunc...
254     d                 pr            10i 0 extproc('xmlCharEncCloseFunc')
255     d  handler                            likeds(xmlCharEncodingHandler)
256
257      * Export a few useful functions
258      * Declared only when LIBXML_OUTPUT_ENABLED is defined.
259      /if defined(LIBXML_OUTPUT_ENABLED)
260     d UTF8Toisolat1   pr            10i 0 extproc('UTF8Toisolat1')
261     d  out                       65535    options(*varsize)                    unsigned char (*)
262     d  outlen                       10i 0
263     d  in                             *   value options(*string)               const unsigned char*
264     d  inlen                        10i 0
265      * out, outlen and inlen lack VALUE, so they are passed by reference.
266      /endif                                                                    LIBXML_OUTPUT_ENABLD
267      * out, outlen and inlen are by reference; in is a pointer by value.
268     d isolat1ToUTF8   pr            10i 0 extproc('isolat1ToUTF8')
269     d  out                       65535    options(*varsize)                    unsigned char (*)
270     d  outlen                       10i 0
271     d  in                             *   value options(*string)               const unsigned char*
272     d  inlen                        10i 0
273
274      /endif                                                                    XML_CHAR_ENCODING_H
275