• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <stddef.h>
34 #include <string.h> /* memcpy */
35 #include <stdbool.h>
36 
37 #ifdef _WIN32
38 #  include "winconfig.h"
39 #else
40 #  ifdef HAVE_EXPAT_CONFIG_H
41 #    include <expat_config.h>
42 #  endif
43 #endif /* ndef _WIN32 */
44 
45 #include "expat_external.h"
46 #include "internal.h"
47 #include "xmltok.h"
48 #include "nametab.h"
49 
50 #ifdef XML_DTD
51 #  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
52 #else
53 #  define IGNORE_SECTION_TOK_VTABLE /* as nothing */
54 #endif
55 
56 #define VTABLE1                                                                \
57   {PREFIX(prologTok), PREFIX(contentTok),                                      \
58    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE},                         \
59       {PREFIX(attributeValueTok), PREFIX(entityValueTok)},                     \
60       PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS),             \
61       PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName),    \
62       PREFIX(updatePosition), PREFIX(isPublicId)
63 
64 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
65 
/* Looks up a UCS-2 character, given as its high byte `hi` and low byte `lo`,
   in the naming bitmap.  pages[hi] selects a group of 8 bitmap words, the
   top 3 bits of `lo` select the word and the low 5 bits select the bit.
   Evaluates to non-zero iff that bit is set.  All arguments are
   parenthesized so pointer or arithmetic expressions expand correctly. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
68 
69 /* A 2 byte UTF-8 representation splits the character's 11 bits between
70    the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
71    pages, 3 bits to add to that index and 5 bits to generate the mask.
72 */
73 #define UTF8_GET_NAMING2(pages, byte)                                          \
74   (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3)                         \
75                 + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)]         \
76    & (1u << (((byte)[1]) & 0x1F)))
77 
78 /* A 3 byte UTF-8 representation splits the character's 16 bits between
79    the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
80    into pages, 3 bits to add to that index and 5 bits to generate the
81    mask.
82 */
83 #define UTF8_GET_NAMING3(pages, byte)                                          \
84   (namingBitmap                                                                \
85        [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)]      \
86          << 3)                                                                 \
87         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
88    & (1u << (((byte)[2]) & 0x1F)))
89 
90 #define UTF8_GET_NAMING(pages, p, n)                                           \
91   ((n) == 2                                                                    \
92        ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))                   \
93        : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
94 
95 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
96    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
97    with the additional restriction of not allowing the Unicode
98    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
99    Implementation details:
100      (A & 0x80) == 0     means A < 0x80
101    and
102      (A & 0xC0) == 0xC0  means A > 0xBF
103 */
104 
/* Non-zero iff the 2-byte sequence at p is not acceptable: the lead byte is
   below 0xC2, or the second byte is outside 0x80..0xBF.  p must point to
   unsigned bytes.  Every use of p is parenthesized so that an argument such
   as `buf + 1` is dereferenced as a whole. */
#define UTF8_INVALID2(p)                                                       \
  ((*(p)) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
107 
/* Non-zero iff the 3-byte sequence at p is not acceptable.  Rejected are:
   a third byte outside 0x80..0xBF; EF,BF,BE and EF,BF,BF (U+FFFE/U+FFFF);
   a second byte below 0xA0 after lead E0; a second byte above 0x9F after
   lead ED; and any other second byte outside 0x80..0xBF.  p must point to
   unsigned bytes.  Every use of p is parenthesized so that an argument such
   as `buf + 1` is dereferenced as a whole. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((*(p)) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((*(p)) == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
116 
/* Non-zero iff the 4-byte sequence at p is not acceptable.  Rejected are:
   a third or fourth byte outside 0x80..0xBF; a second byte below 0x90 after
   lead F0; a second byte above 0x8F after lead F4; and any other second
   byte outside 0x80..0xBF.  p must point to unsigned bytes.  Every use of p
   is parenthesized so that an argument such as `buf + 1` is dereferenced as
   a whole. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((*(p)) == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
124 
125 static int PTRFASTCALL
isNever(const ENCODING * enc,const char * p)126 isNever(const ENCODING *enc, const char *p) {
127   UNUSED_P(enc);
128   UNUSED_P(p);
129   return 0;
130 }
131 
132 static int PTRFASTCALL
utf8_isName2(const ENCODING * enc,const char * p)133 utf8_isName2(const ENCODING *enc, const char *p) {
134   UNUSED_P(enc);
135   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
136 }
137 
138 static int PTRFASTCALL
utf8_isName3(const ENCODING * enc,const char * p)139 utf8_isName3(const ENCODING *enc, const char *p) {
140   UNUSED_P(enc);
141   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
142 }
143 
144 #define utf8_isName4 isNever
145 
146 static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING * enc,const char * p)147 utf8_isNmstrt2(const ENCODING *enc, const char *p) {
148   UNUSED_P(enc);
149   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
150 }
151 
152 static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING * enc,const char * p)153 utf8_isNmstrt3(const ENCODING *enc, const char *p) {
154   UNUSED_P(enc);
155   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
156 }
157 
158 #define utf8_isNmstrt4 isNever
159 
160 static int PTRFASTCALL
utf8_isInvalid2(const ENCODING * enc,const char * p)161 utf8_isInvalid2(const ENCODING *enc, const char *p) {
162   UNUSED_P(enc);
163   return UTF8_INVALID2((const unsigned char *)p);
164 }
165 
166 static int PTRFASTCALL
utf8_isInvalid3(const ENCODING * enc,const char * p)167 utf8_isInvalid3(const ENCODING *enc, const char *p) {
168   UNUSED_P(enc);
169   return UTF8_INVALID3((const unsigned char *)p);
170 }
171 
172 static int PTRFASTCALL
utf8_isInvalid4(const ENCODING * enc,const char * p)173 utf8_isInvalid4(const ENCODING *enc, const char *p) {
174   UNUSED_P(enc);
175   return UTF8_INVALID4((const unsigned char *)p);
176 }
177 
/* An ENCODING extended with a per-byte classification table and a set of
   character-class callbacks.  `enc` is the first member, and
   AS_NORMAL_ENCODING() casts an ENCODING pointer to this struct, so the
   member order must not change.  Instances in this file are filled with
   positional initializers (STANDARD_VTABLE / NORMAL_VTABLE / NULL_VTABLE),
   which also depend on the member order below. */
178 struct normal_encoding {
179   ENCODING enc;
    /* Byte type for each possible byte value; indexed by SB_BYTE_TYPE(). */
180   unsigned char type[256];
181 #ifdef XML_MIN_SIZE
    /* Extra indirections used only in XML_MIN_SIZE builds, where BYTE_TYPE,
       BYTE_TO_ASCII, CHAR_MATCHES and the *_MINBPC checks dispatch through
       these pointers instead of expanding inline. */
182   int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
183   int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
184   int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
185   int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
186   int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
187 #endif /* XML_MIN_SIZE */
    /* Per-length checks reached through IS_NAME_CHAR, IS_NMSTRT_CHAR and
       IS_INVALID_CHAR; the digit is the byte length of the sequence. */
188   int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
189   int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
190   int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
191   int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
192   int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
193   int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
194   int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
195   int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
196   int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
197 };
198 
199 #define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
200 
201 #ifdef XML_MIN_SIZE
202 
203 #  define STANDARD_VTABLE(E)                                                   \
204     E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
205 
206 #else
207 
208 #  define STANDARD_VTABLE(E) /* as nothing */
209 
210 #endif
211 
212 #define NORMAL_VTABLE(E)                                                       \
213   E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3,              \
214       E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
215 
216 #define NULL_VTABLE                                                            \
217   /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL,                  \
218       /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL,        \
219       /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
220 
221 static int FASTCALL checkCharRefNumber(int);
222 
223 #include "xmltok_impl.h"
224 #include "ascii.h"
225 
226 #ifdef XML_MIN_SIZE
227 #  define sb_isNameMin isNever
228 #  define sb_isNmstrtMin isNever
229 #endif
230 
231 #ifdef XML_MIN_SIZE
232 #  define MINBPC(enc) ((enc)->minBytesPerChar)
233 #else
234 /* minimum bytes per character */
235 #  define MINBPC(enc) 1
236 #endif
237 
238 #define SB_BYTE_TYPE(enc, p)                                                   \
239   (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
240 
241 #ifdef XML_MIN_SIZE
242 static int PTRFASTCALL
sb_byteType(const ENCODING * enc,const char * p)243 sb_byteType(const ENCODING *enc, const char *p) {
244   return SB_BYTE_TYPE(enc, p);
245 }
246 #  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
247 #else
248 #  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
249 #endif
250 
251 #ifdef XML_MIN_SIZE
252 #  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
253 static int PTRFASTCALL
sb_byteToAscii(const ENCODING * enc,const char * p)254 sb_byteToAscii(const ENCODING *enc, const char *p) {
255   UNUSED_P(enc);
256   return *p;
257 }
258 #else
259 #  define BYTE_TO_ASCII(enc, p) (*(p))
260 #endif
261 
262 #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
263 #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
264 #define IS_INVALID_CHAR(enc, p, n)                                             \
265   (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
266 
267 #ifdef XML_MIN_SIZE
268 #  define IS_NAME_CHAR_MINBPC(enc, p)                                          \
269     (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
270 #  define IS_NMSTRT_CHAR_MINBPC(enc, p)                                        \
271     (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
272 #else
273 #  define IS_NAME_CHAR_MINBPC(enc, p) (0)
274 #  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
275 #endif
276 
277 #ifdef XML_MIN_SIZE
278 #  define CHAR_MATCHES(enc, p, c)                                              \
279     (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
280 static int PTRCALL
sb_charMatches(const ENCODING * enc,const char * p,int c)281 sb_charMatches(const ENCODING *enc, const char *p, int c) {
282   UNUSED_P(enc);
283   return *p == c;
284 }
285 #else
286 /* c is an ASCII character */
287 #  define CHAR_MATCHES(enc, p, c) (*(p) == c)
288 #endif
289 
290 #define PREFIX(ident) normal_##ident
291 #define XML_TOK_IMPL_C
292 #include "xmltok_impl.c"
293 #undef XML_TOK_IMPL_C
294 
295 #undef MINBPC
296 #undef BYTE_TYPE
297 #undef BYTE_TO_ASCII
298 #undef CHAR_MATCHES
299 #undef IS_NAME_CHAR
300 #undef IS_NAME_CHAR_MINBPC
301 #undef IS_NMSTRT_CHAR
302 #undef IS_NMSTRT_CHAR_MINBPC
303 #undef IS_INVALID_CHAR
304 
/* UTF8_cvalN is the value of the masked first byte of an N-byte UTF-8
   sequence. */
enum {
  UTF8_cval1 = 0x00,
  UTF8_cval2 = 0xC0,
  UTF8_cval3 = 0xE0,
  UTF8_cval4 = 0xF0
};
311 
/* Moves *fromLimRef backwards, if needed, so that [from, *fromLimRef) does
   not end in the middle of a multi-byte UTF-8 character.

   The buffer is scanned from its end towards `from`.  An ASCII byte stops
   the scan.  A lead byte stops the scan too if enough bytes were stepped
   over to complete its character, in which case the limit is put back just
   past that character.  A lead byte without enough following bytes is cut
   off and the scan goes on.  Any other byte is simply stepped over.  If
   the scan reaches `from`, the limit becomes `from`. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *end = *fromLimRef;
  size_t stepped = 0; /* bytes stepped over since the last reset */

  while (end > from) {
    const unsigned char byte = (unsigned char)end[-1];
    size_t seqLen = 0; /* stays 0 for bytes that are not a lead byte */

    if ((byte & 0x80u) == 0x00u) {
      break; /* 1-byte character, matching 0b0xxxxxxx */
    } else if ((byte & 0xe0u) == 0xc0u) {
      seqLen = 2; /* led by a 0b110xxxxx byte */
    } else if ((byte & 0xf0u) == 0xe0u) {
      seqLen = 3; /* led by a 0b1110xxxx byte */
    } else if ((byte & 0xf8u) == 0xf0u) {
      seqLen = 4; /* led by a 0b11110xxx byte */
    }

    if (seqLen != 0) {
      if (stepped + 1 >= seqLen) {
        /* Character is complete: restore the limit to just past it. */
        end += seqLen - 1;
        break;
      }
      /* Incomplete character: drop it and start counting afresh. */
      stepped = 0;
    }

    end--;
    stepped++;
  }

  *fromLimRef = end;
}
350 
351 static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)352 utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
353             char **toP, const char *toLim) {
354   bool input_incomplete = false;
355   bool output_exhausted = false;
356 
357   /* Avoid copying partial characters (due to limited space). */
358   const ptrdiff_t bytesAvailable = fromLim - *fromP;
359   const ptrdiff_t bytesStorable = toLim - *toP;
360   UNUSED_P(enc);
361   if (bytesAvailable > bytesStorable) {
362     fromLim = *fromP + bytesStorable;
363     output_exhausted = true;
364   }
365 
366   /* Avoid copying partial characters (from incomplete input). */
367   {
368     const char *const fromLimBefore = fromLim;
369     _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
370     if (fromLim < fromLimBefore) {
371       input_incomplete = true;
372     }
373   }
374 
375   {
376     const ptrdiff_t bytesToCopy = fromLim - *fromP;
377     memcpy(*toP, *fromP, bytesToCopy);
378     *fromP += bytesToCopy;
379     *toP += bytesToCopy;
380   }
381 
382   if (output_exhausted) /* needs to go first */
383     return XML_CONVERT_OUTPUT_EXHAUSTED;
384   else if (input_incomplete)
385     return XML_CONVERT_INPUT_INCOMPLETE;
386   else
387     return XML_CONVERT_COMPLETED;
388 }
389 
390 static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)391 utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
392              unsigned short **toP, const unsigned short *toLim) {
393   enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
394   unsigned short *to = *toP;
395   const char *from = *fromP;
396   while (from < fromLim && to < toLim) {
397     switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
398     case BT_LEAD2:
399       if (fromLim - from < 2) {
400         res = XML_CONVERT_INPUT_INCOMPLETE;
401         goto after;
402       }
403       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
404       from += 2;
405       break;
406     case BT_LEAD3:
407       if (fromLim - from < 3) {
408         res = XML_CONVERT_INPUT_INCOMPLETE;
409         goto after;
410       }
411       *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
412                                | (from[2] & 0x3f));
413       from += 3;
414       break;
415     case BT_LEAD4: {
416       unsigned long n;
417       if (toLim - to < 2) {
418         res = XML_CONVERT_OUTPUT_EXHAUSTED;
419         goto after;
420       }
421       if (fromLim - from < 4) {
422         res = XML_CONVERT_INPUT_INCOMPLETE;
423         goto after;
424       }
425       n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
426           | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
427       n -= 0x10000;
428       to[0] = (unsigned short)((n >> 10) | 0xD800);
429       to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
430       to += 2;
431       from += 4;
432     } break;
433     default:
434       *to++ = *from++;
435       break;
436     }
437   }
438   if (from < fromLim)
439     res = XML_CONVERT_OUTPUT_EXHAUSTED;
440 after:
441   *fromP = from;
442   *toP = to;
443   return res;
444 }
445 
446 #ifdef XML_NS
/* UTF-8 encoding descriptor compiled only when XML_NS is defined.  The
   type[] table is asciitab.h followed by utf8tab.h, included without
   redefining BT_COLON (contrast utf8_encoding).  Conversions go through
   utf8_toUtf8 / utf8_toUtf16 and the per-length checks are the utf8_*
   functions. */
447 static const struct normal_encoding utf8_encoding_ns
448     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
449        {
450 #  include "asciitab.h"
451 #  include "utf8tab.h"
452        },
453        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
454 #endif
455 
/* UTF-8 encoding descriptor.  While asciitab.h is included, BT_COLON is
   defined as BT_NMSTRT, so any table entry written as BT_COLON is stored as
   BT_NMSTRT; utf8tab.h supplies the rest of the type[] table.  Conversions
   go through utf8_toUtf8 / utf8_toUtf16 and the per-length checks are the
   utf8_* functions. */
456 static const struct normal_encoding utf8_encoding
457     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
458        {
459 #define BT_COLON BT_NMSTRT
460 #include "asciitab.h"
461 #undef BT_COLON
462 #include "utf8tab.h"
463        },
464        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
465 
466 #ifdef XML_NS
467 
/* Same as utf8_encoding_ns, except that the first part of the type[] table
   comes from iasciitab.h instead of asciitab.h.  Compiled only when XML_NS
   is defined. */
468 static const struct normal_encoding internal_utf8_encoding_ns
469     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
470        {
471 #  include "iasciitab.h"
472 #  include "utf8tab.h"
473        },
474        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
475 
476 #endif
477 
/* Same as utf8_encoding, except that the first part of the type[] table
   comes from iasciitab.h instead of asciitab.h.  BT_COLON is defined as
   BT_NMSTRT while iasciitab.h is included. */
478 static const struct normal_encoding internal_utf8_encoding
479     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
480        {
481 #define BT_COLON BT_NMSTRT
482 #include "iasciitab.h"
483 #undef BT_COLON
484 #include "utf8tab.h"
485        },
486        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
487 
488 static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)489 latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
490               char **toP, const char *toLim) {
491   UNUSED_P(enc);
492   for (;;) {
493     unsigned char c;
494     if (*fromP == fromLim)
495       return XML_CONVERT_COMPLETED;
496     c = (unsigned char)**fromP;
497     if (c & 0x80) {
498       if (toLim - *toP < 2)
499         return XML_CONVERT_OUTPUT_EXHAUSTED;
500       *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
501       *(*toP)++ = (char)((c & 0x3f) | 0x80);
502       (*fromP)++;
503     } else {
504       if (*toP == toLim)
505         return XML_CONVERT_OUTPUT_EXHAUSTED;
506       *(*toP)++ = *(*fromP)++;
507     }
508   }
509 }
510 
511 static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)512 latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
513                unsigned short **toP, const unsigned short *toLim) {
514   UNUSED_P(enc);
515   while (*fromP < fromLim && *toP < toLim)
516     *(*toP)++ = (unsigned char)*(*fromP)++;
517 
518   if ((*toP == toLim) && (*fromP < fromLim))
519     return XML_CONVERT_OUTPUT_EXHAUSTED;
520   else
521     return XML_CONVERT_COMPLETED;
522 }
523 
524 #ifdef XML_NS
525 
/* Latin-1 encoding descriptor (this definition sits inside #ifdef XML_NS).
   The type[] table is asciitab.h followed by latin1tab.h, included without
   redefining BT_COLON.  Conversions go through latin1_toUtf8 /
   latin1_toUtf16; the per-length multi-byte checks are all NULL
   (NULL_VTABLE). */
526 static const struct normal_encoding latin1_encoding_ns
527     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
528        {
529 #  include "asciitab.h"
530 #  include "latin1tab.h"
531        },
532        STANDARD_VTABLE(sb_) NULL_VTABLE};
533 
534 #endif
535 
/* Latin-1 encoding descriptor.  BT_COLON is defined as BT_NMSTRT while
   asciitab.h is included; latin1tab.h supplies the rest of the type[]
   table.  Conversions go through latin1_toUtf8 / latin1_toUtf16; the
   per-length multi-byte checks are all NULL (NULL_VTABLE). */
536 static const struct normal_encoding latin1_encoding
537     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
538        {
539 #define BT_COLON BT_NMSTRT
540 #include "asciitab.h"
541 #undef BT_COLON
542 #include "latin1tab.h"
543        },
544        STANDARD_VTABLE(sb_) NULL_VTABLE};
545 
546 static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)547 ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
548              char **toP, const char *toLim) {
549   UNUSED_P(enc);
550   while (*fromP < fromLim && *toP < toLim)
551     *(*toP)++ = *(*fromP)++;
552 
553   if ((*toP == toLim) && (*fromP < fromLim))
554     return XML_CONVERT_OUTPUT_EXHAUSTED;
555   else
556     return XML_CONVERT_COMPLETED;
557 }
558 
559 #ifdef XML_NS
560 
/* ASCII encoding descriptor (this definition sits inside #ifdef XML_NS).
   Only asciitab.h is included into the type[] table, without redefining
   BT_COLON; the entries not listed are zero-initialized, which the comment
   below identifies as BT_NONXML.  Conversions go through ascii_toUtf8 and
   latin1_toUtf16. */
561 static const struct normal_encoding ascii_encoding_ns
562     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
563        {
564 #  include "asciitab.h"
565            /* BT_NONXML == 0 */
566        },
567        STANDARD_VTABLE(sb_) NULL_VTABLE};
568 
569 #endif
570 
/* ASCII encoding descriptor.  BT_COLON is defined as BT_NMSTRT while
   asciitab.h is included; the type[] entries not listed are
   zero-initialized, which the comment below identifies as BT_NONXML.
   Conversions go through ascii_toUtf8 and latin1_toUtf16. */
571 static const struct normal_encoding ascii_encoding
572     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
573        {
574 #define BT_COLON BT_NMSTRT
575 #include "asciitab.h"
576 #undef BT_COLON
577            /* BT_NONXML == 0 */
578        },
579        STANDARD_VTABLE(sb_) NULL_VTABLE};
580 
581 static int PTRFASTCALL
unicode_byte_type(char hi,char lo)582 unicode_byte_type(char hi, char lo) {
583   switch ((unsigned char)hi) {
584   /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
585   case 0xD8:
586   case 0xD9:
587   case 0xDA:
588   case 0xDB:
589     return BT_LEAD4;
590   /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
591   case 0xDC:
592   case 0xDD:
593   case 0xDE:
594   case 0xDF:
595     return BT_TRAIL;
596   case 0xFF:
597     switch ((unsigned char)lo) {
598     case 0xFF: /* noncharacter-FFFF */
599     case 0xFE: /* noncharacter-FFFE */
600       return BT_NONXML;
601     }
602     break;
603   }
604   return BT_NONASCII;
605 }
606 
/* Generates the function E##toUtf8, which converts UTF-16 input from
   [*fromP, fromLim) into UTF-8 written at *toP (limit toLim).  The two
   bytes of each code unit are read with GET_LO / GET_HI, so the byte order
   is fixed by whichever definitions of those macros are in effect where
   this macro is expanded.

   An odd trailing input byte is ignored (fromLim is shrunk to an even
   length).  The high byte selects the output form:
     0 with lo < 0x80   -> 1 byte;
     0 with lo >= 0x80 (falls through) and 0x01-0x07 -> 2 bytes;
     0xD8-0xDB          -> 4 bytes, consuming the following code unit too;
     everything else (the `default` label, placed before the 0xD8 cases)
                        -> 3 bytes.

   Before each character the remaining output space is checked; if it is
   too small, *fromP is set to the unconverted code unit and
   XML_CONVERT_OUTPUT_EXHAUSTED is returned.  In the 0xD8-0xDB case, fewer
   than 4 remaining input bytes yields XML_CONVERT_INPUT_INCOMPLETE; the
   output-space check is made first.  *toP is advanced as bytes are
   written. */
607 #define DEFINE_UTF16_TO_UTF8(E)                                                \
608   static enum XML_Convert_Result PTRCALL E##toUtf8(                            \
609       const ENCODING *enc, const char **fromP, const char *fromLim,            \
610       char **toP, const char *toLim) {                                         \
611     const char *from = *fromP;                                                 \
612     UNUSED_P(enc);                                                             \
613     fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */      \
614     for (; from < fromLim; from += 2) {                                        \
615       int plane;                                                               \
616       unsigned char lo2;                                                       \
617       unsigned char lo = GET_LO(from);                                         \
618       unsigned char hi = GET_HI(from);                                         \
619       switch (hi) {                                                            \
620       case 0:                                                                  \
621         if (lo < 0x80) {                                                       \
622           if (*toP == toLim) {                                                 \
623             *fromP = from;                                                     \
624             return XML_CONVERT_OUTPUT_EXHAUSTED;                               \
625           }                                                                    \
626           *(*toP)++ = lo;                                                      \
627           break;                                                               \
628         }                                                                      \
629         /* fall through */                                                     \
630       case 0x1:                                                                \
631       case 0x2:                                                                \
632       case 0x3:                                                                \
633       case 0x4:                                                                \
634       case 0x5:                                                                \
635       case 0x6:                                                                \
636       case 0x7:                                                                \
637         if (toLim - *toP < 2) {                                                \
638           *fromP = from;                                                       \
639           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
640         }                                                                      \
641         *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \
642         *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
643         break;                                                                 \
644       default:                                                                 \
645         if (toLim - *toP < 3) {                                                \
646           *fromP = from;                                                       \
647           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
648         }                                                                      \
649         /* 16 bits divided 4, 6, 6 amongst 3 bytes */                          \
650         *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \
651         *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \
652         *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
653         break;                                                                 \
654       case 0xD8:                                                               \
655       case 0xD9:                                                               \
656       case 0xDA:                                                               \
657       case 0xDB:                                                               \
658         if (toLim - *toP < 4) {                                                \
659           *fromP = from;                                                       \
660           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
661         }                                                                      \
662         if (fromLim - from < 4) {                                              \
663           *fromP = from;                                                       \
664           return XML_CONVERT_INPUT_INCOMPLETE;                                 \
665         }                                                                      \
666         plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \
667         *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \
668         *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \
669         from += 2;                                                             \
670         lo2 = GET_LO(from);                                                    \
671         *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \
672                      | (lo2 >> 6) | 0x80);                                     \
673         *(*toP)++ = ((lo2 & 0x3f) | 0x80);                                     \
674         break;                                                                 \
675       }                                                                        \
676     }                                                                          \
677     *fromP = from;                                                             \
678     if (from < fromLim)                                                        \
679       return XML_CONVERT_INPUT_INCOMPLETE;                                     \
680     else                                                                       \
681       return XML_CONVERT_COMPLETED;                                            \
682   }
683 
/* Generates the function E##toUtf16, which copies UTF-16 code units from
   the byte range [*fromP, fromLim) into the unsigned short range
   [*toP, toLim).  Each unit is assembled as (GET_HI << 8) | GET_LO, so the
   input byte order is fixed by whichever definitions of those macros are
   in effect where this macro is expanded.

   An odd trailing input byte is ignored.  If the input is longer (in
   bytes) than twice the number of free output units and the last input
   unit has a high byte in 0xD8-0xDF, that unit is left out and the
   provisional result becomes XML_CONVERT_INPUT_INCOMPLETE.
   XML_CONVERT_OUTPUT_EXHAUSTED is returned instead whenever the output is
   full while input (up to the possibly reduced limit) remains.  *fromP and
   *toP are advanced as units are copied. */
684 #define DEFINE_UTF16_TO_UTF16(E)                                               \
685   static enum XML_Convert_Result PTRCALL E##toUtf16(                           \
686       const ENCODING *enc, const char **fromP, const char *fromLim,            \
687       unsigned short **toP, const unsigned short *toLim) {                     \
688     enum XML_Convert_Result res = XML_CONVERT_COMPLETED;                       \
689     UNUSED_P(enc);                                                             \
690     fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */  \
691     /* Avoid copying first half only of surrogate */                           \
692     if (fromLim - *fromP > ((toLim - *toP) << 1)                               \
693         && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) {                             \
694       fromLim -= 2;                                                            \
695       res = XML_CONVERT_INPUT_INCOMPLETE;                                      \
696     }                                                                          \
697     for (; *fromP < fromLim && *toP < toLim; *fromP += 2)                      \
698       *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP);                      \
699     if ((*toP == toLim) && (*fromP < fromLim))                                 \
700       return XML_CONVERT_OUTPUT_EXHAUSTED;                                     \
701     else                                                                       \
702       return res;                                                              \
703   }
704 
705 #define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
706 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
707 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
708 
709 DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)710 DEFINE_UTF16_TO_UTF16(little2_)
711 
712 #undef SET2
713 #undef GET_LO
714 #undef GET_HI
715 
716 #define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
717 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
718 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
719 
720 DEFINE_UTF16_TO_UTF8(big2_)
721 DEFINE_UTF16_TO_UTF16(big2_)
722 
723 #undef SET2
724 #undef GET_LO
725 #undef GET_HI
726 
/* Character helpers for little-endian UTF-16.  p addresses one two-byte
   unit: p[0] is the low byte and p[1] the high byte.  Every macro
   argument is parenthesized so that expression arguments (for example
   `ptr + 2` or `flag ? a : b`) expand with the intended precedence. */

/* Byte type of the unit at p: a lookup in the encoding's type table when
   the high byte is zero, unicode_byte_type() otherwise. */
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[0]]  \
       : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, -1 otherwise. */
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte c. */
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Name / name-start tests for a single unit, via the naming bitmaps. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
736 
737 #ifdef XML_MIN_SIZE
738 
739 static int PTRFASTCALL
740 little2_byteType(const ENCODING *enc, const char *p) {
741   return LITTLE2_BYTE_TYPE(enc, p);
742 }
743 
744 static int PTRFASTCALL
little2_byteToAscii(const ENCODING * enc,const char * p)745 little2_byteToAscii(const ENCODING *enc, const char *p) {
746   UNUSED_P(enc);
747   return LITTLE2_BYTE_TO_ASCII(p);
748 }
749 
750 static int PTRCALL
little2_charMatches(const ENCODING * enc,const char * p,int c)751 little2_charMatches(const ENCODING *enc, const char *p, int c) {
752   UNUSED_P(enc);
753   return LITTLE2_CHAR_MATCHES(p, c);
754 }
755 
756 static int PTRFASTCALL
little2_isNameMin(const ENCODING * enc,const char * p)757 little2_isNameMin(const ENCODING *enc, const char *p) {
758   UNUSED_P(enc);
759   return LITTLE2_IS_NAME_CHAR_MINBPC(p);
760 }
761 
762 static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING * enc,const char * p)763 little2_isNmstrtMin(const ENCODING *enc, const char *p) {
764   UNUSED_P(enc);
765   return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
766 }
767 
/* XML_MIN_SIZE build: the vtable initializer for the little2_ tables is
   VTABLE1 followed by the two little2_ converters. */
#  undef VTABLE
#  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
770 
771 #else /* not XML_MIN_SIZE */
772 
/* Full-size build: include xmltok_impl.c with every generated identifier
   prefixed little2_ and the character helpers bound to the LITTLE2_*
   macros. */
#  undef PREFIX
#  define PREFIX(ident) little2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
/* The n-byte name tests are constant 0 here; only the MINBPC forms do
   real work. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Drop the per-encoding bindings so the next instantiation starts clean.
   IS_INVALID_CHAR is not defined in this section; undefining it is a
   harmless no-op if it does not exist. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR
798 
799 #endif /* not XML_MIN_SIZE */
800 
801 #ifdef XML_NS
802 
/* Little-endian UTF-16 encoding table used when XML_NS is defined.  The
   byte-type table is asciitab.h followed by latin1tab.h with BT_COLON
   left as is.  The value after "2, 0" is 1 only when BYTEORDER is 1234.
   NOTE(review): presumably the three scalars are minBytesPerChar,
   isUtf8 and isUtf16 -- confirm against the ENCODING declaration. */
static const struct normal_encoding little2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 1234
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
816 
817 #endif
818 
/* Little-endian UTF-16 encoding table without namespace handling:
   BT_COLON is temporarily defined as BT_NMSTRT while asciitab.h is
   included, so any BT_COLON entry in that table becomes a name-start
   type.  The value after "2, 0" is 1 only when BYTEORDER is 1234.
   NOTE(review): presumably the three scalars are minBytesPerChar,
   isUtf8 and isUtf16 -- confirm against the ENCODING declaration. */
static const struct normal_encoding little2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 1234
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
834 
835 #if BYTEORDER != 4321
836 
837 #  ifdef XML_NS
838 
/* Internal little-endian UTF-16 table for XML_NS builds, compiled only
   when BYTEORDER is not 4321.  It differs from little2_encoding_ns by
   using iasciitab.h and by a fixed last scalar of 1. */
static const struct normal_encoding internal_little2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
846 
847 #  endif
848 
/* Internal little-endian UTF-16 table without namespace handling,
   compiled only when BYTEORDER is not 4321.  BT_COLON is defined as
   BT_NMSTRT while iasciitab.h is included; the last scalar is fixed
   at 1. */
static const struct normal_encoding internal_little2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
858 
859 #endif
860 
/* Character helpers for big-endian UTF-16.  p addresses one two-byte
   unit: p[0] is the high byte and p[1] the low byte.  Every macro
   argument is parenthesized so that expression arguments (for example
   `ptr + 2` or `flag ? a : b`) expand with the intended precedence. */

/* Byte type of the unit at p: a lookup in the encoding's type table when
   the high byte is zero, unicode_byte_type() otherwise. */
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]]  \
       : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, -1 otherwise. */
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte c. */
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Name / name-start tests for a single unit, via the naming bitmaps. */
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
871 
872 #ifdef XML_MIN_SIZE
873 
874 static int PTRFASTCALL
big2_byteType(const ENCODING * enc,const char * p)875 big2_byteType(const ENCODING *enc, const char *p) {
876   return BIG2_BYTE_TYPE(enc, p);
877 }
878 
879 static int PTRFASTCALL
big2_byteToAscii(const ENCODING * enc,const char * p)880 big2_byteToAscii(const ENCODING *enc, const char *p) {
881   UNUSED_P(enc);
882   return BIG2_BYTE_TO_ASCII(p);
883 }
884 
885 static int PTRCALL
big2_charMatches(const ENCODING * enc,const char * p,int c)886 big2_charMatches(const ENCODING *enc, const char *p, int c) {
887   UNUSED_P(enc);
888   return BIG2_CHAR_MATCHES(p, c);
889 }
890 
891 static int PTRFASTCALL
big2_isNameMin(const ENCODING * enc,const char * p)892 big2_isNameMin(const ENCODING *enc, const char *p) {
893   UNUSED_P(enc);
894   return BIG2_IS_NAME_CHAR_MINBPC(p);
895 }
896 
897 static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING * enc,const char * p)898 big2_isNmstrtMin(const ENCODING *enc, const char *p) {
899   UNUSED_P(enc);
900   return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
901 }
902 
/* XML_MIN_SIZE build: the vtable initializer for the big2_ tables is
   VTABLE1 followed by the two big2_ converters. */
#  undef VTABLE
#  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
905 
906 #else /* not XML_MIN_SIZE */
907 
/* Full-size build: include xmltok_impl.c with every generated identifier
   prefixed big2_ and the character helpers bound to the BIG2_* macros. */
#  undef PREFIX
#  define PREFIX(ident) big2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
/* The n-byte name tests are constant 0 here; only the MINBPC forms do
   real work. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Drop the per-encoding bindings again.  IS_INVALID_CHAR is not defined
   in this section; undefining it is a harmless no-op if it does not
   exist. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR
933 
934 #endif /* not XML_MIN_SIZE */
935 
936 #ifdef XML_NS
937 
/* Big-endian UTF-16 encoding table used when XML_NS is defined.  The
   byte-type table is asciitab.h followed by latin1tab.h with BT_COLON
   left as is.  The value after "2, 0" is 1 only when BYTEORDER is 4321.
   NOTE(review): presumably the three scalars are minBytesPerChar,
   isUtf8 and isUtf16 -- confirm against the ENCODING declaration. */
static const struct normal_encoding big2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 4321
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
951 
952 #endif
953 
/* Big-endian UTF-16 encoding table without namespace handling: BT_COLON
   is temporarily defined as BT_NMSTRT while asciitab.h is included, so
   any BT_COLON entry in that table becomes a name-start type.  The
   value after "2, 0" is 1 only when BYTEORDER is 4321.
   NOTE(review): presumably the three scalars are minBytesPerChar,
   isUtf8 and isUtf16 -- confirm against the ENCODING declaration. */
static const struct normal_encoding big2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 4321
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
969 
970 #if BYTEORDER != 1234
971 
972 #  ifdef XML_NS
973 
/* Internal big-endian UTF-16 table for XML_NS builds, compiled only when
   BYTEORDER is not 1234.  It differs from big2_encoding_ns by using
   iasciitab.h and by a fixed last scalar of 1. */
static const struct normal_encoding internal_big2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
981 
982 #  endif
983 
/* Internal big-endian UTF-16 table without namespace handling, compiled
   only when BYTEORDER is not 1234.  BT_COLON is defined as BT_NMSTRT
   while iasciitab.h is included; the last scalar is fixed at 1. */
static const struct normal_encoding internal_big2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
993 
994 #endif
995 
996 #undef PREFIX
997 
998 static int FASTCALL
streqci(const char * s1,const char * s2)999 streqci(const char *s1, const char *s2) {
1000   for (;;) {
1001     char c1 = *s1++;
1002     char c2 = *s2++;
1003     if (ASCII_a <= c1 && c1 <= ASCII_z)
1004       c1 += ASCII_A - ASCII_a;
1005     if (ASCII_a <= c2 && c2 <= ASCII_z)
1006       /* The following line will never get executed.  streqci() is
1007        * only called from two places, both of which guarantee to put
1008        * upper-case strings into s2.
1009        */
1010       c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1011     if (c1 != c2)
1012       return 0;
1013     if (! c1)
1014       break;
1015   }
1016   return 1;
1017 }
1018 
1019 static void PTRCALL
initUpdatePosition(const ENCODING * enc,const char * ptr,const char * end,POSITION * pos)1020 initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
1021                    POSITION *pos) {
1022   UNUSED_P(enc);
1023   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1024 }
1025 
1026 static int
toAscii(const ENCODING * enc,const char * ptr,const char * end)1027 toAscii(const ENCODING *enc, const char *ptr, const char *end) {
1028   char buf[1];
1029   char *p = buf;
1030   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1031   if (p == buf)
1032     return -1;
1033   else
1034     return buf[0];
1035 }
1036 
1037 static int FASTCALL
isSpace(int c)1038 isSpace(int c) {
1039   switch (c) {
1040   case 0x20:
1041   case 0xD:
1042   case 0xA:
1043   case 0x9:
1044     return 1;
1045   }
1046   return 0;
1047 }
1048 
/* Parse one pseudo-attribute (name = "value") from [ptr, end).

   Return 1 if there's just optional white space or there's an S
   followed by name=val.  In the white-space-only case *namePtr is set
   to NULL.  On success with an attribute, *namePtr / *nameEndPtr bound
   the name, *valPtr points just past the opening quote and *nextTokPtr
   just past the closing quote.

   Return 0 on a syntax error, with *nextTokPtr set to the offending
   position.
*/
static int
parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
                     const char **namePtr, const char **nameEndPtr,
                     const char **valPtr, const char **nextTokPtr) {
  int c;
  char open;
  if (ptr == end) {
    *namePtr = NULL;
    return 1;
  }
  /* An attribute must be preceded by at least one white space char. */
  if (! isSpace(toAscii(enc, ptr, end))) {
    *nextTokPtr = ptr;
    return 0;
  }
  do {
    ptr += enc->minBytesPerChar;
  } while (isSpace(toAscii(enc, ptr, end)));
  if (ptr == end) {
    *namePtr = NULL;
    return 1;
  }
  *namePtr = ptr;
  /* Scan the name up to '=' or to white space followed by '='. */
  for (;;) {
    c = toAscii(enc, ptr, end);
    if (c == -1) {
      *nextTokPtr = ptr;
      return 0;
    }
    if (c == ASCII_EQUALS) {
      *nameEndPtr = ptr;
      break;
    }
    if (isSpace(c)) {
      *nameEndPtr = ptr;
      do {
        ptr += enc->minBytesPerChar;
      } while (isSpace(c = toAscii(enc, ptr, end)));
      if (c != ASCII_EQUALS) {
        *nextTokPtr = ptr;
        return 0;
      }
      break;
    }
    ptr += enc->minBytesPerChar;
  }
  /* Reject an empty name, i.e. '=' right where the name should start. */
  if (ptr == *namePtr) {
    *nextTokPtr = ptr;
    return 0;
  }
  /* Step over '=' and any white space before the opening quote. */
  ptr += enc->minBytesPerChar;
  c = toAscii(enc, ptr, end);
  while (isSpace(c)) {
    ptr += enc->minBytesPerChar;
    c = toAscii(enc, ptr, end);
  }
  if (c != ASCII_QUOT && c != ASCII_APOS) {
    *nextTokPtr = ptr;
    return 0;
  }
  open = (char)c;
  ptr += enc->minBytesPerChar;
  *valPtr = ptr;
  /* The value may only contain ASCII letters, digits, '.', '-' and '_'
     and must be closed by the same quote character that opened it. */
  for (;; ptr += enc->minBytesPerChar) {
    c = toAscii(enc, ptr, end);
    if (c == open)
      break;
    if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
        && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
        && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
      *nextTokPtr = ptr;
      return 0;
    }
  }
  *nextTokPtr = ptr + enc->minBytesPerChar;
  return 1;
}
1128 
/* Keywords used while parsing the XML/text declaration, spelled with the
   ASCII_* constants and NUL-terminated. */
static const char KW_version[]
    = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};

static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
                                   ASCII_i, ASCII_n, ASCII_g, '\0'};

static const char KW_standalone[]
    = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
       ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};

/* The two values accepted for the standalone pseudo-attribute. */
static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};

static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
1142 
/* Parse the pseudo-attributes of an XML declaration or text declaration
   held in [ptr, end).

   encodingFinder maps an encoding name to an ENCODING; it is called only
   when encoding is non-NULL.  isGeneralTextEntity selects text
   declaration rules: version becomes optional, encoding mandatory and
   standalone forbidden.  The output pointers versionPtr, versionEndPtr,
   encodingName, encoding and standalone may each be NULL.

   Returns 1 on success.  Returns 0 on error with *badPtr set to the
   offending position.
*/
static int
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
                                                 const char *),
               int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
               const char *end, const char **badPtr, const char **versionPtr,
               const char **versionEndPtr, const char **encodingName,
               const ENCODING **encoding, int *standalone) {
  const char *val = NULL;
  const char *name = NULL;
  const char *nameEnd = NULL;
  /* Skip five characters at the front and two at the back.
     NOTE(review): presumably the "<?xml" and "?>" delimiters -- confirm
     against the caller. */
  ptr += 5 * enc->minBytesPerChar;
  end -= 2 * enc->minBytesPerChar;
  if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
      || ! name) {
    *badPtr = ptr;
    return 0;
  }
  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
    /* Only a text declaration may omit the version. */
    if (! isGeneralTextEntity) {
      *badPtr = name;
      return 0;
    }
  } else {
    if (versionPtr)
      *versionPtr = val;
    if (versionEndPtr)
      *versionEndPtr = ptr;
    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
      *badPtr = ptr;
      return 0;
    }
    if (! name) {
      if (isGeneralTextEntity) {
        /* a TextDecl must have an EncodingDecl */
        *badPtr = ptr;
        return 0;
      }
      return 1;
    }
  }
  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
    /* The encoding name has to start with an ASCII letter. */
    int c = toAscii(enc, val, end);
    if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
      *badPtr = val;
      return 0;
    }
    if (encodingName)
      *encodingName = val;
    /* ptr is just past the closing quote, so the value ends one
       character earlier. */
    if (encoding)
      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
      *badPtr = ptr;
      return 0;
    }
    if (! name)
      return 1;
  }
  /* Anything left must be "standalone", and never in a text declaration. */
  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
      || isGeneralTextEntity) {
    *badPtr = name;
    return 0;
  }
  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
    if (standalone)
      *standalone = 1;
  } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
    if (standalone)
      *standalone = 0;
  } else {
    *badPtr = val;
    return 0;
  }
  /* Only trailing white space may follow the standalone value. */
  while (isSpace(toAscii(enc, ptr, end)))
    ptr += enc->minBytesPerChar;
  if (ptr != end) {
    *badPtr = ptr;
    return 0;
  }
  return 1;
}
1223 
1224 static int FASTCALL
checkCharRefNumber(int result)1225 checkCharRefNumber(int result) {
1226   switch (result >> 8) {
1227   case 0xD8:
1228   case 0xD9:
1229   case 0xDA:
1230   case 0xDB:
1231   case 0xDC:
1232   case 0xDD:
1233   case 0xDE:
1234   case 0xDF:
1235     return -1;
1236   case 0:
1237     if (latin1_encoding.type[result] == BT_NONXML)
1238       return -1;
1239     break;
1240   case 0xFF:
1241     if (result == 0xFFFE || result == 0xFFFF)
1242       return -1;
1243     break;
1244   }
1245   return result;
1246 }
1247 
1248 int FASTCALL
XmlUtf8Encode(int c,char * buf)1249 XmlUtf8Encode(int c, char *buf) {
1250   enum {
1251     /* minN is minimum legal resulting value for N byte sequence */
1252     min2 = 0x80,
1253     min3 = 0x800,
1254     min4 = 0x10000
1255   };
1256 
1257   if (c < 0)
1258     return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1259   if (c < min2) {
1260     buf[0] = (char)(c | UTF8_cval1);
1261     return 1;
1262   }
1263   if (c < min3) {
1264     buf[0] = (char)((c >> 6) | UTF8_cval2);
1265     buf[1] = (char)((c & 0x3f) | 0x80);
1266     return 2;
1267   }
1268   if (c < min4) {
1269     buf[0] = (char)((c >> 12) | UTF8_cval3);
1270     buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1271     buf[2] = (char)((c & 0x3f) | 0x80);
1272     return 3;
1273   }
1274   if (c < 0x110000) {
1275     buf[0] = (char)((c >> 18) | UTF8_cval4);
1276     buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1277     buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1278     buf[3] = (char)((c & 0x3f) | 0x80);
1279     return 4;
1280   }
1281   return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1282 }
1283 
1284 int FASTCALL
XmlUtf16Encode(int charNum,unsigned short * buf)1285 XmlUtf16Encode(int charNum, unsigned short *buf) {
1286   if (charNum < 0)
1287     return 0;
1288   if (charNum < 0x10000) {
1289     buf[0] = (unsigned short)charNum;
1290     return 1;
1291   }
1292   if (charNum < 0x110000) {
1293     charNum -= 0x10000;
1294     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1295     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1296     return 2;
1297   }
1298   return 0;
1299 }
1300 
/* Encoding filled in at run time by XmlInitUnknownEncoding() from a
   caller-supplied byte table and an optional converter callback. */
struct unknown_encoding {
  /* Copied from latin1_encoding and then patched per byte.  The cast in
     AS_UNKNOWN_ENCODING relies on this being the first member. */
  struct normal_encoding normal;
  /* Callback for multi-byte sequences; may be NULL when the table has
     no lead bytes. */
  CONVERTER convert;
  /* Opaque first argument passed to convert. */
  void *userData;
  /* Per-byte UTF-16 unit; 0 means "ask convert". */
  unsigned short utf16[256];
  /* Per-byte UTF-8 form: element 0 is the length (0 means "ask
     convert"), the following elements are the encoded bytes. */
  char utf8[256][4];
};

/* View an ENCODING pointer as the unknown_encoding that contains it. */
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
1310 
1311 int
XmlSizeOfUnknownEncoding(void)1312 XmlSizeOfUnknownEncoding(void) {
1313   return sizeof(struct unknown_encoding);
1314 }
1315 
1316 static int PTRFASTCALL
unknown_isName(const ENCODING * enc,const char * p)1317 unknown_isName(const ENCODING *enc, const char *p) {
1318   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1319   int c = uenc->convert(uenc->userData, p);
1320   if (c & ~0xFFFF)
1321     return 0;
1322   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1323 }
1324 
1325 static int PTRFASTCALL
unknown_isNmstrt(const ENCODING * enc,const char * p)1326 unknown_isNmstrt(const ENCODING *enc, const char *p) {
1327   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1328   int c = uenc->convert(uenc->userData, p);
1329   if (c & ~0xFFFF)
1330     return 0;
1331   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1332 }
1333 
1334 static int PTRFASTCALL
unknown_isInvalid(const ENCODING * enc,const char * p)1335 unknown_isInvalid(const ENCODING *enc, const char *p) {
1336   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1337   int c = uenc->convert(uenc->userData, p);
1338   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1339 }
1340 
1341 static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)1342 unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
1343                char **toP, const char *toLim) {
1344   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1345   char buf[XML_UTF8_ENCODE_MAX];
1346   for (;;) {
1347     const char *utf8;
1348     int n;
1349     if (*fromP == fromLim)
1350       return XML_CONVERT_COMPLETED;
1351     utf8 = uenc->utf8[(unsigned char)**fromP];
1352     n = *utf8++;
1353     if (n == 0) {
1354       int c = uenc->convert(uenc->userData, *fromP);
1355       n = XmlUtf8Encode(c, buf);
1356       if (n > toLim - *toP)
1357         return XML_CONVERT_OUTPUT_EXHAUSTED;
1358       utf8 = buf;
1359       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1360                  - (BT_LEAD2 - 2));
1361     } else {
1362       if (n > toLim - *toP)
1363         return XML_CONVERT_OUTPUT_EXHAUSTED;
1364       (*fromP)++;
1365     }
1366     memcpy(*toP, utf8, n);
1367     *toP += n;
1368   }
1369 }
1370 
1371 static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)1372 unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
1373                 unsigned short **toP, const unsigned short *toLim) {
1374   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1375   while (*fromP < fromLim && *toP < toLim) {
1376     unsigned short c = uenc->utf16[(unsigned char)**fromP];
1377     if (c == 0) {
1378       c = (unsigned short)uenc->convert(uenc->userData, *fromP);
1379       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1380                  - (BT_LEAD2 - 2));
1381     } else
1382       (*fromP)++;
1383     *(*toP)++ = c;
1384   }
1385 
1386   if ((*toP == toLim) && (*fromP < fromLim))
1387     return XML_CONVERT_OUTPUT_EXHAUSTED;
1388   else
1389     return XML_CONVERT_COMPLETED;
1390 }
1391 
/* Build an encoding in mem from a 256-entry byte table.

   mem must provide XmlSizeOfUnknownEncoding() bytes.  For each byte i,
   table[i] is one of:
     -1        the byte is malformed;
     -2 .. -4  the byte starts a sequence of 2 .. 4 bytes that is decoded
               by convert (which must then be non-NULL);
     >= 0      the character number the single byte maps to (at most
               0xFFFF).
   userData is stored and passed to convert.

   Returns the new encoding, or 0 when the table is rejected.  mem has
   already been written to in that case.
*/
ENCODING *
XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
                       void *userData) {
  int i;
  struct unknown_encoding *e = (struct unknown_encoding *)mem;
  /* Start from a copy of the latin1 encoding and patch it below. */
  memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
  /* In the first 128 entries every byte whose latin1 type is neither
     BT_OTHER nor BT_NONXML has to map to itself. */
  for (i = 0; i < 128; i++)
    if (latin1_encoding.type[i] != BT_OTHER
        && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
      return 0;
  for (i = 0; i < 256; i++) {
    int c = table[i];
    if (c == -1) {
      e->normal.type[i] = BT_MALFORM;
      /* This shouldn't really get used. */
      e->utf16[i] = 0xFFFF;
      e->utf8[i][0] = 1;
      e->utf8[i][1] = 0;
    } else if (c < 0) {
      if (c < -4)
        return 0;
      /* Multi-byte sequences need a converter function */
      if (! convert)
        return 0;
      /* -2 yields BT_LEAD2, -3 and -4 the two following type values. */
      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
      /* Zero entries tell the converters to call convert. */
      e->utf8[i][0] = 0;
      e->utf16[i] = 0;
    } else if (c < 0x80) {
      /* A byte may not be remapped onto a significant ASCII char. */
      if (latin1_encoding.type[c] != BT_OTHER
          && latin1_encoding.type[c] != BT_NONXML && c != i)
        return 0;
      e->normal.type[i] = latin1_encoding.type[c];
      e->utf8[i][0] = 1;
      e->utf8[i][1] = (char)c;
      /* 0 is reserved in utf16[] to mean "call convert". */
      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
    } else if (checkCharRefNumber(c) < 0) {
      e->normal.type[i] = BT_NONXML;
      /* This shouldn't really get used. */
      e->utf16[i] = 0xFFFF;
      e->utf8[i][0] = 1;
      e->utf8[i][1] = 0;
    } else {
      if (c > 0xFFFF)
        return 0;
      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
        e->normal.type[i] = BT_NMSTRT;
      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
        e->normal.type[i] = BT_NAME;
      else
        e->normal.type[i] = BT_OTHER;
      /* Store the UTF-8 length in element 0 and the bytes after it. */
      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
      e->utf16[i] = (unsigned short)c;
    }
  }
  e->userData = userData;
  e->convert = convert;
  if (convert) {
    /* One callback serves every sequence length. */
    e->normal.isName2 = unknown_isName;
    e->normal.isName3 = unknown_isName;
    e->normal.isName4 = unknown_isName;
    e->normal.isNmstrt2 = unknown_isNmstrt;
    e->normal.isNmstrt3 = unknown_isNmstrt;
    e->normal.isNmstrt4 = unknown_isNmstrt;
    e->normal.isInvalid2 = unknown_isInvalid;
    e->normal.isInvalid3 = unknown_isInvalid;
    e->normal.isInvalid4 = unknown_isInvalid;
  }
  e->normal.enc.utf8Convert = unknown_toUtf8;
  e->normal.enc.utf16Convert = unknown_toUtf16;
  return &(e->normal.enc);
}
1463 
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and encodings must also be changed. */
enum {
  UNKNOWN_ENC = -1, /* name not recognised */
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC /* no encoding name supplied */
};
1477 
/* Upper-case names of the built-in encodings, spelled with the ASCII_*
   constants and NUL-terminated. */
static const char KW_ISO_8859_1[]
    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
static const char KW_US_ASCII[]
    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
       ASCII_C, ASCII_I, ASCII_I,     '\0'};
static const char KW_UTF_8[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
static const char KW_UTF_16[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
static const char KW_UTF_16BE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_B, ASCII_E, '\0'};
static const char KW_UTF_16LE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_L, ASCII_E, '\0'};
1494 
1495 static int FASTCALL
getEncodingIndex(const char * name)1496 getEncodingIndex(const char *name) {
1497   static const char *const encodingNames[] = {
1498       KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
1499   };
1500   int i;
1501   if (name == NULL)
1502     return NO_ENC;
1503   for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
1504     if (streqci(name, encodingNames[i]))
1505       return i;
1506   return UNKNOWN_ENC;
1507 }
1508 
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX reads that index back as an int.  SET_INIT_ENC_INDEX
   stores i narrowed to char; i is parenthesized so the cast applies to
   the whole argument expression.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1515 
/* This is what detects the encoding.  encodingTable maps from
   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
   the external (protocol) specified encoding; state is
   XML_CONTENT_STATE if we're parsing an external text entity, and
   XML_PROLOG_STATE otherwise.

   Returns XML_TOK_NONE on empty input, XML_TOK_PARTIAL when more bytes
   are needed to decide, XML_TOK_BOM after consuming a byte order mark
   (with *nextTokPtr just past it), and otherwise the result of XmlTok()
   on the selected encoding.  The selected encoding is stored through
   enc->encPtr.
*/

static int
initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
         int state, const char *ptr, const char *end, const char **nextTokPtr) {
  const ENCODING **encPtr;

  if (ptr >= end)
    return XML_TOK_NONE;
  encPtr = enc->encPtr;
  if (ptr + 1 == end) {
    /* only a single byte available for auto-detection */
#ifndef XML_DTD /* FIXME */
    /* a well-formed document entity must have more than one byte */
    if (state != XML_CONTENT_STATE)
      return XML_TOK_PARTIAL;
#endif
    /* so we're parsing an external text entity... */
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
    switch (INIT_ENC_INDEX(enc)) {
    case UTF_16_ENC:
    case UTF_16LE_ENC:
    case UTF_16BE_ENC:
      return XML_TOK_PARTIAL;
    }
    switch ((unsigned char)*ptr) {
    case 0xFE:
    case 0xFF:
    case 0xEF: /* possibly first byte of UTF-8 BOM */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      /* fall through */
    case 0x00:
    case 0x3C:
      return XML_TOK_PARTIAL;
    }
  } else {
    /* Inspect the first two bytes as one big-endian 16-bit value. */
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
    case 0xFEFF:
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16BE_ENC];
      return XML_TOK_BOM;
    /* 00 3C is handled in the default case */
    case 0x3C00:
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
          && state == XML_CONTENT_STATE)
        break;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    case 0xFFFE:
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XML_TOK_BOM;
    case 0xEFBB:
      /* Maybe a UTF-8 BOM (EF BB BF) */
      /* If there's an explicitly specified (external) encoding
         of ISO-8859-1 or some flavour of UTF-16
         and this is an external text entity,
         don't look for the BOM,
         because it might be legal data.
      */
      if (state == XML_CONTENT_STATE) {
        int e = INIT_ENC_INDEX(enc);
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
            || e == UTF_16_ENC)
          break;
      }
      if (ptr + 2 == end)
        return XML_TOK_PARTIAL;
      if ((unsigned char)ptr[2] == 0xBF) {
        *nextTokPtr = ptr + 3;
        *encPtr = encodingTable[UTF_8_ENC];
        return XML_TOK_BOM;
      }
      break;
    default:
      if (ptr[0] == '\0') {
        /* 0 isn't a legal data character. Furthermore a document
           entity can only start with ASCII characters.  So the only
           way this can fail to be big-endian UTF-16 is if it's an
           external parsed general entity that's labelled as
           UTF-16LE.
        */
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
          break;
        *encPtr = encodingTable[UTF_16BE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      } else if (ptr[1] == '\0') {
        /* We could recover here in the case:
            - parsing an external entity
            - second byte is 0
            - no externally specified encoding
            - no encoding declaration
           by assuming UTF-16LE.  But we don't, because this would mean when
           presented just with a single byte, we couldn't reliably determine
           whether we needed further bytes.
        */
        if (state == XML_CONTENT_STATE)
          break;
        *encPtr = encodingTable[UTF_16LE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      }
      break;
    }
  }
  /* Nothing detected: use the externally specified encoding. */
  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
1634 
/* First instantiation of xmltok_ns.c: NS() and ns() leave identifiers
   unchanged. */
#define NS(x) x
#define ns(x) x
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C
#undef NS
#undef ns
1642 
1643 #ifdef XML_NS
1644 
/* Second instantiation of xmltok_ns.c for XML_NS builds: NS() appends
   "NS" and ns() appends "_ns" to each identifier. */
#  define NS(x) x##NS
#  define ns(x) x##_ns

#  define XML_TOK_NS_C
#  include "xmltok_ns.c"
#  undef XML_TOK_NS_C

#  undef NS
#  undef ns
1654 
1655 ENCODING *
XmlInitUnknownEncodingNS(void * mem,int * table,CONVERTER convert,void * userData)1656 XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
1657                          void *userData) {
1658   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1659   if (enc)
1660     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1661   return enc;
1662 }
1663 
1664 #endif /* XML_NS */
1665