• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002      Greg Stein <gstein@users.sourceforge.net>
12    Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
14    Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
15    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
16    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
17    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
18    Licensed under the MIT license:
19 
20    Permission is  hereby granted,  free of charge,  to any  person obtaining
21    a  copy  of  this  software   and  associated  documentation  files  (the
22    "Software"),  to  deal in  the  Software  without restriction,  including
23    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24    distribute, sublicense, and/or sell copies of the Software, and to permit
25    persons  to whom  the Software  is  furnished to  do so,  subject to  the
26    following conditions:
27 
28    The above copyright  notice and this permission notice  shall be included
29    in all copies or substantial portions of the Software.
30 
31    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37    USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39 
40 #include <stddef.h>
41 
42 #ifdef _WIN32
43 #  include "winconfig.h"
44 #else
45 #  ifdef HAVE_EXPAT_CONFIG_H
46 #    include <expat_config.h>
47 #  endif
48 #endif /* ndef _WIN32 */
49 
50 #include "expat_external.h"
51 #include "internal.h"
52 #include "xmlrole.h"
53 #include "ascii.h"
54 
/* This module does not check:
 *
 *  - that ',' and '|' are not mixed within a single model group;
 *  - the content of literals.
 */
61 
62 static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
63 static const char KW_ATTLIST[]
64     = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
65 static const char KW_CDATA[]
66     = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
67 static const char KW_DOCTYPE[]
68     = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
69 static const char KW_ELEMENT[]
70     = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
71 static const char KW_EMPTY[]
72     = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
73 static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
74                                    ASCII_I, ASCII_E, ASCII_S, '\0'};
75 static const char KW_ENTITY[]
76     = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
77 static const char KW_FIXED[]
78     = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
79 static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
80 static const char KW_IDREF[]
81     = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
82 static const char KW_IDREFS[]
83     = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
84 #ifdef XML_DTD
85 static const char KW_IGNORE[]
86     = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
87 #endif
88 static const char KW_IMPLIED[]
89     = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
90 #ifdef XML_DTD
91 static const char KW_INCLUDE[]
92     = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
93 #endif
94 static const char KW_NDATA[]
95     = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
96 static const char KW_NMTOKEN[]
97     = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
98 static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
99                                    ASCII_E, ASCII_N, ASCII_S, '\0'};
100 static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
101                                    ASCII_I, ASCII_O, ASCII_N, '\0'};
102 static const char KW_PCDATA[]
103     = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
104 static const char KW_PUBLIC[]
105     = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
106 static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
107                                    ASCII_R, ASCII_E, ASCII_D, '\0'};
108 static const char KW_SYSTEM[]
109     = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
110 
/* Reads the minBytesPerChar field of an encoding.  Only defined here when
 * no definition has been supplied before this point.
 */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Install the handler for top-level declarations.  Without XML_DTD this is
 * always internalSubset; with XML_DTD it is internalSubset when the state's
 * documentEntity flag is set and externalSubset1 otherwise.
 */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = (! (state)->documentEntity ? externalSubset1 : internalSubset))
#endif /* XML_DTD */
122 
123 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
124                                    const char *ptr, const char *end,
125                                    const ENCODING *enc);
126 
127 static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
128     doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
129     entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
130     notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
131     attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
132     attlist9, element0, element1, element2, element3, element4, element5,
133     element6, element7,
134 #ifdef XML_DTD
135     externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
136 #endif /* XML_DTD */
137     declClose, error;
138 
139 static int FASTCALL common(PROLOG_STATE *state, int tok);
140 
141 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)142 prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
143         const ENCODING *enc) {
144   switch (tok) {
145   case XML_TOK_PROLOG_S:
146     state->handler = prolog1;
147     return XML_ROLE_NONE;
148   case XML_TOK_XML_DECL:
149     state->handler = prolog1;
150     return XML_ROLE_XML_DECL;
151   case XML_TOK_PI:
152     state->handler = prolog1;
153     return XML_ROLE_PI;
154   case XML_TOK_COMMENT:
155     state->handler = prolog1;
156     return XML_ROLE_COMMENT;
157   case XML_TOK_BOM:
158     return XML_ROLE_NONE;
159   case XML_TOK_DECL_OPEN:
160     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
161                               KW_DOCTYPE))
162       break;
163     state->handler = doctype0;
164     return XML_ROLE_DOCTYPE_NONE;
165   case XML_TOK_INSTANCE_START:
166     state->handler = error;
167     return XML_ROLE_INSTANCE_START;
168   }
169   return common(state, tok);
170 }
171 
172 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)173 prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
174         const ENCODING *enc) {
175   switch (tok) {
176   case XML_TOK_PROLOG_S:
177     return XML_ROLE_NONE;
178   case XML_TOK_PI:
179     return XML_ROLE_PI;
180   case XML_TOK_COMMENT:
181     return XML_ROLE_COMMENT;
182   case XML_TOK_BOM:
183     /* This case can never arise.  To reach this role function, the
184      * parse must have passed through prolog0 and therefore have had
185      * some form of input, even if only a space.  At that point, a
186      * byte order mark is no longer a valid character (though
187      * technically it should be interpreted as a non-breaking space),
188      * so will be rejected by the tokenizing stages.
189      */
190     return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
191   case XML_TOK_DECL_OPEN:
192     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
193                               KW_DOCTYPE))
194       break;
195     state->handler = doctype0;
196     return XML_ROLE_DOCTYPE_NONE;
197   case XML_TOK_INSTANCE_START:
198     state->handler = error;
199     return XML_ROLE_INSTANCE_START;
200   }
201   return common(state, tok);
202 }
203 
204 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)205 prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
206         const ENCODING *enc) {
207   UNUSED_P(ptr);
208   UNUSED_P(end);
209   UNUSED_P(enc);
210   switch (tok) {
211   case XML_TOK_PROLOG_S:
212     return XML_ROLE_NONE;
213   case XML_TOK_PI:
214     return XML_ROLE_PI;
215   case XML_TOK_COMMENT:
216     return XML_ROLE_COMMENT;
217   case XML_TOK_INSTANCE_START:
218     state->handler = error;
219     return XML_ROLE_INSTANCE_START;
220   }
221   return common(state, tok);
222 }
223 
224 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)225 doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
226          const ENCODING *enc) {
227   UNUSED_P(ptr);
228   UNUSED_P(end);
229   UNUSED_P(enc);
230   switch (tok) {
231   case XML_TOK_PROLOG_S:
232     return XML_ROLE_DOCTYPE_NONE;
233   case XML_TOK_NAME:
234   case XML_TOK_PREFIXED_NAME:
235     state->handler = doctype1;
236     return XML_ROLE_DOCTYPE_NAME;
237   }
238   return common(state, tok);
239 }
240 
241 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)242 doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
243          const ENCODING *enc) {
244   switch (tok) {
245   case XML_TOK_PROLOG_S:
246     return XML_ROLE_DOCTYPE_NONE;
247   case XML_TOK_OPEN_BRACKET:
248     state->handler = internalSubset;
249     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
250   case XML_TOK_DECL_CLOSE:
251     state->handler = prolog2;
252     return XML_ROLE_DOCTYPE_CLOSE;
253   case XML_TOK_NAME:
254     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
255       state->handler = doctype3;
256       return XML_ROLE_DOCTYPE_NONE;
257     }
258     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
259       state->handler = doctype2;
260       return XML_ROLE_DOCTYPE_NONE;
261     }
262     break;
263   }
264   return common(state, tok);
265 }
266 
267 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)268 doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
269          const ENCODING *enc) {
270   UNUSED_P(ptr);
271   UNUSED_P(end);
272   UNUSED_P(enc);
273   switch (tok) {
274   case XML_TOK_PROLOG_S:
275     return XML_ROLE_DOCTYPE_NONE;
276   case XML_TOK_LITERAL:
277     state->handler = doctype3;
278     return XML_ROLE_DOCTYPE_PUBLIC_ID;
279   }
280   return common(state, tok);
281 }
282 
283 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)284 doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
285          const ENCODING *enc) {
286   UNUSED_P(ptr);
287   UNUSED_P(end);
288   UNUSED_P(enc);
289   switch (tok) {
290   case XML_TOK_PROLOG_S:
291     return XML_ROLE_DOCTYPE_NONE;
292   case XML_TOK_LITERAL:
293     state->handler = doctype4;
294     return XML_ROLE_DOCTYPE_SYSTEM_ID;
295   }
296   return common(state, tok);
297 }
298 
299 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)300 doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
301          const ENCODING *enc) {
302   UNUSED_P(ptr);
303   UNUSED_P(end);
304   UNUSED_P(enc);
305   switch (tok) {
306   case XML_TOK_PROLOG_S:
307     return XML_ROLE_DOCTYPE_NONE;
308   case XML_TOK_OPEN_BRACKET:
309     state->handler = internalSubset;
310     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
311   case XML_TOK_DECL_CLOSE:
312     state->handler = prolog2;
313     return XML_ROLE_DOCTYPE_CLOSE;
314   }
315   return common(state, tok);
316 }
317 
318 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)319 doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
320          const ENCODING *enc) {
321   UNUSED_P(ptr);
322   UNUSED_P(end);
323   UNUSED_P(enc);
324   switch (tok) {
325   case XML_TOK_PROLOG_S:
326     return XML_ROLE_DOCTYPE_NONE;
327   case XML_TOK_DECL_CLOSE:
328     state->handler = prolog2;
329     return XML_ROLE_DOCTYPE_CLOSE;
330   }
331   return common(state, tok);
332 }
333 
334 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)335 internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
336                const ENCODING *enc) {
337   switch (tok) {
338   case XML_TOK_PROLOG_S:
339     return XML_ROLE_NONE;
340   case XML_TOK_DECL_OPEN:
341     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
342                             KW_ENTITY)) {
343       state->handler = entity0;
344       return XML_ROLE_ENTITY_NONE;
345     }
346     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
347                             KW_ATTLIST)) {
348       state->handler = attlist0;
349       return XML_ROLE_ATTLIST_NONE;
350     }
351     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
352                             KW_ELEMENT)) {
353       state->handler = element0;
354       return XML_ROLE_ELEMENT_NONE;
355     }
356     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
357                             KW_NOTATION)) {
358       state->handler = notation0;
359       return XML_ROLE_NOTATION_NONE;
360     }
361     break;
362   case XML_TOK_PI:
363     return XML_ROLE_PI;
364   case XML_TOK_COMMENT:
365     return XML_ROLE_COMMENT;
366   case XML_TOK_PARAM_ENTITY_REF:
367     return XML_ROLE_PARAM_ENTITY_REF;
368   case XML_TOK_CLOSE_BRACKET:
369     state->handler = doctype5;
370     return XML_ROLE_DOCTYPE_NONE;
371   case XML_TOK_NONE:
372     return XML_ROLE_NONE;
373   }
374   return common(state, tok);
375 }
376 
377 #ifdef XML_DTD
378 
379 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)380 externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
381                 const ENCODING *enc) {
382   state->handler = externalSubset1;
383   if (tok == XML_TOK_XML_DECL)
384     return XML_ROLE_TEXT_DECL;
385   return externalSubset1(state, tok, ptr, end, enc);
386 }
387 
388 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)389 externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
390                 const ENCODING *enc) {
391   switch (tok) {
392   case XML_TOK_COND_SECT_OPEN:
393     state->handler = condSect0;
394     return XML_ROLE_NONE;
395   case XML_TOK_COND_SECT_CLOSE:
396     if (state->includeLevel == 0)
397       break;
398     state->includeLevel -= 1;
399     return XML_ROLE_NONE;
400   case XML_TOK_PROLOG_S:
401     return XML_ROLE_NONE;
402   case XML_TOK_CLOSE_BRACKET:
403     break;
404   case XML_TOK_NONE:
405     if (state->includeLevel)
406       break;
407     return XML_ROLE_NONE;
408   default:
409     return internalSubset(state, tok, ptr, end, enc);
410   }
411   return common(state, tok);
412 }
413 
414 #endif /* XML_DTD */
415 
416 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)417 entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
418         const ENCODING *enc) {
419   UNUSED_P(ptr);
420   UNUSED_P(end);
421   UNUSED_P(enc);
422   switch (tok) {
423   case XML_TOK_PROLOG_S:
424     return XML_ROLE_ENTITY_NONE;
425   case XML_TOK_PERCENT:
426     state->handler = entity1;
427     return XML_ROLE_ENTITY_NONE;
428   case XML_TOK_NAME:
429     state->handler = entity2;
430     return XML_ROLE_GENERAL_ENTITY_NAME;
431   }
432   return common(state, tok);
433 }
434 
435 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)436 entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
437         const ENCODING *enc) {
438   UNUSED_P(ptr);
439   UNUSED_P(end);
440   UNUSED_P(enc);
441   switch (tok) {
442   case XML_TOK_PROLOG_S:
443     return XML_ROLE_ENTITY_NONE;
444   case XML_TOK_NAME:
445     state->handler = entity7;
446     return XML_ROLE_PARAM_ENTITY_NAME;
447   }
448   return common(state, tok);
449 }
450 
451 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)452 entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
453         const ENCODING *enc) {
454   switch (tok) {
455   case XML_TOK_PROLOG_S:
456     return XML_ROLE_ENTITY_NONE;
457   case XML_TOK_NAME:
458     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
459       state->handler = entity4;
460       return XML_ROLE_ENTITY_NONE;
461     }
462     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
463       state->handler = entity3;
464       return XML_ROLE_ENTITY_NONE;
465     }
466     break;
467   case XML_TOK_LITERAL:
468     state->handler = declClose;
469     state->role_none = XML_ROLE_ENTITY_NONE;
470     return XML_ROLE_ENTITY_VALUE;
471   }
472   return common(state, tok);
473 }
474 
475 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)476 entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
477         const ENCODING *enc) {
478   UNUSED_P(ptr);
479   UNUSED_P(end);
480   UNUSED_P(enc);
481   switch (tok) {
482   case XML_TOK_PROLOG_S:
483     return XML_ROLE_ENTITY_NONE;
484   case XML_TOK_LITERAL:
485     state->handler = entity4;
486     return XML_ROLE_ENTITY_PUBLIC_ID;
487   }
488   return common(state, tok);
489 }
490 
491 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)492 entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
493         const ENCODING *enc) {
494   UNUSED_P(ptr);
495   UNUSED_P(end);
496   UNUSED_P(enc);
497   switch (tok) {
498   case XML_TOK_PROLOG_S:
499     return XML_ROLE_ENTITY_NONE;
500   case XML_TOK_LITERAL:
501     state->handler = entity5;
502     return XML_ROLE_ENTITY_SYSTEM_ID;
503   }
504   return common(state, tok);
505 }
506 
507 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)508 entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
509         const ENCODING *enc) {
510   switch (tok) {
511   case XML_TOK_PROLOG_S:
512     return XML_ROLE_ENTITY_NONE;
513   case XML_TOK_DECL_CLOSE:
514     setTopLevel(state);
515     return XML_ROLE_ENTITY_COMPLETE;
516   case XML_TOK_NAME:
517     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
518       state->handler = entity6;
519       return XML_ROLE_ENTITY_NONE;
520     }
521     break;
522   }
523   return common(state, tok);
524 }
525 
526 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)527 entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
528         const ENCODING *enc) {
529   UNUSED_P(ptr);
530   UNUSED_P(end);
531   UNUSED_P(enc);
532   switch (tok) {
533   case XML_TOK_PROLOG_S:
534     return XML_ROLE_ENTITY_NONE;
535   case XML_TOK_NAME:
536     state->handler = declClose;
537     state->role_none = XML_ROLE_ENTITY_NONE;
538     return XML_ROLE_ENTITY_NOTATION_NAME;
539   }
540   return common(state, tok);
541 }
542 
543 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)544 entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
545         const ENCODING *enc) {
546   switch (tok) {
547   case XML_TOK_PROLOG_S:
548     return XML_ROLE_ENTITY_NONE;
549   case XML_TOK_NAME:
550     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
551       state->handler = entity9;
552       return XML_ROLE_ENTITY_NONE;
553     }
554     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
555       state->handler = entity8;
556       return XML_ROLE_ENTITY_NONE;
557     }
558     break;
559   case XML_TOK_LITERAL:
560     state->handler = declClose;
561     state->role_none = XML_ROLE_ENTITY_NONE;
562     return XML_ROLE_ENTITY_VALUE;
563   }
564   return common(state, tok);
565 }
566 
567 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)568 entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
569         const ENCODING *enc) {
570   UNUSED_P(ptr);
571   UNUSED_P(end);
572   UNUSED_P(enc);
573   switch (tok) {
574   case XML_TOK_PROLOG_S:
575     return XML_ROLE_ENTITY_NONE;
576   case XML_TOK_LITERAL:
577     state->handler = entity9;
578     return XML_ROLE_ENTITY_PUBLIC_ID;
579   }
580   return common(state, tok);
581 }
582 
583 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)584 entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
585         const ENCODING *enc) {
586   UNUSED_P(ptr);
587   UNUSED_P(end);
588   UNUSED_P(enc);
589   switch (tok) {
590   case XML_TOK_PROLOG_S:
591     return XML_ROLE_ENTITY_NONE;
592   case XML_TOK_LITERAL:
593     state->handler = entity10;
594     return XML_ROLE_ENTITY_SYSTEM_ID;
595   }
596   return common(state, tok);
597 }
598 
599 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)600 entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
601          const ENCODING *enc) {
602   UNUSED_P(ptr);
603   UNUSED_P(end);
604   UNUSED_P(enc);
605   switch (tok) {
606   case XML_TOK_PROLOG_S:
607     return XML_ROLE_ENTITY_NONE;
608   case XML_TOK_DECL_CLOSE:
609     setTopLevel(state);
610     return XML_ROLE_ENTITY_COMPLETE;
611   }
612   return common(state, tok);
613 }
614 
615 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)616 notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
617           const ENCODING *enc) {
618   UNUSED_P(ptr);
619   UNUSED_P(end);
620   UNUSED_P(enc);
621   switch (tok) {
622   case XML_TOK_PROLOG_S:
623     return XML_ROLE_NOTATION_NONE;
624   case XML_TOK_NAME:
625     state->handler = notation1;
626     return XML_ROLE_NOTATION_NAME;
627   }
628   return common(state, tok);
629 }
630 
631 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)632 notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
633           const ENCODING *enc) {
634   switch (tok) {
635   case XML_TOK_PROLOG_S:
636     return XML_ROLE_NOTATION_NONE;
637   case XML_TOK_NAME:
638     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
639       state->handler = notation3;
640       return XML_ROLE_NOTATION_NONE;
641     }
642     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
643       state->handler = notation2;
644       return XML_ROLE_NOTATION_NONE;
645     }
646     break;
647   }
648   return common(state, tok);
649 }
650 
651 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)652 notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
653           const ENCODING *enc) {
654   UNUSED_P(ptr);
655   UNUSED_P(end);
656   UNUSED_P(enc);
657   switch (tok) {
658   case XML_TOK_PROLOG_S:
659     return XML_ROLE_NOTATION_NONE;
660   case XML_TOK_LITERAL:
661     state->handler = notation4;
662     return XML_ROLE_NOTATION_PUBLIC_ID;
663   }
664   return common(state, tok);
665 }
666 
667 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)668 notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
669           const ENCODING *enc) {
670   UNUSED_P(ptr);
671   UNUSED_P(end);
672   UNUSED_P(enc);
673   switch (tok) {
674   case XML_TOK_PROLOG_S:
675     return XML_ROLE_NOTATION_NONE;
676   case XML_TOK_LITERAL:
677     state->handler = declClose;
678     state->role_none = XML_ROLE_NOTATION_NONE;
679     return XML_ROLE_NOTATION_SYSTEM_ID;
680   }
681   return common(state, tok);
682 }
683 
684 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)685 notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
686           const ENCODING *enc) {
687   UNUSED_P(ptr);
688   UNUSED_P(end);
689   UNUSED_P(enc);
690   switch (tok) {
691   case XML_TOK_PROLOG_S:
692     return XML_ROLE_NOTATION_NONE;
693   case XML_TOK_LITERAL:
694     state->handler = declClose;
695     state->role_none = XML_ROLE_NOTATION_NONE;
696     return XML_ROLE_NOTATION_SYSTEM_ID;
697   case XML_TOK_DECL_CLOSE:
698     setTopLevel(state);
699     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
700   }
701   return common(state, tok);
702 }
703 
704 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)705 attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
706          const ENCODING *enc) {
707   UNUSED_P(ptr);
708   UNUSED_P(end);
709   UNUSED_P(enc);
710   switch (tok) {
711   case XML_TOK_PROLOG_S:
712     return XML_ROLE_ATTLIST_NONE;
713   case XML_TOK_NAME:
714   case XML_TOK_PREFIXED_NAME:
715     state->handler = attlist1;
716     return XML_ROLE_ATTLIST_ELEMENT_NAME;
717   }
718   return common(state, tok);
719 }
720 
721 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)722 attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
723          const ENCODING *enc) {
724   UNUSED_P(ptr);
725   UNUSED_P(end);
726   UNUSED_P(enc);
727   switch (tok) {
728   case XML_TOK_PROLOG_S:
729     return XML_ROLE_ATTLIST_NONE;
730   case XML_TOK_DECL_CLOSE:
731     setTopLevel(state);
732     return XML_ROLE_ATTLIST_NONE;
733   case XML_TOK_NAME:
734   case XML_TOK_PREFIXED_NAME:
735     state->handler = attlist2;
736     return XML_ROLE_ATTRIBUTE_NAME;
737   }
738   return common(state, tok);
739 }
740 
741 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)742 attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
743          const ENCODING *enc) {
744   switch (tok) {
745   case XML_TOK_PROLOG_S:
746     return XML_ROLE_ATTLIST_NONE;
747   case XML_TOK_NAME: {
748     static const char *const types[] = {
749         KW_CDATA,  KW_ID,       KW_IDREF,   KW_IDREFS,
750         KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
751     };
752     int i;
753     for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
754       if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
755         state->handler = attlist8;
756         return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
757       }
758   }
759     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
760       state->handler = attlist5;
761       return XML_ROLE_ATTLIST_NONE;
762     }
763     break;
764   case XML_TOK_OPEN_PAREN:
765     state->handler = attlist3;
766     return XML_ROLE_ATTLIST_NONE;
767   }
768   return common(state, tok);
769 }
770 
771 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)772 attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
773          const ENCODING *enc) {
774   UNUSED_P(ptr);
775   UNUSED_P(end);
776   UNUSED_P(enc);
777   switch (tok) {
778   case XML_TOK_PROLOG_S:
779     return XML_ROLE_ATTLIST_NONE;
780   case XML_TOK_NMTOKEN:
781   case XML_TOK_NAME:
782   case XML_TOK_PREFIXED_NAME:
783     state->handler = attlist4;
784     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
785   }
786   return common(state, tok);
787 }
788 
789 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)790 attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
791          const ENCODING *enc) {
792   UNUSED_P(ptr);
793   UNUSED_P(end);
794   UNUSED_P(enc);
795   switch (tok) {
796   case XML_TOK_PROLOG_S:
797     return XML_ROLE_ATTLIST_NONE;
798   case XML_TOK_CLOSE_PAREN:
799     state->handler = attlist8;
800     return XML_ROLE_ATTLIST_NONE;
801   case XML_TOK_OR:
802     state->handler = attlist3;
803     return XML_ROLE_ATTLIST_NONE;
804   }
805   return common(state, tok);
806 }
807 
808 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)809 attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
810          const ENCODING *enc) {
811   UNUSED_P(ptr);
812   UNUSED_P(end);
813   UNUSED_P(enc);
814   switch (tok) {
815   case XML_TOK_PROLOG_S:
816     return XML_ROLE_ATTLIST_NONE;
817   case XML_TOK_OPEN_PAREN:
818     state->handler = attlist6;
819     return XML_ROLE_ATTLIST_NONE;
820   }
821   return common(state, tok);
822 }
823 
824 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)825 attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
826          const ENCODING *enc) {
827   UNUSED_P(ptr);
828   UNUSED_P(end);
829   UNUSED_P(enc);
830   switch (tok) {
831   case XML_TOK_PROLOG_S:
832     return XML_ROLE_ATTLIST_NONE;
833   case XML_TOK_NAME:
834     state->handler = attlist7;
835     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
836   }
837   return common(state, tok);
838 }
839 
840 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)841 attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
842          const ENCODING *enc) {
843   UNUSED_P(ptr);
844   UNUSED_P(end);
845   UNUSED_P(enc);
846   switch (tok) {
847   case XML_TOK_PROLOG_S:
848     return XML_ROLE_ATTLIST_NONE;
849   case XML_TOK_CLOSE_PAREN:
850     state->handler = attlist8;
851     return XML_ROLE_ATTLIST_NONE;
852   case XML_TOK_OR:
853     state->handler = attlist6;
854     return XML_ROLE_ATTLIST_NONE;
855   }
856   return common(state, tok);
857 }
858 
859 /* default value */
860 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)861 attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
862          const ENCODING *enc) {
863   switch (tok) {
864   case XML_TOK_PROLOG_S:
865     return XML_ROLE_ATTLIST_NONE;
866   case XML_TOK_POUND_NAME:
867     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
868                             KW_IMPLIED)) {
869       state->handler = attlist1;
870       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
871     }
872     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
873                             KW_REQUIRED)) {
874       state->handler = attlist1;
875       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
876     }
877     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
878                             KW_FIXED)) {
879       state->handler = attlist9;
880       return XML_ROLE_ATTLIST_NONE;
881     }
882     break;
883   case XML_TOK_LITERAL:
884     state->handler = attlist1;
885     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
886   }
887   return common(state, tok);
888 }
889 
890 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)891 attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
892          const ENCODING *enc) {
893   UNUSED_P(ptr);
894   UNUSED_P(end);
895   UNUSED_P(enc);
896   switch (tok) {
897   case XML_TOK_PROLOG_S:
898     return XML_ROLE_ATTLIST_NONE;
899   case XML_TOK_LITERAL:
900     state->handler = attlist1;
901     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
902   }
903   return common(state, tok);
904 }
905 
906 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)907 element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
908          const ENCODING *enc) {
909   UNUSED_P(ptr);
910   UNUSED_P(end);
911   UNUSED_P(enc);
912   switch (tok) {
913   case XML_TOK_PROLOG_S:
914     return XML_ROLE_ELEMENT_NONE;
915   case XML_TOK_NAME:
916   case XML_TOK_PREFIXED_NAME:
917     state->handler = element1;
918     return XML_ROLE_ELEMENT_NAME;
919   }
920   return common(state, tok);
921 }
922 
923 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)924 element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
925          const ENCODING *enc) {
926   switch (tok) {
927   case XML_TOK_PROLOG_S:
928     return XML_ROLE_ELEMENT_NONE;
929   case XML_TOK_NAME:
930     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
931       state->handler = declClose;
932       state->role_none = XML_ROLE_ELEMENT_NONE;
933       return XML_ROLE_CONTENT_EMPTY;
934     }
935     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
936       state->handler = declClose;
937       state->role_none = XML_ROLE_ELEMENT_NONE;
938       return XML_ROLE_CONTENT_ANY;
939     }
940     break;
941   case XML_TOK_OPEN_PAREN:
942     state->handler = element2;
943     state->level = 1;
944     return XML_ROLE_GROUP_OPEN;
945   }
946   return common(state, tok);
947 }
948 
949 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)950 element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
951          const ENCODING *enc) {
952   switch (tok) {
953   case XML_TOK_PROLOG_S:
954     return XML_ROLE_ELEMENT_NONE;
955   case XML_TOK_POUND_NAME:
956     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
957                             KW_PCDATA)) {
958       state->handler = element3;
959       return XML_ROLE_CONTENT_PCDATA;
960     }
961     break;
962   case XML_TOK_OPEN_PAREN:
963     state->level = 2;
964     state->handler = element6;
965     return XML_ROLE_GROUP_OPEN;
966   case XML_TOK_NAME:
967   case XML_TOK_PREFIXED_NAME:
968     state->handler = element7;
969     return XML_ROLE_CONTENT_ELEMENT;
970   case XML_TOK_NAME_QUESTION:
971     state->handler = element7;
972     return XML_ROLE_CONTENT_ELEMENT_OPT;
973   case XML_TOK_NAME_ASTERISK:
974     state->handler = element7;
975     return XML_ROLE_CONTENT_ELEMENT_REP;
976   case XML_TOK_NAME_PLUS:
977     state->handler = element7;
978     return XML_ROLE_CONTENT_ELEMENT_PLUS;
979   }
980   return common(state, tok);
981 }
982 
983 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)984 element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
985          const ENCODING *enc) {
986   UNUSED_P(ptr);
987   UNUSED_P(end);
988   UNUSED_P(enc);
989   switch (tok) {
990   case XML_TOK_PROLOG_S:
991     return XML_ROLE_ELEMENT_NONE;
992   case XML_TOK_CLOSE_PAREN:
993     state->handler = declClose;
994     state->role_none = XML_ROLE_ELEMENT_NONE;
995     return XML_ROLE_GROUP_CLOSE;
996   case XML_TOK_CLOSE_PAREN_ASTERISK:
997     state->handler = declClose;
998     state->role_none = XML_ROLE_ELEMENT_NONE;
999     return XML_ROLE_GROUP_CLOSE_REP;
1000   case XML_TOK_OR:
1001     state->handler = element4;
1002     return XML_ROLE_ELEMENT_NONE;
1003   }
1004   return common(state, tok);
1005 }
1006 
1007 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1008 element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1009          const ENCODING *enc) {
1010   UNUSED_P(ptr);
1011   UNUSED_P(end);
1012   UNUSED_P(enc);
1013   switch (tok) {
1014   case XML_TOK_PROLOG_S:
1015     return XML_ROLE_ELEMENT_NONE;
1016   case XML_TOK_NAME:
1017   case XML_TOK_PREFIXED_NAME:
1018     state->handler = element5;
1019     return XML_ROLE_CONTENT_ELEMENT;
1020   }
1021   return common(state, tok);
1022 }
1023 
1024 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1025 element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1026          const ENCODING *enc) {
1027   UNUSED_P(ptr);
1028   UNUSED_P(end);
1029   UNUSED_P(enc);
1030   switch (tok) {
1031   case XML_TOK_PROLOG_S:
1032     return XML_ROLE_ELEMENT_NONE;
1033   case XML_TOK_CLOSE_PAREN_ASTERISK:
1034     state->handler = declClose;
1035     state->role_none = XML_ROLE_ELEMENT_NONE;
1036     return XML_ROLE_GROUP_CLOSE_REP;
1037   case XML_TOK_OR:
1038     state->handler = element4;
1039     return XML_ROLE_ELEMENT_NONE;
1040   }
1041   return common(state, tok);
1042 }
1043 
1044 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1045 element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1046          const ENCODING *enc) {
1047   UNUSED_P(ptr);
1048   UNUSED_P(end);
1049   UNUSED_P(enc);
1050   switch (tok) {
1051   case XML_TOK_PROLOG_S:
1052     return XML_ROLE_ELEMENT_NONE;
1053   case XML_TOK_OPEN_PAREN:
1054     state->level += 1;
1055     return XML_ROLE_GROUP_OPEN;
1056   case XML_TOK_NAME:
1057   case XML_TOK_PREFIXED_NAME:
1058     state->handler = element7;
1059     return XML_ROLE_CONTENT_ELEMENT;
1060   case XML_TOK_NAME_QUESTION:
1061     state->handler = element7;
1062     return XML_ROLE_CONTENT_ELEMENT_OPT;
1063   case XML_TOK_NAME_ASTERISK:
1064     state->handler = element7;
1065     return XML_ROLE_CONTENT_ELEMENT_REP;
1066   case XML_TOK_NAME_PLUS:
1067     state->handler = element7;
1068     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1069   }
1070   return common(state, tok);
1071 }
1072 
1073 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1074 element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1075          const ENCODING *enc) {
1076   UNUSED_P(ptr);
1077   UNUSED_P(end);
1078   UNUSED_P(enc);
1079   switch (tok) {
1080   case XML_TOK_PROLOG_S:
1081     return XML_ROLE_ELEMENT_NONE;
1082   case XML_TOK_CLOSE_PAREN:
1083     state->level -= 1;
1084     if (state->level == 0) {
1085       state->handler = declClose;
1086       state->role_none = XML_ROLE_ELEMENT_NONE;
1087     }
1088     return XML_ROLE_GROUP_CLOSE;
1089   case XML_TOK_CLOSE_PAREN_ASTERISK:
1090     state->level -= 1;
1091     if (state->level == 0) {
1092       state->handler = declClose;
1093       state->role_none = XML_ROLE_ELEMENT_NONE;
1094     }
1095     return XML_ROLE_GROUP_CLOSE_REP;
1096   case XML_TOK_CLOSE_PAREN_QUESTION:
1097     state->level -= 1;
1098     if (state->level == 0) {
1099       state->handler = declClose;
1100       state->role_none = XML_ROLE_ELEMENT_NONE;
1101     }
1102     return XML_ROLE_GROUP_CLOSE_OPT;
1103   case XML_TOK_CLOSE_PAREN_PLUS:
1104     state->level -= 1;
1105     if (state->level == 0) {
1106       state->handler = declClose;
1107       state->role_none = XML_ROLE_ELEMENT_NONE;
1108     }
1109     return XML_ROLE_GROUP_CLOSE_PLUS;
1110   case XML_TOK_COMMA:
1111     state->handler = element6;
1112     return XML_ROLE_GROUP_SEQUENCE;
1113   case XML_TOK_OR:
1114     state->handler = element6;
1115     return XML_ROLE_GROUP_CHOICE;
1116   }
1117   return common(state, tok);
1118 }
1119 
1120 #ifdef XML_DTD
1121 
1122 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1123 condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1124           const ENCODING *enc) {
1125   switch (tok) {
1126   case XML_TOK_PROLOG_S:
1127     return XML_ROLE_NONE;
1128   case XML_TOK_NAME:
1129     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1130       state->handler = condSect1;
1131       return XML_ROLE_NONE;
1132     }
1133     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1134       state->handler = condSect2;
1135       return XML_ROLE_NONE;
1136     }
1137     break;
1138   }
1139   return common(state, tok);
1140 }
1141 
1142 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1143 condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1144           const ENCODING *enc) {
1145   UNUSED_P(ptr);
1146   UNUSED_P(end);
1147   UNUSED_P(enc);
1148   switch (tok) {
1149   case XML_TOK_PROLOG_S:
1150     return XML_ROLE_NONE;
1151   case XML_TOK_OPEN_BRACKET:
1152     state->handler = externalSubset1;
1153     state->includeLevel += 1;
1154     return XML_ROLE_NONE;
1155   }
1156   return common(state, tok);
1157 }
1158 
1159 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1160 condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1161           const ENCODING *enc) {
1162   UNUSED_P(ptr);
1163   UNUSED_P(end);
1164   UNUSED_P(enc);
1165   switch (tok) {
1166   case XML_TOK_PROLOG_S:
1167     return XML_ROLE_NONE;
1168   case XML_TOK_OPEN_BRACKET:
1169     state->handler = externalSubset1;
1170     return XML_ROLE_IGNORE_SECT;
1171   }
1172   return common(state, tok);
1173 }
1174 
1175 #endif /* XML_DTD */
1176 
1177 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1178 declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1179           const ENCODING *enc) {
1180   UNUSED_P(ptr);
1181   UNUSED_P(end);
1182   UNUSED_P(enc);
1183   switch (tok) {
1184   case XML_TOK_PROLOG_S:
1185     return state->role_none;
1186   case XML_TOK_DECL_CLOSE:
1187     setTopLevel(state);
1188     return state->role_none;
1189   }
1190   return common(state, tok);
1191 }
1192 
1193 /* This function will only be invoked if the internal logic of the
1194  * parser has broken down.  It is used in two cases:
1195  *
1196  * 1: When the XML prolog has been finished.  At this point the
1197  * processor (the parser level above these role handlers) should
1198  * switch from prologProcessor to contentProcessor and reinitialise
1199  * the handler function.
1200  *
1201  * 2: When an error has been detected (via common() below).  At this
1202  * point again the processor should be switched to errorProcessor,
1203  * which will never call a handler.
1204  *
1205  * The result of this is that error() can only be called if the
1206  * processor switch failed to happen, which is an internal error and
1207  * therefore we shouldn't be able to provoke it simply by using the
1208  * library.  It is a necessary backstop, however, so we merely exclude
1209  * it from the coverage statistics.
1210  *
1211  * LCOV_EXCL_START
1212  */
1213 static int PTRCALL
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1214 error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1215       const ENCODING *enc) {
1216   UNUSED_P(state);
1217   UNUSED_P(tok);
1218   UNUSED_P(ptr);
1219   UNUSED_P(end);
1220   UNUSED_P(enc);
1221   return XML_ROLE_NONE;
1222 }
1223 /* LCOV_EXCL_STOP */
1224 
1225 static int FASTCALL
common(PROLOG_STATE * state,int tok)1226 common(PROLOG_STATE *state, int tok) {
1227 #ifdef XML_DTD
1228   if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1229     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1230 #else
1231   UNUSED_P(tok);
1232 #endif
1233   state->handler = error;
1234   return XML_ROLE_ERROR;
1235 }
1236 
1237 void
XmlPrologStateInit(PROLOG_STATE * state)1238 XmlPrologStateInit(PROLOG_STATE *state) {
1239   state->handler = prolog0;
1240 #ifdef XML_DTD
1241   state->documentEntity = 1;
1242   state->includeLevel = 0;
1243   state->inEntityValue = 0;
1244 #endif /* XML_DTD */
1245 }
1246 
1247 #ifdef XML_DTD
1248 
1249 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1250 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1251   state->handler = externalSubset0;
1252   state->documentEntity = 0;
1253   state->includeLevel = 0;
1254 }
1255 
1256 #endif /* XML_DTD */
1257