1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <stddef.h>
34
35 #ifdef _WIN32
36 #include "winconfig.h"
37 #else
38 #ifdef HAVE_EXPAT_CONFIG_H
39 #include <expat_config.h>
40 #endif
41 #endif /* ndef _WIN32 */
42
43 #include "expat_external.h"
44 #include "internal.h"
45 #include "xmlrole.h"
46 #include "ascii.h"
47
/* The handlers in this file do not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
54
55 static const char KW_ANY[] = {
56 ASCII_A, ASCII_N, ASCII_Y, '\0' };
57 static const char KW_ATTLIST[] = {
58 ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
59 static const char KW_CDATA[] = {
60 ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
61 static const char KW_DOCTYPE[] = {
62 ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
63 static const char KW_ELEMENT[] = {
64 ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
65 static const char KW_EMPTY[] = {
66 ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
67 static const char KW_ENTITIES[] = {
68 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
69 '\0' };
70 static const char KW_ENTITY[] = {
71 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
72 static const char KW_FIXED[] = {
73 ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
74 static const char KW_ID[] = {
75 ASCII_I, ASCII_D, '\0' };
76 static const char KW_IDREF[] = {
77 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
78 static const char KW_IDREFS[] = {
79 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
80 #ifdef XML_DTD
81 static const char KW_IGNORE[] = {
82 ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
83 #endif
84 static const char KW_IMPLIED[] = {
85 ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
86 #ifdef XML_DTD
87 static const char KW_INCLUDE[] = {
88 ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
89 #endif
90 static const char KW_NDATA[] = {
91 ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
92 static const char KW_NMTOKEN[] = {
93 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
94 static const char KW_NMTOKENS[] = {
95 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
96 '\0' };
97 static const char KW_NOTATION[] =
98 { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
99 '\0' };
100 static const char KW_PCDATA[] = {
101 ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
102 static const char KW_PUBLIC[] = {
103 ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
104 static const char KW_REQUIRED[] = {
105 ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
106 '\0' };
107 static const char KW_SYSTEM[] = {
108 ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
109
/* Size in bytes of the smallest character in encoding `enc`.  Only defined
 * here when nothing earlier has supplied its own definition.
 */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
113
/* Return the state machine to its top-level handler once a declaration has
 * been completed.  Without XML_DTD that is always internalSubset; with
 * XML_DTD it is internalSubset when state->documentEntity is non-zero and
 * externalSubset1 otherwise.
 */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#  define setTopLevel(state)                                                   \
     ((state)->handler                                                         \
      = ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
122
123 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
124 int tok,
125 const char *ptr,
126 const char *end,
127 const ENCODING *enc);
128
129 static PROLOG_HANDLER
130 prolog0, prolog1, prolog2,
131 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
132 internalSubset,
133 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
134 entity7, entity8, entity9, entity10,
135 notation0, notation1, notation2, notation3, notation4,
136 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
137 attlist7, attlist8, attlist9,
138 element0, element1, element2, element3, element4, element5, element6,
139 element7,
140 #ifdef XML_DTD
141 externalSubset0, externalSubset1,
142 condSect0, condSect1, condSect2,
143 #endif /* XML_DTD */
144 declClose,
145 error;
146
147 static int FASTCALL common(PROLOG_STATE *state, int tok);
148
149 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)150 prolog0(PROLOG_STATE *state,
151 int tok,
152 const char *ptr,
153 const char *end,
154 const ENCODING *enc)
155 {
156 switch (tok) {
157 case XML_TOK_PROLOG_S:
158 state->handler = prolog1;
159 return XML_ROLE_NONE;
160 case XML_TOK_XML_DECL:
161 state->handler = prolog1;
162 return XML_ROLE_XML_DECL;
163 case XML_TOK_PI:
164 state->handler = prolog1;
165 return XML_ROLE_PI;
166 case XML_TOK_COMMENT:
167 state->handler = prolog1;
168 return XML_ROLE_COMMENT;
169 case XML_TOK_BOM:
170 return XML_ROLE_NONE;
171 case XML_TOK_DECL_OPEN:
172 if (!XmlNameMatchesAscii(enc,
173 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
174 end,
175 KW_DOCTYPE))
176 break;
177 state->handler = doctype0;
178 return XML_ROLE_DOCTYPE_NONE;
179 case XML_TOK_INSTANCE_START:
180 state->handler = error;
181 return XML_ROLE_INSTANCE_START;
182 }
183 return common(state, tok);
184 }
185
186 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)187 prolog1(PROLOG_STATE *state,
188 int tok,
189 const char *ptr,
190 const char *end,
191 const ENCODING *enc)
192 {
193 switch (tok) {
194 case XML_TOK_PROLOG_S:
195 return XML_ROLE_NONE;
196 case XML_TOK_PI:
197 return XML_ROLE_PI;
198 case XML_TOK_COMMENT:
199 return XML_ROLE_COMMENT;
200 case XML_TOK_BOM:
201 /* This case can never arise. To reach this role function, the
202 * parse must have passed through prolog0 and therefore have had
203 * some form of input, even if only a space. At that point, a
204 * byte order mark is no longer a valid character (though
205 * technically it should be interpreted as a non-breaking space),
206 * so will be rejected by the tokenizing stages.
207 */
208 return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
209 case XML_TOK_DECL_OPEN:
210 if (!XmlNameMatchesAscii(enc,
211 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
212 end,
213 KW_DOCTYPE))
214 break;
215 state->handler = doctype0;
216 return XML_ROLE_DOCTYPE_NONE;
217 case XML_TOK_INSTANCE_START:
218 state->handler = error;
219 return XML_ROLE_INSTANCE_START;
220 }
221 return common(state, tok);
222 }
223
224 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))225 prolog2(PROLOG_STATE *state,
226 int tok,
227 const char *UNUSED_P(ptr),
228 const char *UNUSED_P(end),
229 const ENCODING *UNUSED_P(enc))
230 {
231 switch (tok) {
232 case XML_TOK_PROLOG_S:
233 return XML_ROLE_NONE;
234 case XML_TOK_PI:
235 return XML_ROLE_PI;
236 case XML_TOK_COMMENT:
237 return XML_ROLE_COMMENT;
238 case XML_TOK_INSTANCE_START:
239 state->handler = error;
240 return XML_ROLE_INSTANCE_START;
241 }
242 return common(state, tok);
243 }
244
245 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))246 doctype0(PROLOG_STATE *state,
247 int tok,
248 const char *UNUSED_P(ptr),
249 const char *UNUSED_P(end),
250 const ENCODING *UNUSED_P(enc))
251 {
252 switch (tok) {
253 case XML_TOK_PROLOG_S:
254 return XML_ROLE_DOCTYPE_NONE;
255 case XML_TOK_NAME:
256 case XML_TOK_PREFIXED_NAME:
257 state->handler = doctype1;
258 return XML_ROLE_DOCTYPE_NAME;
259 }
260 return common(state, tok);
261 }
262
263 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)264 doctype1(PROLOG_STATE *state,
265 int tok,
266 const char *ptr,
267 const char *end,
268 const ENCODING *enc)
269 {
270 switch (tok) {
271 case XML_TOK_PROLOG_S:
272 return XML_ROLE_DOCTYPE_NONE;
273 case XML_TOK_OPEN_BRACKET:
274 state->handler = internalSubset;
275 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
276 case XML_TOK_DECL_CLOSE:
277 state->handler = prolog2;
278 return XML_ROLE_DOCTYPE_CLOSE;
279 case XML_TOK_NAME:
280 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
281 state->handler = doctype3;
282 return XML_ROLE_DOCTYPE_NONE;
283 }
284 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
285 state->handler = doctype2;
286 return XML_ROLE_DOCTYPE_NONE;
287 }
288 break;
289 }
290 return common(state, tok);
291 }
292
293 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))294 doctype2(PROLOG_STATE *state,
295 int tok,
296 const char *UNUSED_P(ptr),
297 const char *UNUSED_P(end),
298 const ENCODING *UNUSED_P(enc))
299 {
300 switch (tok) {
301 case XML_TOK_PROLOG_S:
302 return XML_ROLE_DOCTYPE_NONE;
303 case XML_TOK_LITERAL:
304 state->handler = doctype3;
305 return XML_ROLE_DOCTYPE_PUBLIC_ID;
306 }
307 return common(state, tok);
308 }
309
310 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))311 doctype3(PROLOG_STATE *state,
312 int tok,
313 const char *UNUSED_P(ptr),
314 const char *UNUSED_P(end),
315 const ENCODING *UNUSED_P(enc))
316 {
317 switch (tok) {
318 case XML_TOK_PROLOG_S:
319 return XML_ROLE_DOCTYPE_NONE;
320 case XML_TOK_LITERAL:
321 state->handler = doctype4;
322 return XML_ROLE_DOCTYPE_SYSTEM_ID;
323 }
324 return common(state, tok);
325 }
326
327 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))328 doctype4(PROLOG_STATE *state,
329 int tok,
330 const char *UNUSED_P(ptr),
331 const char *UNUSED_P(end),
332 const ENCODING *UNUSED_P(enc))
333 {
334 switch (tok) {
335 case XML_TOK_PROLOG_S:
336 return XML_ROLE_DOCTYPE_NONE;
337 case XML_TOK_OPEN_BRACKET:
338 state->handler = internalSubset;
339 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
340 case XML_TOK_DECL_CLOSE:
341 state->handler = prolog2;
342 return XML_ROLE_DOCTYPE_CLOSE;
343 }
344 return common(state, tok);
345 }
346
347 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))348 doctype5(PROLOG_STATE *state,
349 int tok,
350 const char *UNUSED_P(ptr),
351 const char *UNUSED_P(end),
352 const ENCODING *UNUSED_P(enc))
353 {
354 switch (tok) {
355 case XML_TOK_PROLOG_S:
356 return XML_ROLE_DOCTYPE_NONE;
357 case XML_TOK_DECL_CLOSE:
358 state->handler = prolog2;
359 return XML_ROLE_DOCTYPE_CLOSE;
360 }
361 return common(state, tok);
362 }
363
364 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)365 internalSubset(PROLOG_STATE *state,
366 int tok,
367 const char *ptr,
368 const char *end,
369 const ENCODING *enc)
370 {
371 switch (tok) {
372 case XML_TOK_PROLOG_S:
373 return XML_ROLE_NONE;
374 case XML_TOK_DECL_OPEN:
375 if (XmlNameMatchesAscii(enc,
376 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
377 end,
378 KW_ENTITY)) {
379 state->handler = entity0;
380 return XML_ROLE_ENTITY_NONE;
381 }
382 if (XmlNameMatchesAscii(enc,
383 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
384 end,
385 KW_ATTLIST)) {
386 state->handler = attlist0;
387 return XML_ROLE_ATTLIST_NONE;
388 }
389 if (XmlNameMatchesAscii(enc,
390 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
391 end,
392 KW_ELEMENT)) {
393 state->handler = element0;
394 return XML_ROLE_ELEMENT_NONE;
395 }
396 if (XmlNameMatchesAscii(enc,
397 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
398 end,
399 KW_NOTATION)) {
400 state->handler = notation0;
401 return XML_ROLE_NOTATION_NONE;
402 }
403 break;
404 case XML_TOK_PI:
405 return XML_ROLE_PI;
406 case XML_TOK_COMMENT:
407 return XML_ROLE_COMMENT;
408 case XML_TOK_PARAM_ENTITY_REF:
409 return XML_ROLE_PARAM_ENTITY_REF;
410 case XML_TOK_CLOSE_BRACKET:
411 state->handler = doctype5;
412 return XML_ROLE_DOCTYPE_NONE;
413 case XML_TOK_NONE:
414 return XML_ROLE_NONE;
415 }
416 return common(state, tok);
417 }
418
419 #ifdef XML_DTD
420
421 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)422 externalSubset0(PROLOG_STATE *state,
423 int tok,
424 const char *ptr,
425 const char *end,
426 const ENCODING *enc)
427 {
428 state->handler = externalSubset1;
429 if (tok == XML_TOK_XML_DECL)
430 return XML_ROLE_TEXT_DECL;
431 return externalSubset1(state, tok, ptr, end, enc);
432 }
433
434 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)435 externalSubset1(PROLOG_STATE *state,
436 int tok,
437 const char *ptr,
438 const char *end,
439 const ENCODING *enc)
440 {
441 switch (tok) {
442 case XML_TOK_COND_SECT_OPEN:
443 state->handler = condSect0;
444 return XML_ROLE_NONE;
445 case XML_TOK_COND_SECT_CLOSE:
446 if (state->includeLevel == 0)
447 break;
448 state->includeLevel -= 1;
449 return XML_ROLE_NONE;
450 case XML_TOK_PROLOG_S:
451 return XML_ROLE_NONE;
452 case XML_TOK_CLOSE_BRACKET:
453 break;
454 case XML_TOK_NONE:
455 if (state->includeLevel)
456 break;
457 return XML_ROLE_NONE;
458 default:
459 return internalSubset(state, tok, ptr, end, enc);
460 }
461 return common(state, tok);
462 }
463
464 #endif /* XML_DTD */
465
466 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))467 entity0(PROLOG_STATE *state,
468 int tok,
469 const char *UNUSED_P(ptr),
470 const char *UNUSED_P(end),
471 const ENCODING *UNUSED_P(enc))
472 {
473 switch (tok) {
474 case XML_TOK_PROLOG_S:
475 return XML_ROLE_ENTITY_NONE;
476 case XML_TOK_PERCENT:
477 state->handler = entity1;
478 return XML_ROLE_ENTITY_NONE;
479 case XML_TOK_NAME:
480 state->handler = entity2;
481 return XML_ROLE_GENERAL_ENTITY_NAME;
482 }
483 return common(state, tok);
484 }
485
486 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))487 entity1(PROLOG_STATE *state,
488 int tok,
489 const char *UNUSED_P(ptr),
490 const char *UNUSED_P(end),
491 const ENCODING *UNUSED_P(enc))
492 {
493 switch (tok) {
494 case XML_TOK_PROLOG_S:
495 return XML_ROLE_ENTITY_NONE;
496 case XML_TOK_NAME:
497 state->handler = entity7;
498 return XML_ROLE_PARAM_ENTITY_NAME;
499 }
500 return common(state, tok);
501 }
502
503 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)504 entity2(PROLOG_STATE *state,
505 int tok,
506 const char *ptr,
507 const char *end,
508 const ENCODING *enc)
509 {
510 switch (tok) {
511 case XML_TOK_PROLOG_S:
512 return XML_ROLE_ENTITY_NONE;
513 case XML_TOK_NAME:
514 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
515 state->handler = entity4;
516 return XML_ROLE_ENTITY_NONE;
517 }
518 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
519 state->handler = entity3;
520 return XML_ROLE_ENTITY_NONE;
521 }
522 break;
523 case XML_TOK_LITERAL:
524 state->handler = declClose;
525 state->role_none = XML_ROLE_ENTITY_NONE;
526 return XML_ROLE_ENTITY_VALUE;
527 }
528 return common(state, tok);
529 }
530
531 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))532 entity3(PROLOG_STATE *state,
533 int tok,
534 const char *UNUSED_P(ptr),
535 const char *UNUSED_P(end),
536 const ENCODING *UNUSED_P(enc))
537 {
538 switch (tok) {
539 case XML_TOK_PROLOG_S:
540 return XML_ROLE_ENTITY_NONE;
541 case XML_TOK_LITERAL:
542 state->handler = entity4;
543 return XML_ROLE_ENTITY_PUBLIC_ID;
544 }
545 return common(state, tok);
546 }
547
548 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))549 entity4(PROLOG_STATE *state,
550 int tok,
551 const char *UNUSED_P(ptr),
552 const char *UNUSED_P(end),
553 const ENCODING *UNUSED_P(enc))
554 {
555 switch (tok) {
556 case XML_TOK_PROLOG_S:
557 return XML_ROLE_ENTITY_NONE;
558 case XML_TOK_LITERAL:
559 state->handler = entity5;
560 return XML_ROLE_ENTITY_SYSTEM_ID;
561 }
562 return common(state, tok);
563 }
564
565 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)566 entity5(PROLOG_STATE *state,
567 int tok,
568 const char *ptr,
569 const char *end,
570 const ENCODING *enc)
571 {
572 switch (tok) {
573 case XML_TOK_PROLOG_S:
574 return XML_ROLE_ENTITY_NONE;
575 case XML_TOK_DECL_CLOSE:
576 setTopLevel(state);
577 return XML_ROLE_ENTITY_COMPLETE;
578 case XML_TOK_NAME:
579 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
580 state->handler = entity6;
581 return XML_ROLE_ENTITY_NONE;
582 }
583 break;
584 }
585 return common(state, tok);
586 }
587
588 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))589 entity6(PROLOG_STATE *state,
590 int tok,
591 const char *UNUSED_P(ptr),
592 const char *UNUSED_P(end),
593 const ENCODING *UNUSED_P(enc))
594 {
595 switch (tok) {
596 case XML_TOK_PROLOG_S:
597 return XML_ROLE_ENTITY_NONE;
598 case XML_TOK_NAME:
599 state->handler = declClose;
600 state->role_none = XML_ROLE_ENTITY_NONE;
601 return XML_ROLE_ENTITY_NOTATION_NAME;
602 }
603 return common(state, tok);
604 }
605
606 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)607 entity7(PROLOG_STATE *state,
608 int tok,
609 const char *ptr,
610 const char *end,
611 const ENCODING *enc)
612 {
613 switch (tok) {
614 case XML_TOK_PROLOG_S:
615 return XML_ROLE_ENTITY_NONE;
616 case XML_TOK_NAME:
617 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
618 state->handler = entity9;
619 return XML_ROLE_ENTITY_NONE;
620 }
621 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
622 state->handler = entity8;
623 return XML_ROLE_ENTITY_NONE;
624 }
625 break;
626 case XML_TOK_LITERAL:
627 state->handler = declClose;
628 state->role_none = XML_ROLE_ENTITY_NONE;
629 return XML_ROLE_ENTITY_VALUE;
630 }
631 return common(state, tok);
632 }
633
634 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))635 entity8(PROLOG_STATE *state,
636 int tok,
637 const char *UNUSED_P(ptr),
638 const char *UNUSED_P(end),
639 const ENCODING *UNUSED_P(enc))
640 {
641 switch (tok) {
642 case XML_TOK_PROLOG_S:
643 return XML_ROLE_ENTITY_NONE;
644 case XML_TOK_LITERAL:
645 state->handler = entity9;
646 return XML_ROLE_ENTITY_PUBLIC_ID;
647 }
648 return common(state, tok);
649 }
650
651 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))652 entity9(PROLOG_STATE *state,
653 int tok,
654 const char *UNUSED_P(ptr),
655 const char *UNUSED_P(end),
656 const ENCODING *UNUSED_P(enc))
657 {
658 switch (tok) {
659 case XML_TOK_PROLOG_S:
660 return XML_ROLE_ENTITY_NONE;
661 case XML_TOK_LITERAL:
662 state->handler = entity10;
663 return XML_ROLE_ENTITY_SYSTEM_ID;
664 }
665 return common(state, tok);
666 }
667
668 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))669 entity10(PROLOG_STATE *state,
670 int tok,
671 const char *UNUSED_P(ptr),
672 const char *UNUSED_P(end),
673 const ENCODING *UNUSED_P(enc))
674 {
675 switch (tok) {
676 case XML_TOK_PROLOG_S:
677 return XML_ROLE_ENTITY_NONE;
678 case XML_TOK_DECL_CLOSE:
679 setTopLevel(state);
680 return XML_ROLE_ENTITY_COMPLETE;
681 }
682 return common(state, tok);
683 }
684
685 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))686 notation0(PROLOG_STATE *state,
687 int tok,
688 const char *UNUSED_P(ptr),
689 const char *UNUSED_P(end),
690 const ENCODING *UNUSED_P(enc))
691 {
692 switch (tok) {
693 case XML_TOK_PROLOG_S:
694 return XML_ROLE_NOTATION_NONE;
695 case XML_TOK_NAME:
696 state->handler = notation1;
697 return XML_ROLE_NOTATION_NAME;
698 }
699 return common(state, tok);
700 }
701
702 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)703 notation1(PROLOG_STATE *state,
704 int tok,
705 const char *ptr,
706 const char *end,
707 const ENCODING *enc)
708 {
709 switch (tok) {
710 case XML_TOK_PROLOG_S:
711 return XML_ROLE_NOTATION_NONE;
712 case XML_TOK_NAME:
713 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
714 state->handler = notation3;
715 return XML_ROLE_NOTATION_NONE;
716 }
717 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
718 state->handler = notation2;
719 return XML_ROLE_NOTATION_NONE;
720 }
721 break;
722 }
723 return common(state, tok);
724 }
725
726 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))727 notation2(PROLOG_STATE *state,
728 int tok,
729 const char *UNUSED_P(ptr),
730 const char *UNUSED_P(end),
731 const ENCODING *UNUSED_P(enc))
732 {
733 switch (tok) {
734 case XML_TOK_PROLOG_S:
735 return XML_ROLE_NOTATION_NONE;
736 case XML_TOK_LITERAL:
737 state->handler = notation4;
738 return XML_ROLE_NOTATION_PUBLIC_ID;
739 }
740 return common(state, tok);
741 }
742
743 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))744 notation3(PROLOG_STATE *state,
745 int tok,
746 const char *UNUSED_P(ptr),
747 const char *UNUSED_P(end),
748 const ENCODING *UNUSED_P(enc))
749 {
750 switch (tok) {
751 case XML_TOK_PROLOG_S:
752 return XML_ROLE_NOTATION_NONE;
753 case XML_TOK_LITERAL:
754 state->handler = declClose;
755 state->role_none = XML_ROLE_NOTATION_NONE;
756 return XML_ROLE_NOTATION_SYSTEM_ID;
757 }
758 return common(state, tok);
759 }
760
761 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))762 notation4(PROLOG_STATE *state,
763 int tok,
764 const char *UNUSED_P(ptr),
765 const char *UNUSED_P(end),
766 const ENCODING *UNUSED_P(enc))
767 {
768 switch (tok) {
769 case XML_TOK_PROLOG_S:
770 return XML_ROLE_NOTATION_NONE;
771 case XML_TOK_LITERAL:
772 state->handler = declClose;
773 state->role_none = XML_ROLE_NOTATION_NONE;
774 return XML_ROLE_NOTATION_SYSTEM_ID;
775 case XML_TOK_DECL_CLOSE:
776 setTopLevel(state);
777 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
778 }
779 return common(state, tok);
780 }
781
782 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))783 attlist0(PROLOG_STATE *state,
784 int tok,
785 const char *UNUSED_P(ptr),
786 const char *UNUSED_P(end),
787 const ENCODING *UNUSED_P(enc))
788 {
789 switch (tok) {
790 case XML_TOK_PROLOG_S:
791 return XML_ROLE_ATTLIST_NONE;
792 case XML_TOK_NAME:
793 case XML_TOK_PREFIXED_NAME:
794 state->handler = attlist1;
795 return XML_ROLE_ATTLIST_ELEMENT_NAME;
796 }
797 return common(state, tok);
798 }
799
800 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))801 attlist1(PROLOG_STATE *state,
802 int tok,
803 const char *UNUSED_P(ptr),
804 const char *UNUSED_P(end),
805 const ENCODING *UNUSED_P(enc))
806 {
807 switch (tok) {
808 case XML_TOK_PROLOG_S:
809 return XML_ROLE_ATTLIST_NONE;
810 case XML_TOK_DECL_CLOSE:
811 setTopLevel(state);
812 return XML_ROLE_ATTLIST_NONE;
813 case XML_TOK_NAME:
814 case XML_TOK_PREFIXED_NAME:
815 state->handler = attlist2;
816 return XML_ROLE_ATTRIBUTE_NAME;
817 }
818 return common(state, tok);
819 }
820
821 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)822 attlist2(PROLOG_STATE *state,
823 int tok,
824 const char *ptr,
825 const char *end,
826 const ENCODING *enc)
827 {
828 switch (tok) {
829 case XML_TOK_PROLOG_S:
830 return XML_ROLE_ATTLIST_NONE;
831 case XML_TOK_NAME:
832 {
833 static const char * const types[] = {
834 KW_CDATA,
835 KW_ID,
836 KW_IDREF,
837 KW_IDREFS,
838 KW_ENTITY,
839 KW_ENTITIES,
840 KW_NMTOKEN,
841 KW_NMTOKENS,
842 };
843 int i;
844 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
845 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
846 state->handler = attlist8;
847 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
848 }
849 }
850 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
851 state->handler = attlist5;
852 return XML_ROLE_ATTLIST_NONE;
853 }
854 break;
855 case XML_TOK_OPEN_PAREN:
856 state->handler = attlist3;
857 return XML_ROLE_ATTLIST_NONE;
858 }
859 return common(state, tok);
860 }
861
862 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))863 attlist3(PROLOG_STATE *state,
864 int tok,
865 const char *UNUSED_P(ptr),
866 const char *UNUSED_P(end),
867 const ENCODING *UNUSED_P(enc))
868 {
869 switch (tok) {
870 case XML_TOK_PROLOG_S:
871 return XML_ROLE_ATTLIST_NONE;
872 case XML_TOK_NMTOKEN:
873 case XML_TOK_NAME:
874 case XML_TOK_PREFIXED_NAME:
875 state->handler = attlist4;
876 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
877 }
878 return common(state, tok);
879 }
880
881 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))882 attlist4(PROLOG_STATE *state,
883 int tok,
884 const char *UNUSED_P(ptr),
885 const char *UNUSED_P(end),
886 const ENCODING *UNUSED_P(enc))
887 {
888 switch (tok) {
889 case XML_TOK_PROLOG_S:
890 return XML_ROLE_ATTLIST_NONE;
891 case XML_TOK_CLOSE_PAREN:
892 state->handler = attlist8;
893 return XML_ROLE_ATTLIST_NONE;
894 case XML_TOK_OR:
895 state->handler = attlist3;
896 return XML_ROLE_ATTLIST_NONE;
897 }
898 return common(state, tok);
899 }
900
901 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))902 attlist5(PROLOG_STATE *state,
903 int tok,
904 const char *UNUSED_P(ptr),
905 const char *UNUSED_P(end),
906 const ENCODING *UNUSED_P(enc))
907 {
908 switch (tok) {
909 case XML_TOK_PROLOG_S:
910 return XML_ROLE_ATTLIST_NONE;
911 case XML_TOK_OPEN_PAREN:
912 state->handler = attlist6;
913 return XML_ROLE_ATTLIST_NONE;
914 }
915 return common(state, tok);
916 }
917
918 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))919 attlist6(PROLOG_STATE *state,
920 int tok,
921 const char *UNUSED_P(ptr),
922 const char *UNUSED_P(end),
923 const ENCODING *UNUSED_P(enc))
924 {
925 switch (tok) {
926 case XML_TOK_PROLOG_S:
927 return XML_ROLE_ATTLIST_NONE;
928 case XML_TOK_NAME:
929 state->handler = attlist7;
930 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
931 }
932 return common(state, tok);
933 }
934
935 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))936 attlist7(PROLOG_STATE *state,
937 int tok,
938 const char *UNUSED_P(ptr),
939 const char *UNUSED_P(end),
940 const ENCODING *UNUSED_P(enc))
941 {
942 switch (tok) {
943 case XML_TOK_PROLOG_S:
944 return XML_ROLE_ATTLIST_NONE;
945 case XML_TOK_CLOSE_PAREN:
946 state->handler = attlist8;
947 return XML_ROLE_ATTLIST_NONE;
948 case XML_TOK_OR:
949 state->handler = attlist6;
950 return XML_ROLE_ATTLIST_NONE;
951 }
952 return common(state, tok);
953 }
954
955 /* default value */
956 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)957 attlist8(PROLOG_STATE *state,
958 int tok,
959 const char *ptr,
960 const char *end,
961 const ENCODING *enc)
962 {
963 switch (tok) {
964 case XML_TOK_PROLOG_S:
965 return XML_ROLE_ATTLIST_NONE;
966 case XML_TOK_POUND_NAME:
967 if (XmlNameMatchesAscii(enc,
968 ptr + MIN_BYTES_PER_CHAR(enc),
969 end,
970 KW_IMPLIED)) {
971 state->handler = attlist1;
972 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
973 }
974 if (XmlNameMatchesAscii(enc,
975 ptr + MIN_BYTES_PER_CHAR(enc),
976 end,
977 KW_REQUIRED)) {
978 state->handler = attlist1;
979 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
980 }
981 if (XmlNameMatchesAscii(enc,
982 ptr + MIN_BYTES_PER_CHAR(enc),
983 end,
984 KW_FIXED)) {
985 state->handler = attlist9;
986 return XML_ROLE_ATTLIST_NONE;
987 }
988 break;
989 case XML_TOK_LITERAL:
990 state->handler = attlist1;
991 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
992 }
993 return common(state, tok);
994 }
995
996 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))997 attlist9(PROLOG_STATE *state,
998 int tok,
999 const char *UNUSED_P(ptr),
1000 const char *UNUSED_P(end),
1001 const ENCODING *UNUSED_P(enc))
1002 {
1003 switch (tok) {
1004 case XML_TOK_PROLOG_S:
1005 return XML_ROLE_ATTLIST_NONE;
1006 case XML_TOK_LITERAL:
1007 state->handler = attlist1;
1008 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
1009 }
1010 return common(state, tok);
1011 }
1012
1013 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1014 element0(PROLOG_STATE *state,
1015 int tok,
1016 const char *UNUSED_P(ptr),
1017 const char *UNUSED_P(end),
1018 const ENCODING *UNUSED_P(enc))
1019 {
1020 switch (tok) {
1021 case XML_TOK_PROLOG_S:
1022 return XML_ROLE_ELEMENT_NONE;
1023 case XML_TOK_NAME:
1024 case XML_TOK_PREFIXED_NAME:
1025 state->handler = element1;
1026 return XML_ROLE_ELEMENT_NAME;
1027 }
1028 return common(state, tok);
1029 }
1030
1031 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1032 element1(PROLOG_STATE *state,
1033 int tok,
1034 const char *ptr,
1035 const char *end,
1036 const ENCODING *enc)
1037 {
1038 switch (tok) {
1039 case XML_TOK_PROLOG_S:
1040 return XML_ROLE_ELEMENT_NONE;
1041 case XML_TOK_NAME:
1042 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1043 state->handler = declClose;
1044 state->role_none = XML_ROLE_ELEMENT_NONE;
1045 return XML_ROLE_CONTENT_EMPTY;
1046 }
1047 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1048 state->handler = declClose;
1049 state->role_none = XML_ROLE_ELEMENT_NONE;
1050 return XML_ROLE_CONTENT_ANY;
1051 }
1052 break;
1053 case XML_TOK_OPEN_PAREN:
1054 state->handler = element2;
1055 state->level = 1;
1056 return XML_ROLE_GROUP_OPEN;
1057 }
1058 return common(state, tok);
1059 }
1060
1061 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1062 element2(PROLOG_STATE *state,
1063 int tok,
1064 const char *ptr,
1065 const char *end,
1066 const ENCODING *enc)
1067 {
1068 switch (tok) {
1069 case XML_TOK_PROLOG_S:
1070 return XML_ROLE_ELEMENT_NONE;
1071 case XML_TOK_POUND_NAME:
1072 if (XmlNameMatchesAscii(enc,
1073 ptr + MIN_BYTES_PER_CHAR(enc),
1074 end,
1075 KW_PCDATA)) {
1076 state->handler = element3;
1077 return XML_ROLE_CONTENT_PCDATA;
1078 }
1079 break;
1080 case XML_TOK_OPEN_PAREN:
1081 state->level = 2;
1082 state->handler = element6;
1083 return XML_ROLE_GROUP_OPEN;
1084 case XML_TOK_NAME:
1085 case XML_TOK_PREFIXED_NAME:
1086 state->handler = element7;
1087 return XML_ROLE_CONTENT_ELEMENT;
1088 case XML_TOK_NAME_QUESTION:
1089 state->handler = element7;
1090 return XML_ROLE_CONTENT_ELEMENT_OPT;
1091 case XML_TOK_NAME_ASTERISK:
1092 state->handler = element7;
1093 return XML_ROLE_CONTENT_ELEMENT_REP;
1094 case XML_TOK_NAME_PLUS:
1095 state->handler = element7;
1096 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1097 }
1098 return common(state, tok);
1099 }
1100
1101 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1102 element3(PROLOG_STATE *state,
1103 int tok,
1104 const char *UNUSED_P(ptr),
1105 const char *UNUSED_P(end),
1106 const ENCODING *UNUSED_P(enc))
1107 {
1108 switch (tok) {
1109 case XML_TOK_PROLOG_S:
1110 return XML_ROLE_ELEMENT_NONE;
1111 case XML_TOK_CLOSE_PAREN:
1112 state->handler = declClose;
1113 state->role_none = XML_ROLE_ELEMENT_NONE;
1114 return XML_ROLE_GROUP_CLOSE;
1115 case XML_TOK_CLOSE_PAREN_ASTERISK:
1116 state->handler = declClose;
1117 state->role_none = XML_ROLE_ELEMENT_NONE;
1118 return XML_ROLE_GROUP_CLOSE_REP;
1119 case XML_TOK_OR:
1120 state->handler = element4;
1121 return XML_ROLE_ELEMENT_NONE;
1122 }
1123 return common(state, tok);
1124 }
1125
1126 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1127 element4(PROLOG_STATE *state,
1128 int tok,
1129 const char *UNUSED_P(ptr),
1130 const char *UNUSED_P(end),
1131 const ENCODING *UNUSED_P(enc))
1132 {
1133 switch (tok) {
1134 case XML_TOK_PROLOG_S:
1135 return XML_ROLE_ELEMENT_NONE;
1136 case XML_TOK_NAME:
1137 case XML_TOK_PREFIXED_NAME:
1138 state->handler = element5;
1139 return XML_ROLE_CONTENT_ELEMENT;
1140 }
1141 return common(state, tok);
1142 }
1143
1144 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1145 element5(PROLOG_STATE *state,
1146 int tok,
1147 const char *UNUSED_P(ptr),
1148 const char *UNUSED_P(end),
1149 const ENCODING *UNUSED_P(enc))
1150 {
1151 switch (tok) {
1152 case XML_TOK_PROLOG_S:
1153 return XML_ROLE_ELEMENT_NONE;
1154 case XML_TOK_CLOSE_PAREN_ASTERISK:
1155 state->handler = declClose;
1156 state->role_none = XML_ROLE_ELEMENT_NONE;
1157 return XML_ROLE_GROUP_CLOSE_REP;
1158 case XML_TOK_OR:
1159 state->handler = element4;
1160 return XML_ROLE_ELEMENT_NONE;
1161 }
1162 return common(state, tok);
1163 }
1164
1165 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1166 element6(PROLOG_STATE *state,
1167 int tok,
1168 const char *UNUSED_P(ptr),
1169 const char *UNUSED_P(end),
1170 const ENCODING *UNUSED_P(enc))
1171 {
1172 switch (tok) {
1173 case XML_TOK_PROLOG_S:
1174 return XML_ROLE_ELEMENT_NONE;
1175 case XML_TOK_OPEN_PAREN:
1176 state->level += 1;
1177 return XML_ROLE_GROUP_OPEN;
1178 case XML_TOK_NAME:
1179 case XML_TOK_PREFIXED_NAME:
1180 state->handler = element7;
1181 return XML_ROLE_CONTENT_ELEMENT;
1182 case XML_TOK_NAME_QUESTION:
1183 state->handler = element7;
1184 return XML_ROLE_CONTENT_ELEMENT_OPT;
1185 case XML_TOK_NAME_ASTERISK:
1186 state->handler = element7;
1187 return XML_ROLE_CONTENT_ELEMENT_REP;
1188 case XML_TOK_NAME_PLUS:
1189 state->handler = element7;
1190 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1191 }
1192 return common(state, tok);
1193 }
1194
1195 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1196 element7(PROLOG_STATE *state,
1197 int tok,
1198 const char *UNUSED_P(ptr),
1199 const char *UNUSED_P(end),
1200 const ENCODING *UNUSED_P(enc))
1201 {
1202 switch (tok) {
1203 case XML_TOK_PROLOG_S:
1204 return XML_ROLE_ELEMENT_NONE;
1205 case XML_TOK_CLOSE_PAREN:
1206 state->level -= 1;
1207 if (state->level == 0) {
1208 state->handler = declClose;
1209 state->role_none = XML_ROLE_ELEMENT_NONE;
1210 }
1211 return XML_ROLE_GROUP_CLOSE;
1212 case XML_TOK_CLOSE_PAREN_ASTERISK:
1213 state->level -= 1;
1214 if (state->level == 0) {
1215 state->handler = declClose;
1216 state->role_none = XML_ROLE_ELEMENT_NONE;
1217 }
1218 return XML_ROLE_GROUP_CLOSE_REP;
1219 case XML_TOK_CLOSE_PAREN_QUESTION:
1220 state->level -= 1;
1221 if (state->level == 0) {
1222 state->handler = declClose;
1223 state->role_none = XML_ROLE_ELEMENT_NONE;
1224 }
1225 return XML_ROLE_GROUP_CLOSE_OPT;
1226 case XML_TOK_CLOSE_PAREN_PLUS:
1227 state->level -= 1;
1228 if (state->level == 0) {
1229 state->handler = declClose;
1230 state->role_none = XML_ROLE_ELEMENT_NONE;
1231 }
1232 return XML_ROLE_GROUP_CLOSE_PLUS;
1233 case XML_TOK_COMMA:
1234 state->handler = element6;
1235 return XML_ROLE_GROUP_SEQUENCE;
1236 case XML_TOK_OR:
1237 state->handler = element6;
1238 return XML_ROLE_GROUP_CHOICE;
1239 }
1240 return common(state, tok);
1241 }
1242
1243 #ifdef XML_DTD
1244
1245 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1246 condSect0(PROLOG_STATE *state,
1247 int tok,
1248 const char *ptr,
1249 const char *end,
1250 const ENCODING *enc)
1251 {
1252 switch (tok) {
1253 case XML_TOK_PROLOG_S:
1254 return XML_ROLE_NONE;
1255 case XML_TOK_NAME:
1256 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1257 state->handler = condSect1;
1258 return XML_ROLE_NONE;
1259 }
1260 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1261 state->handler = condSect2;
1262 return XML_ROLE_NONE;
1263 }
1264 break;
1265 }
1266 return common(state, tok);
1267 }
1268
1269 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1270 condSect1(PROLOG_STATE *state,
1271 int tok,
1272 const char *UNUSED_P(ptr),
1273 const char *UNUSED_P(end),
1274 const ENCODING *UNUSED_P(enc))
1275 {
1276 switch (tok) {
1277 case XML_TOK_PROLOG_S:
1278 return XML_ROLE_NONE;
1279 case XML_TOK_OPEN_BRACKET:
1280 state->handler = externalSubset1;
1281 state->includeLevel += 1;
1282 return XML_ROLE_NONE;
1283 }
1284 return common(state, tok);
1285 }
1286
1287 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1288 condSect2(PROLOG_STATE *state,
1289 int tok,
1290 const char *UNUSED_P(ptr),
1291 const char *UNUSED_P(end),
1292 const ENCODING *UNUSED_P(enc))
1293 {
1294 switch (tok) {
1295 case XML_TOK_PROLOG_S:
1296 return XML_ROLE_NONE;
1297 case XML_TOK_OPEN_BRACKET:
1298 state->handler = externalSubset1;
1299 return XML_ROLE_IGNORE_SECT;
1300 }
1301 return common(state, tok);
1302 }
1303
1304 #endif /* XML_DTD */
1305
1306 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1307 declClose(PROLOG_STATE *state,
1308 int tok,
1309 const char *UNUSED_P(ptr),
1310 const char *UNUSED_P(end),
1311 const ENCODING *UNUSED_P(enc))
1312 {
1313 switch (tok) {
1314 case XML_TOK_PROLOG_S:
1315 return state->role_none;
1316 case XML_TOK_DECL_CLOSE:
1317 setTopLevel(state);
1318 return state->role_none;
1319 }
1320 return common(state, tok);
1321 }
1322
1323 /* This function will only be invoked if the internal logic of the
1324 * parser has broken down. It is used in two cases:
1325 *
1326 * 1: When the XML prolog has been finished. At this point the
1327 * processor (the parser level above these role handlers) should
1328 * switch from prologProcessor to contentProcessor and reinitialise
1329 * the handler function.
1330 *
1331 * 2: When an error has been detected (via common() below). At this
1332 * point again the processor should be switched to errorProcessor,
1333 * which will never call a handler.
1334 *
1335 * The result of this is that error() can only be called if the
1336 * processor switch failed to happen, which is an internal error and
1337 * therefore we shouldn't be able to provoke it simply by using the
1338 * library. It is a necessary backstop, however, so we merely exclude
1339 * it from the coverage statistics.
1340 *
1341 * LCOV_EXCL_START
1342 */
1343 static int PTRCALL
error(PROLOG_STATE * UNUSED_P (state),int UNUSED_P (tok),const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1344 error(PROLOG_STATE *UNUSED_P(state),
1345 int UNUSED_P(tok),
1346 const char *UNUSED_P(ptr),
1347 const char *UNUSED_P(end),
1348 const ENCODING *UNUSED_P(enc))
1349 {
1350 return XML_ROLE_NONE;
1351 }
1352 /* LCOV_EXCL_STOP */
1353
1354 static int FASTCALL
common(PROLOG_STATE * state,int tok)1355 common(PROLOG_STATE *state, int tok)
1356 {
1357 #ifdef XML_DTD
1358 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1359 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1360 #endif
1361 state->handler = error;
1362 return XML_ROLE_ERROR;
1363 }
1364
1365 void
XmlPrologStateInit(PROLOG_STATE * state)1366 XmlPrologStateInit(PROLOG_STATE *state)
1367 {
1368 state->handler = prolog0;
1369 #ifdef XML_DTD
1370 state->documentEntity = 1;
1371 state->includeLevel = 0;
1372 state->inEntityValue = 0;
1373 #endif /* XML_DTD */
1374 }
1375
1376 #ifdef XML_DTD
1377
1378 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1379 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1380 {
1381 state->handler = externalSubset0;
1382 state->documentEntity = 0;
1383 state->includeLevel = 0;
1384 }
1385
1386 #endif /* XML_DTD */
1387