• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::reader::error::SyntaxError;
2 use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
3 use crate::reader::lexer::Token;
4 
5 use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State};
6 
7 impl PullParser {
inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result>8     pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> {
9         match substate {
10             DoctypeSubstate::Outside => match t {
11                 Token::TagEnd => self.into_state_continue(State::OutsideTag),
12                 Token::MarkupDeclarationStart => {
13                     self.buf.clear();
14                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName))
15                 },
16                 Token::Character('%') => {
17                     self.data.ref_data.clear();
18                     self.data.ref_data.push('%');
19                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd))
20                 },
21                 Token::CommentStart => {
22                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment))
23                 },
24                 Token::SingleQuote | Token::DoubleQuote => {
25                     // just discard string literals
26                     self.data.quote = Some(super::QuoteToken::from_token(&t));
27                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String))
28                 },
29                 Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))),
30                 // TODO: parse SYSTEM, and [
31                 _ => None,
32             },
33             DoctypeSubstate::String => match t {
34                 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None,
35                 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None,
36                 Token::SingleQuote | Token::DoubleQuote => {
37                     self.data.quote = None;
38                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
39                 },
40                 _ => None,
41             },
42             DoctypeSubstate::Comment => match t {
43                 Token::CommentEnd => {
44                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
45                 },
46                 _ => None,
47             },
48             DoctypeSubstate::InsideName => match t {
49                 Token::Character(c @ 'A'..='Z') => {
50                     self.buf.push(c);
51                     None
52                 },
53                 Token::Character(c) if is_whitespace_char(c) => {
54                     let buf = self.take_buf();
55                     match buf.as_str() {
56                         "ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)),
57                         "NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)),
58                         _ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))),
59                     }
60                 },
61                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
62             },
63             DoctypeSubstate::BeforeEntityName => {
64                 self.data.name.clear();
65                 match t {
66                     Token::Character(c) if is_whitespace_char(c) => None,
67                     Token::Character('%') => { // % is for PEDecl
68                         self.data.name.push('%');
69                         self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart))
70                     },
71                     Token::Character(c) if is_name_start_char(c) => {
72                         if self.data.name.len() > self.config.max_name_length {
73                             return Some(self.error(SyntaxError::ExceededConfiguredLimit));
74                         }
75                         self.data.name.push(c);
76                         self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName))
77                     },
78                     _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
79                 }
80             },
81             DoctypeSubstate::EntityName => match t {
82                 Token::Character(c) if is_whitespace_char(c) => {
83                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
84                 },
85                 Token::Character(c) if is_name_char(c) => {
86                     if self.data.name.len() > self.config.max_name_length {
87                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
88                     }
89                     self.data.name.push(c);
90                     None
91                 },
92                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
93             },
94             DoctypeSubstate::BeforeEntityValue => {
95                 self.buf.clear();
96                 match t {
97                     Token::Character(c) if is_whitespace_char(c) => None,
98                     // SYSTEM/PUBLIC not supported
99                     Token::Character('S' | 'P') => {
100                         let name = self.data.take_name();
101                         self.entities.entry(name).or_insert_with(String::new); // Dummy value, but at least the name is recognized
102 
103                         self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration))
104                     },
105                     Token::SingleQuote | Token::DoubleQuote => {
106                         self.data.quote = Some(super::QuoteToken::from_token(&t));
107                         self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
108                     },
109                     _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
110                 }
111             },
112             DoctypeSubstate::EntityValue => match t {
113                 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push('\''); None },
114                 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"'); None },
115                 Token::SingleQuote | Token::DoubleQuote => {
116                     self.data.quote = None;
117                     let name = self.data.take_name();
118                     let val = self.take_buf();
119                     self.entities.entry(name).or_insert(val); // First wins
120                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) // FIXME
121                 },
122                 Token::ReferenceStart | Token::Character('&') => {
123                     self.data.ref_data.clear();
124                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart))
125                 },
126                 Token::Character('%') => {
127                     self.data.ref_data.clear();
128                     self.data.ref_data.push('%'); // include literal % in the name to distinguish from regular entities
129                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue))
130                 },
131                 Token::Character(c) if !self.is_valid_xml_char(c) => {
132                     Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
133                 },
134                 Token::Character(c) => {
135                     self.buf.push(c);
136                     None
137                 },
138                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
139             },
140             DoctypeSubstate::PEReferenceDefinitionStart => match t {
141                 Token::Character(c) if is_whitespace_char(c) => {
142                     None
143                 },
144                 Token::Character(c) if is_name_start_char(c) => {
145                     debug_assert_eq!(self.data.name, "%");
146                     self.data.name.push(c);
147                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition))
148                 },
149                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
150             },
151             DoctypeSubstate::PEReferenceDefinition => match t {
152                 Token::Character(c) if is_name_char(c) => {
153                     if self.data.name.len() > self.config.max_name_length {
154                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
155                     }
156                     self.data.name.push(c);
157                     None
158                 },
159                 Token::Character(c) if is_whitespace_char(c) => {
160                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
161                 },
162                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
163             },
164             DoctypeSubstate::PEReferenceInDtd => match t {
165                 Token::Character(c) if is_name_char(c) => {
166                     self.data.ref_data.push(c);
167                     None
168                 },
169                 Token::ReferenceEnd | Token::Character(';') => {
170                     let name = self.data.take_ref_data();
171                     match self.entities.get(&name) {
172                         Some(ent) => {
173                             if let Err(e) = self.lexer.reparse(ent) {
174                                 return Some(Err(e));
175                             }
176                             self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
177                         },
178                         None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
179                     }
180                 },
181                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
182             },
183             DoctypeSubstate::PEReferenceInValue => match t {
184                 Token::Character(c) if is_name_char(c) => {
185                     self.data.ref_data.push(c);
186                     None
187                 },
188                 Token::ReferenceEnd | Token::Character(';') => {
189                     let name = self.data.take_ref_data();
190                     match self.entities.get(&name) {
191                         Some(ent) => {
192                             self.buf.push_str(ent);
193                             self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
194                         },
195                         None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
196                     }
197                 },
198                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
199             },
200             DoctypeSubstate::NumericReferenceStart => match t {
201                 Token::Character('#') => {
202                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference))
203                 },
204                 Token::Character(c) if !self.is_valid_xml_char(c) => {
205                     Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
206                 },
207                 Token::Character(c) => {
208                     self.buf.push('&');
209                     self.buf.push(c);
210                     // named entities are not expanded inside doctype
211                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
212                 },
213                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
214             },
215             DoctypeSubstate::NumericReference => match t {
216                 Token::ReferenceEnd | Token::Character(';') => {
217                     let r = self.data.take_ref_data();
218                     // https://www.w3.org/TR/xml/#sec-entexpand
219                     match self.numeric_reference_from_str(&r) {
220                         Ok(c) => {
221                             self.buf.push(c);
222                             self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
223                         }
224                         Err(e) => Some(self.error(e)),
225                     }
226                 },
227                 Token::Character(c) if !self.is_valid_xml_char(c) => {
228                     Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
229                 },
230                 Token::Character(c) => {
231                     self.data.ref_data.push(c);
232                     None
233                 },
234                 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
235             },
236             DoctypeSubstate::SkipDeclaration => match t {
237                 Token::TagEnd => {
238                     self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
239                 },
240                 _ => None,
241             },
242         }
243     }
244 }
245