1 use crate::reader::error::SyntaxError; 2 use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; 3 use crate::reader::lexer::Token; 4 5 use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State}; 6 7 impl PullParser { inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result>8 pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> { 9 match substate { 10 DoctypeSubstate::Outside => match t { 11 Token::TagEnd => self.into_state_continue(State::OutsideTag), 12 Token::MarkupDeclarationStart => { 13 self.buf.clear(); 14 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName)) 15 }, 16 Token::Character('%') => { 17 self.data.ref_data.clear(); 18 self.data.ref_data.push('%'); 19 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd)) 20 }, 21 Token::CommentStart => { 22 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment)) 23 }, 24 Token::SingleQuote | Token::DoubleQuote => { 25 // just discard string literals 26 self.data.quote = Some(super::QuoteToken::from_token(&t)); 27 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String)) 28 }, 29 Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))), 30 // TODO: parse SYSTEM, and [ 31 _ => None, 32 }, 33 DoctypeSubstate::String => match t { 34 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None, 35 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None, 36 Token::SingleQuote | Token::DoubleQuote => { 37 self.data.quote = None; 38 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) 39 }, 40 _ => None, 41 }, 42 DoctypeSubstate::Comment => match t { 43 Token::CommentEnd => { 44 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) 45 }, 46 _ => None, 47 }, 48 DoctypeSubstate::InsideName => match t { 49 Token::Character(c @ 'A'..='Z') => { 50 self.buf.push(c); 51 None 52 }, 53 Token::Character(c) if is_whitespace_char(c) => { 54 let buf = self.take_buf(); 55 match buf.as_str() { 56 "ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)), 57 "NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)), 58 _ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))), 59 } 60 }, 61 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 62 }, 63 DoctypeSubstate::BeforeEntityName => { 64 self.data.name.clear(); 65 match t { 66 Token::Character(c) if is_whitespace_char(c) => None, 67 Token::Character('%') => { // % is for PEDecl 68 self.data.name.push('%'); 69 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart)) 70 }, 71 Token::Character(c) if is_name_start_char(c) => { 72 if self.data.name.len() > self.config.max_name_length { 73 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 74 } 75 self.data.name.push(c); 76 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName)) 77 }, 78 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 79 } 80 }, 81 DoctypeSubstate::EntityName => match t { 82 Token::Character(c) if is_whitespace_char(c) => { 83 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue)) 84 }, 85 Token::Character(c) if is_name_char(c) => { 86 if self.data.name.len() > self.config.max_name_length { 87 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 88 } 89 self.data.name.push(c); 90 None 91 }, 92 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 93 }, 94 DoctypeSubstate::BeforeEntityValue => { 95 self.buf.clear(); 96 match t { 97 Token::Character(c) if is_whitespace_char(c) => None, 98 // SYSTEM/PUBLIC not supported 99 Token::Character('S' | 'P') => { 100 let name = self.data.take_name(); 101 self.entities.entry(name).or_insert_with(String::new); // Dummy value, but at least the name is recognized 102 103 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) 104 }, 105 Token::SingleQuote | Token::DoubleQuote => { 106 self.data.quote = Some(super::QuoteToken::from_token(&t)); 107 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) 108 }, 109 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 110 } 111 }, 112 DoctypeSubstate::EntityValue => match t { 113 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push('\''); None }, 114 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"'); None }, 115 Token::SingleQuote | Token::DoubleQuote => { 116 self.data.quote = None; 117 let name = self.data.take_name(); 118 let val = self.take_buf(); 119 self.entities.entry(name).or_insert(val); // First wins 120 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) // FIXME 121 }, 122 Token::ReferenceStart | Token::Character('&') => { 123 self.data.ref_data.clear(); 124 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart)) 125 }, 126 Token::Character('%') => { 127 self.data.ref_data.clear(); 128 self.data.ref_data.push('%'); // include literal % in the name to distinguish from regular entities 129 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue)) 130 }, 131 Token::Character(c) if !self.is_valid_xml_char(c) => { 132 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) 133 }, 134 Token::Character(c) => { 135 self.buf.push(c); 136 None 137 }, 138 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 139 }, 140 DoctypeSubstate::PEReferenceDefinitionStart => match t { 141 Token::Character(c) if is_whitespace_char(c) => { 142 None 143 }, 144 Token::Character(c) if is_name_start_char(c) => { 145 debug_assert_eq!(self.data.name, "%"); 146 self.data.name.push(c); 147 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition)) 148 }, 149 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 150 }, 151 DoctypeSubstate::PEReferenceDefinition => match t { 152 Token::Character(c) if is_name_char(c) => { 153 if self.data.name.len() > self.config.max_name_length { 154 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 155 } 156 self.data.name.push(c); 157 None 158 }, 159 Token::Character(c) if is_whitespace_char(c) => { 160 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue)) 161 }, 162 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 163 }, 164 DoctypeSubstate::PEReferenceInDtd => match t { 165 Token::Character(c) if is_name_char(c) => { 166 self.data.ref_data.push(c); 167 None 168 }, 169 Token::ReferenceEnd | Token::Character(';') => { 170 let name = self.data.take_ref_data(); 171 match self.entities.get(&name) { 172 Some(ent) => { 173 if let Err(e) = self.lexer.reparse(ent) { 174 return Some(Err(e)); 175 } 176 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) 177 }, 178 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))), 179 } 180 }, 181 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 182 }, 183 DoctypeSubstate::PEReferenceInValue => match t { 184 Token::Character(c) if is_name_char(c) => { 185 self.data.ref_data.push(c); 186 None 187 }, 188 Token::ReferenceEnd | Token::Character(';') => { 189 let name = self.data.take_ref_data(); 190 match self.entities.get(&name) { 191 Some(ent) => { 192 self.buf.push_str(ent); 193 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) 194 }, 195 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))), 196 } 197 }, 198 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 199 }, 200 DoctypeSubstate::NumericReferenceStart => match t { 201 Token::Character('#') => { 202 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference)) 203 }, 204 Token::Character(c) if !self.is_valid_xml_char(c) => { 205 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) 206 }, 207 Token::Character(c) => { 208 self.buf.push('&'); 209 self.buf.push(c); 210 // named entities are not expanded inside doctype 211 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) 212 }, 213 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 214 }, 215 DoctypeSubstate::NumericReference => match t { 216 Token::ReferenceEnd | Token::Character(';') => { 217 let r = self.data.take_ref_data(); 218 // https://www.w3.org/TR/xml/#sec-entexpand 219 match self.numeric_reference_from_str(&r) { 220 Ok(c) => { 221 self.buf.push(c); 222 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) 223 } 224 Err(e) => Some(self.error(e)), 225 } 226 }, 227 Token::Character(c) if !self.is_valid_xml_char(c) => { 228 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) 229 }, 230 Token::Character(c) => { 231 self.data.ref_data.push(c); 232 None 233 }, 234 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 235 }, 236 DoctypeSubstate::SkipDeclaration => match t { 237 Token::TagEnd => { 238 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) 239 }, 240 _ => None, 241 }, 242 } 243 } 244 } 245