1 use crate::common::{is_whitespace_char, XmlVersion}; 2 use crate::reader::error::SyntaxError; 3 use crate::reader::events::XmlEvent; 4 use crate::reader::lexer::Token; 5 use crate::util::Encoding; 6 7 use super::{ 8 DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State, 9 DEFAULT_VERSION, 10 }; 11 12 impl PullParser { 13 #[inline(never)] emit_start_document(&mut self) -> Option<Result>14 fn emit_start_document(&mut self) -> Option<Result> { 15 debug_assert!(self.encountered == Encountered::None); 16 self.encountered = Encountered::Declaration; 17 18 let version = self.data.version; 19 let encoding = self.data.take_encoding(); 20 let standalone = self.data.standalone; 21 22 if let Some(new_encoding) = encoding.as_deref() { 23 let new_encoding = match new_encoding.parse() { 24 Ok(e) => e, 25 Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1, 26 Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))), 27 }; 28 let current_encoding = self.lexer.encoding(); 29 if current_encoding != new_encoding { 30 let set = match (current_encoding, new_encoding) { 31 (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new, 32 (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding, 33 _ if self.config.ignore_invalid_encoding_declarations => current_encoding, 34 _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))), 35 }; 36 self.lexer.set_encoding(set); 37 } 38 } 39 40 let current_encoding = self.lexer.encoding(); 41 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument { 42 version: version.unwrap_or(DEFAULT_VERSION), 43 encoding: encoding.unwrap_or_else(move || current_encoding.to_string()), 44 standalone 45 })) 46 } 47 48 // TODO: remove redundancy via macros or extra methods inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result>49 pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> { 50 51 match s { 52 DeclarationSubstate::BeforeVersion => match t { 53 Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)), 54 Token::Character(c) if is_whitespace_char(c) => None, // continue 55 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 56 }, 57 58 DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { 59 match &*name.local_name { 60 "ersion" if name.namespace.is_none() => 61 this.into_state_continue(State::InsideDeclaration( 62 if token == Token::EqualsSign { 63 DeclarationSubstate::InsideVersionValue 64 } else { 65 DeclarationSubstate::AfterVersion 66 } 67 )), 68 _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))), 69 } 70 }), 71 72 DeclarationSubstate::AfterVersion => match t { 73 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)), 74 Token::Character(c) if is_whitespace_char(c) => None, 75 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 76 }, 77 78 DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| { 79 this.data.version = match &*value { 80 "1.0" => Some(XmlVersion::Version10), 81 "1.1" => Some(XmlVersion::Version11), 82 _ => None 83 }; 84 if this.data.version.is_some() { 85 this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue)) 86 } else { 87 Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into()))) 88 } 89 }), 90 91 DeclarationSubstate::AfterVersionValue => match t { 92 Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)), 93 Token::ProcessingInstructionEnd => self.emit_start_document(), 94 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 95 }, 96 97 DeclarationSubstate::BeforeEncoding => match t { 98 Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), 99 Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), 100 Token::ProcessingInstructionEnd => self.emit_start_document(), 101 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace 102 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 103 }, 104 105 DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { 106 match &*name.local_name { 107 "ncoding" if name.namespace.is_none() => 108 this.into_state_continue(State::InsideDeclaration( 109 if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding } 110 )), 111 _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))) 112 } 113 }), 114 115 DeclarationSubstate::AfterEncoding => match t { 116 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)), 117 Token::Character(c) if is_whitespace_char(c) => None, 118 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 119 }, 120 121 DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| { 122 this.data.encoding = Some(value); 123 this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue)) 124 }), 125 126 DeclarationSubstate::AfterEncodingValue => match t { 127 Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)), 128 Token::ProcessingInstructionEnd => self.emit_start_document(), 129 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 130 }, 131 132 DeclarationSubstate::BeforeStandaloneDecl => match t { 133 Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), 134 Token::ProcessingInstructionEnd => self.emit_start_document(), 135 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace 136 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 137 }, 138 139 DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { 140 match &*name.local_name { 141 "tandalone" if name.namespace.is_none() => 142 this.into_state_continue(State::InsideDeclaration( 143 if token == Token::EqualsSign { 144 DeclarationSubstate::InsideStandaloneDeclValue 145 } else { 146 DeclarationSubstate::AfterStandaloneDecl 147 } 148 )), 149 _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))), 150 } 151 }), 152 153 DeclarationSubstate::AfterStandaloneDecl => match t { 154 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)), 155 Token::Character(c) if is_whitespace_char(c) => None, 156 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 157 }, 158 159 DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| { 160 let standalone = match &*value { 161 "yes" => Some(true), 162 "no" => Some(false), 163 _ => None 164 }; 165 if standalone.is_some() { 166 this.data.standalone = standalone; 167 this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue)) 168 } else { 169 Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into()))) 170 } 171 }), 172 173 DeclarationSubstate::AfterStandaloneDeclValue => match t { 174 Token::ProcessingInstructionEnd => self.emit_start_document(), 175 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace 176 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 177 }, 178 } 179 } 180 } 181