1 use crate::reader::error::SyntaxError; 2 use crate::common::is_whitespace_char; 3 use crate::reader::events::XmlEvent; 4 use crate::reader::lexer::Token; 5 6 use super::{ 7 ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate, 8 ProcessingInstructionSubstate, PullParser, Result, State, 9 }; 10 11 impl PullParser { outside_tag(&mut self, t: Token) -> Option<Result>12 pub fn outside_tag(&mut self, t: Token) -> Option<Result> { 13 match t { 14 Token::Character(c) => { 15 if is_whitespace_char(c) { 16 // skip whitespace outside of the root element 17 if (self.config.c.trim_whitespace && self.buf.is_empty()) || 18 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) { 19 return None; 20 } 21 } else { 22 self.inside_whitespace = false; 23 if self.depth() == 0 { 24 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t))); 25 } 26 } 27 28 if !self.is_valid_xml_char_not_restricted(c) { 29 return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))); 30 } 31 32 if self.buf.is_empty() { 33 self.push_pos(); 34 } else if self.buf.len() > self.config.max_data_length { 35 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 36 } 37 self.buf.push(c); 38 None 39 }, 40 41 Token::CommentEnd | Token::TagEnd | Token::EqualsSign | 42 Token::DoubleQuote | Token::SingleQuote | 43 Token::ProcessingInstructionEnd | Token::EmptyTagEnd => { 44 if self.depth() == 0 { 45 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t))); 46 } 47 self.inside_whitespace = false; 48 49 if let Some(s) = t.as_static_str() { 50 if self.buf.is_empty() { 51 self.push_pos(); 52 } else if self.buf.len() > self.config.max_data_length { 53 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 54 } 55 56 self.buf.push_str(s); 57 } 58 None 59 }, 60 61 Token::ReferenceStart if self.depth() > 0 => { 62 self.state_after_reference = State::OutsideTag; 63 self.into_state_continue(State::InsideReference) 64 }, 65 66 Token::ReferenceEnd if self.depth() > 0 => { // Semi-colon in a text outside an entity 67 self.inside_whitespace = false; 68 if self.buf.len() > self.config.max_data_length { 69 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 70 } 71 Token::ReferenceEnd.push_to_string(&mut self.buf); 72 None 73 }, 74 75 Token::CommentStart if self.config.c.coalesce_characters && self.config.c.ignore_comments => { 76 let next_event = self.set_encountered(Encountered::Comment); 77 // We need to switch the lexer into a comment mode inside comments 78 self.into_state(State::InsideComment, next_event) 79 } 80 81 Token::CDataStart if self.depth() > 0 && self.config.c.coalesce_characters && self.config.c.cdata_to_characters => { 82 if self.buf.is_empty() { 83 self.push_pos(); 84 } 85 self.into_state_continue(State::InsideCData) 86 }, 87 88 _ => { 89 // Encountered some markup event, flush the buffer as characters 90 // or a whitespace 91 let mut next_event = if self.buf_has_data() { 92 let buf = self.take_buf(); 93 if self.inside_whitespace && self.config.c.trim_whitespace { 94 None 95 } else if self.inside_whitespace && !self.config.c.whitespace_to_characters { 96 debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}"); 97 Some(Ok(XmlEvent::Whitespace(buf))) 98 } else if self.config.c.trim_whitespace { 99 Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into()))) 100 } else { 101 Some(Ok(XmlEvent::Characters(buf))) 102 } 103 } else { None }; 104 self.inside_whitespace = true; // Reset inside_whitespace flag 105 106 // pos is popped whenever an event is emitted, so pushes must happen only if there will be an event to balance it 107 // and ignored comments don't pop 108 if t != Token::CommentStart || !self.config.c.ignore_comments { 109 self.push_pos(); 110 } 111 match t { 112 Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => { 113 if let Some(e) = self.set_encountered(Encountered::Element) { 114 next_event = Some(e); 115 } 116 self.nst.push_empty(); 117 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event) 118 }, 119 120 Token::ClosingTagStart if self.depth() > 0 => 121 self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event), 122 123 Token::CommentStart => { 124 if let Some(e) = self.set_encountered(Encountered::Comment) { 125 next_event = Some(e); 126 } 127 // We need to switch the lexer into a comment mode inside comments 128 self.into_state(State::InsideComment, next_event) 129 }, 130 131 Token::DoctypeStart if self.encountered < Encountered::Doctype => { 132 if let Some(e) = self.set_encountered(Encountered::Doctype) { 133 next_event = Some(e); 134 } 135 136 // We don't have a doctype event so skip this position 137 // FIXME: update when we have a doctype event 138 self.next_pos(); 139 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event) 140 }, 141 142 Token::ProcessingInstructionStart => 143 self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event), 144 145 Token::CDataStart if self.depth() > 0 => { 146 self.into_state(State::InsideCData, next_event) 147 }, 148 149 _ => Some(self.error(SyntaxError::UnexpectedToken(t))) 150 } 151 } 152 } 153 } 154 document_start(&mut self, t: Token) -> Option<Result>155 pub fn document_start(&mut self, t: Token) -> Option<Result> { 156 debug_assert!(self.encountered < Encountered::Declaration); 157 158 match t { 159 Token::Character(c) => { 160 let next_event = self.set_encountered(Encountered::AnyChars); 161 162 if !is_whitespace_char(c) { 163 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t))); 164 } 165 self.inside_whitespace = true; 166 167 // skip whitespace outside of the root element 168 if (self.config.c.trim_whitespace && self.buf.is_empty()) || 169 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) { 170 return self.into_state(State::OutsideTag, next_event); 171 } 172 173 self.push_pos(); 174 self.buf.push(c); 175 self.into_state(State::OutsideTag, next_event) 176 }, 177 178 Token::CommentStart => { 179 let next_event = self.set_encountered(Encountered::Comment); 180 self.into_state(State::InsideComment, next_event) 181 } 182 183 Token::OpeningTagStart => { 184 let next_event = self.set_encountered(Encountered::Element); 185 self.nst.push_empty(); 186 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event) 187 }, 188 189 Token::DoctypeStart => { 190 let next_event = self.set_encountered(Encountered::Doctype); 191 // We don't have a doctype event so skip this position 192 // FIXME: update when we have a doctype event 193 self.next_pos(); 194 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event) 195 }, 196 197 Token::ProcessingInstructionStart => { 198 self.push_pos(); 199 self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName)) 200 }, 201 202 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 203 } 204 } 205 } 206