1 use crate::reader::error::SyntaxError; 2 use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; 3 4 use crate::reader::events::XmlEvent; 5 use crate::reader::lexer::Token; 6 7 use super::{DeclarationSubstate, ProcessingInstructionSubstate, PullParser, Result, State, Encountered}; 8 9 impl PullParser { inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result>10 pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> { 11 match s { 12 ProcessingInstructionSubstate::PIInsideName => match t { 13 Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) || 14 self.buf_has_data() && is_name_char(c) => { 15 if self.buf.len() > self.config.max_name_length { 16 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 17 } 18 self.buf.push(c); 19 None 20 }, 21 22 Token::ProcessingInstructionEnd => { 23 // self.buf contains PI name 24 let name = self.take_buf(); 25 26 // Don't need to check for declaration because it has mandatory attributes 27 // but there is none 28 match &*name { 29 // Name is empty, it is an error 30 "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)), 31 32 // Found <?xml-like PI not at the beginning of a document, 33 // it is an error - see section 2.6 of XML 1.1 spec 34 n if "xml".eq_ignore_ascii_case(n) => 35 Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), 36 37 // All is ok, emitting event 38 _ => { 39 debug_assert!(self.next_event.is_none(), "{:?}", self.next_event); 40 // can't have a PI before `<?xml` 41 let event1 = self.set_encountered(Encountered::Declaration); 42 let event2 = Some(Ok(XmlEvent::ProcessingInstruction { 43 name, 44 data: None 45 })); 46 // emitting two events at once is cumbersome 47 let event1 = if event1.is_some() { 48 self.next_event = event2; 49 event1 50 } else { 51 event2 52 }; 53 self.into_state(State::OutsideTag, event1) 54 } 55 } 56 } 57 58 Token::Character(c) if is_whitespace_char(c) => { 59 // self.buf contains PI name 60 let name = self.take_buf(); 61 62 match &*name { 63 // We have not ever encountered an element and have not parsed XML declaration 64 "xml" if self.encountered == Encountered::None => 65 self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)), 66 67 // Found <?xml-like PI after the beginning of a document, 68 // it is an error - see section 2.6 of XML 1.1 spec 69 n if "xml".eq_ignore_ascii_case(n) => 70 Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), 71 72 // All is ok, starting parsing PI data 73 _ => { 74 self.data.name = name; 75 // can't have a PI before `<?xml` 76 let next_event = self.set_encountered(Encountered::Declaration); 77 self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event) 78 } 79 } 80 } 81 82 _ => { 83 let buf = self.take_buf(); 84 Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t))) 85 } 86 }, 87 88 ProcessingInstructionSubstate::PIInsideData => match t { 89 Token::ProcessingInstructionEnd => { 90 let name = self.data.take_name(); 91 let data = self.take_buf(); 92 self.into_state_emit( 93 State::OutsideTag, 94 Ok(XmlEvent::ProcessingInstruction { 95 name, 96 data: Some(data), 97 }), 98 ) 99 }, 100 101 Token::Character(c) if !self.is_valid_xml_char(c) => { 102 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) 103 }, 104 105 // Any other token should be treated as plain characters 106 _ => { 107 if self.buf.len() > self.config.max_data_length { 108 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 109 } 110 t.push_to_string(&mut self.buf); 111 None 112 } 113 }, 114 } 115 } 116 } 117