• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::reader::error::SyntaxError;
2 use crate::common::is_whitespace_char;
3 use crate::reader::events::XmlEvent;
4 use crate::reader::lexer::Token;
5 
6 use super::{
7     ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate,
8     ProcessingInstructionSubstate, PullParser, Result, State,
9 };
10 
11 impl PullParser {
outside_tag(&mut self, t: Token) -> Option<Result>12     pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
13         match t {
14             Token::Character(c) => {
15                 if is_whitespace_char(c) {
16                     // skip whitespace outside of the root element
17                     if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
18                         (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
19                             return None;
20                     }
21                 } else {
22                     self.inside_whitespace = false;
23                     if self.depth() == 0 {
24                         return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
25                     }
26                 }
27 
28                 if !self.is_valid_xml_char_not_restricted(c) {
29                     return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
30                 }
31 
32                 if self.buf.is_empty() {
33                     self.push_pos();
34                 } else if self.buf.len() > self.config.max_data_length {
35                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
36                 }
37                 self.buf.push(c);
38                 None
39             },
40 
41             Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
42             Token::DoubleQuote | Token::SingleQuote |
43             Token::ProcessingInstructionEnd | Token::EmptyTagEnd => {
44                 if self.depth() == 0 {
45                     return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
46                 }
47                 self.inside_whitespace = false;
48 
49                 if let Some(s) = t.as_static_str() {
50                     if self.buf.is_empty() {
51                         self.push_pos();
52                     } else if self.buf.len() > self.config.max_data_length {
53                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
54                     }
55 
56                     self.buf.push_str(s);
57                 }
58                 None
59             },
60 
61             Token::ReferenceStart if self.depth() > 0 => {
62                 self.state_after_reference = State::OutsideTag;
63                 self.into_state_continue(State::InsideReference)
64             },
65 
66             Token::ReferenceEnd if self.depth() > 0 => { // Semi-colon in a text outside an entity
67                 self.inside_whitespace = false;
68                 if self.buf.len() > self.config.max_data_length {
69                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
70                 }
71                 Token::ReferenceEnd.push_to_string(&mut self.buf);
72                 None
73             },
74 
75             Token::CommentStart if self.config.c.coalesce_characters && self.config.c.ignore_comments => {
76                 let next_event = self.set_encountered(Encountered::Comment);
77                 // We need to switch the lexer into a comment mode inside comments
78                 self.into_state(State::InsideComment, next_event)
79             }
80 
81             Token::CDataStart if self.depth() > 0 && self.config.c.coalesce_characters && self.config.c.cdata_to_characters => {
82                 if self.buf.is_empty() {
83                     self.push_pos();
84                 }
85                 self.into_state_continue(State::InsideCData)
86             },
87 
88             _ => {
89                 // Encountered some markup event, flush the buffer as characters
90                 // or a whitespace
91                 let mut next_event = if self.buf_has_data() {
92                     let buf = self.take_buf();
93                     if self.inside_whitespace && self.config.c.trim_whitespace {
94                         None
95                     } else if self.inside_whitespace && !self.config.c.whitespace_to_characters {
96                         debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
97                         Some(Ok(XmlEvent::Whitespace(buf)))
98                     } else if self.config.c.trim_whitespace {
99                         Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
100                     } else {
101                         Some(Ok(XmlEvent::Characters(buf)))
102                     }
103                 } else { None };
104                 self.inside_whitespace = true;  // Reset inside_whitespace flag
105 
106                 // pos is popped whenever an event is emitted, so pushes must happen only if there will be an event to balance it
107                 // and ignored comments don't pop
108                 if t != Token::CommentStart || !self.config.c.ignore_comments {
109                     self.push_pos();
110                 }
111                 match t {
112                     Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => {
113                         if let Some(e) = self.set_encountered(Encountered::Element) {
114                             next_event = Some(e);
115                         }
116                         self.nst.push_empty();
117                         self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
118                     },
119 
120                     Token::ClosingTagStart if self.depth() > 0 =>
121                         self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
122 
123                     Token::CommentStart => {
124                         if let Some(e) = self.set_encountered(Encountered::Comment) {
125                             next_event = Some(e);
126                         }
127                         // We need to switch the lexer into a comment mode inside comments
128                         self.into_state(State::InsideComment, next_event)
129                     },
130 
131                     Token::DoctypeStart if self.encountered < Encountered::Doctype => {
132                         if let Some(e) = self.set_encountered(Encountered::Doctype) {
133                             next_event = Some(e);
134                         }
135 
136                         // We don't have a doctype event so skip this position
137                         // FIXME: update when we have a doctype event
138                         self.next_pos();
139                         self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
140                     },
141 
142                     Token::ProcessingInstructionStart =>
143                         self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
144 
145                     Token::CDataStart if self.depth() > 0 => {
146                         self.into_state(State::InsideCData, next_event)
147                     },
148 
149                     _ => Some(self.error(SyntaxError::UnexpectedToken(t)))
150                 }
151             }
152         }
153     }
154 
document_start(&mut self, t: Token) -> Option<Result>155     pub fn document_start(&mut self, t: Token) -> Option<Result> {
156         debug_assert!(self.encountered < Encountered::Declaration);
157 
158         match t {
159             Token::Character(c) => {
160                 let next_event = self.set_encountered(Encountered::AnyChars);
161 
162                 if !is_whitespace_char(c) {
163                     return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
164                 }
165                 self.inside_whitespace = true;
166 
167                 // skip whitespace outside of the root element
168                 if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
169                     (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
170                         return self.into_state(State::OutsideTag, next_event);
171                 }
172 
173                 self.push_pos();
174                 self.buf.push(c);
175                 self.into_state(State::OutsideTag, next_event)
176             },
177 
178             Token::CommentStart => {
179                 let next_event = self.set_encountered(Encountered::Comment);
180                 self.into_state(State::InsideComment, next_event)
181             }
182 
183             Token::OpeningTagStart => {
184                 let next_event = self.set_encountered(Encountered::Element);
185                 self.nst.push_empty();
186                 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
187             },
188 
189             Token::DoctypeStart => {
190                 let next_event = self.set_encountered(Encountered::Doctype);
191                 // We don't have a doctype event so skip this position
192                 // FIXME: update when we have a doctype event
193                 self.next_pos();
194                 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
195             },
196 
197             Token::ProcessingInstructionStart => {
198                 self.push_pos();
199                 self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName))
200             },
201 
202             _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
203         }
204     }
205 }
206