• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::reader::error::SyntaxError;
2 use std::char;
3 use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
4 use crate::reader::lexer::Token;
5 use super::{PullParser, Result, State};
6 
7 impl PullParser {
inside_reference(&mut self, t: Token) -> Option<Result>8     pub fn inside_reference(&mut self, t: Token) -> Option<Result> {
9         match t {
10             Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
11                              self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
12                 self.data.ref_data.push(c);
13                 None
14             }
15 
16             Token::ReferenceEnd => {
17                 let name = self.data.take_ref_data();
18                 if name.is_empty() {
19                     return Some(self.error(SyntaxError::EmptyEntity));
20                 }
21 
22                 let c = match &*name {
23                     "lt"   => Some('<'),
24                     "gt"   => Some('>'),
25                     "amp"  => Some('&'),
26                     "apos" => Some('\''),
27                     "quot" => Some('"'),
28                     _ if name.starts_with('#') => match self.numeric_reference_from_str(&name[1..]) {
29                         Ok(c) => Some(c),
30                         Err(e) => return Some(self.error(e))
31                     },
32                     _ => None,
33                 };
34                 if let Some(c) = c {
35                     self.buf.push(c);
36                 } else if let Some(v) = self.config.c.extra_entities.get(&name) {
37                     self.buf.push_str(v);
38                 } else if let Some(v) = self.entities.get(&name) {
39                     if self.state_after_reference == State::OutsideTag {
40                         // an entity can expand to *elements*, so outside of a tag it needs a full reparse
41                         if let Err(e) = self.lexer.reparse(v) {
42                             return Some(Err(e));
43                         }
44                     } else {
45                         // however, inside attributes it's not allowed to affect attribute quoting,
46                         // so it can't be fed to the lexer
47                         self.buf.push_str(v);
48                     }
49                 } else {
50                     return Some(self.error(SyntaxError::UnexpectedEntity(name.into())));
51                 }
52                 let prev_st = self.state_after_reference;
53                 if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or('\0')) {
54                     self.inside_whitespace = false;
55                 }
56                 self.into_state_continue(prev_st)
57             }
58 
59             _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
60         }
61     }
62 
numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError>63     pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> {
64         let val = if let Some(hex) = num_str.strip_prefix('x') {
65             u32::from_str_radix(hex, 16).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))?
66         } else {
67             u32::from_str_radix(num_str, 10).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))?
68         };
69         match char::from_u32(val) {
70             Some(c) if self.is_valid_xml_char(c) => Ok(c),
71             Some(_) if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
72             None if self.config.c.replace_unknown_entity_references => {
73                 Ok('\u{fffd}')
74             },
75             _ => Err(SyntaxError::InvalidCharacterEntity(val)),
76         }
77     }
78 }
79