1 use crate::reader::error::SyntaxError; 2 use std::char; 3 use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; 4 use crate::reader::lexer::Token; 5 use super::{PullParser, Result, State}; 6 7 impl PullParser { inside_reference(&mut self, t: Token) -> Option<Result>8 pub fn inside_reference(&mut self, t: Token) -> Option<Result> { 9 match t { 10 Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) || 11 self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => { 12 self.data.ref_data.push(c); 13 None 14 } 15 16 Token::ReferenceEnd => { 17 let name = self.data.take_ref_data(); 18 if name.is_empty() { 19 return Some(self.error(SyntaxError::EmptyEntity)); 20 } 21 22 let c = match &*name { 23 "lt" => Some('<'), 24 "gt" => Some('>'), 25 "amp" => Some('&'), 26 "apos" => Some('\''), 27 "quot" => Some('"'), 28 _ if name.starts_with('#') => match self.numeric_reference_from_str(&name[1..]) { 29 Ok(c) => Some(c), 30 Err(e) => return Some(self.error(e)) 31 }, 32 _ => None, 33 }; 34 if let Some(c) = c { 35 self.buf.push(c); 36 } else if let Some(v) = self.config.c.extra_entities.get(&name) { 37 self.buf.push_str(v); 38 } else if let Some(v) = self.entities.get(&name) { 39 if self.state_after_reference == State::OutsideTag { 40 // an entity can expand to *elements*, so outside of a tag it needs a full reparse 41 if let Err(e) = self.lexer.reparse(v) { 42 return Some(Err(e)); 43 } 44 } else { 45 // however, inside attributes it's not allowed to affect attribute quoting, 46 // so it can't be fed to the lexer 47 self.buf.push_str(v); 48 } 49 } else { 50 return Some(self.error(SyntaxError::UnexpectedEntity(name.into()))); 51 } 52 let prev_st = self.state_after_reference; 53 if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or('\0')) { 54 self.inside_whitespace = false; 55 } 56 self.into_state_continue(prev_st) 57 } 58 59 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), 60 } 61 } 62 numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError>63 pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> { 64 let val = if let Some(hex) = num_str.strip_prefix('x') { 65 u32::from_str_radix(hex, 16).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? 66 } else { 67 u32::from_str_radix(num_str, 10).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? 68 }; 69 match char::from_u32(val) { 70 Some(c) if self.is_valid_xml_char(c) => Ok(c), 71 Some(_) if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'), 72 None if self.config.c.replace_unknown_entity_references => { 73 Ok('\u{fffd}') 74 }, 75 _ => Err(SyntaxError::InvalidCharacterEntity(val)), 76 } 77 } 78 } 79