• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Contains high-level interface for a pull-based XML parser.
2 //!
3 //! The most important type in this module is `EventReader`, which provides an iterator
4 //! view for events in XML document.
5 
6 use std::io::Read;
7 use std::iter::FusedIterator;
8 use std::result;
9 
10 use crate::common::{Position, TextPosition};
11 
12 pub use self::config::ParserConfig;
13 pub use self::config::ParserConfig2;
14 pub use self::error::{Error, ErrorKind};
15 pub use self::events::XmlEvent;
16 
17 use self::parser::PullParser;
18 
19 mod config;
20 mod events;
21 mod lexer;
22 mod parser;
23 mod indexset;
24 mod error;
25 
26 
/// A result type yielded by `EventReader`.
///
/// The error parameter defaults to this module's `Error`, so `Result<T>` can be
/// written wherever a parsing operation may fail.
pub type Result<T, E = Error> = result::Result<T, E>;
29 
/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
pub struct EventReader<R: Read> {
    /// The underlying stream; it is handed to the parser each time an event is pulled.
    source: R,
    /// The parser that produces `XmlEvent`s from `source`.
    parser: PullParser,
}
35 
36 impl<R: Read> EventReader<R> {
37     /// Creates a new reader, consuming the given stream.
38     #[inline]
new(source: R) -> EventReader<R>39     pub fn new(source: R) -> EventReader<R> {
40         EventReader::new_with_config(source, ParserConfig2::new())
41     }
42 
43     /// Creates a new reader with the provded configuration, consuming the given stream.
44     #[inline]
new_with_config(source: R, config: impl Into<ParserConfig2>) -> EventReader<R>45     pub fn new_with_config(source: R, config: impl Into<ParserConfig2>) -> EventReader<R> {
46         EventReader { source, parser: PullParser::new(config) }
47     }
48 
49     /// Pulls and returns next XML event from the stream.
50     ///
51     /// If returned event is `XmlEvent::Error` or `XmlEvent::EndDocument`, then
52     /// further calls to this method will return this event again.
53     #[inline]
next(&mut self) -> Result<XmlEvent>54     pub fn next(&mut self) -> Result<XmlEvent> {
55         self.parser.next(&mut self.source)
56     }
57 
58     /// Skips all XML events until the next end tag at the current level.
59     ///
60     /// Convenience function that is useful for the case where you have
61     /// encountered a start tag that is of no interest and want to
62     /// skip the entire XML subtree until the corresponding end tag.
63     #[inline]
skip(&mut self) -> Result<()>64     pub fn skip(&mut self) -> Result<()> {
65         let mut depth = 1;
66 
67         while depth > 0 {
68             match self.next()? {
69                 XmlEvent::StartElement { .. } => depth += 1,
70                 XmlEvent::EndElement { .. } => depth -= 1,
71                 XmlEvent::EndDocument => unreachable!(),
72                 _ => {}
73             }
74         }
75 
76         Ok(())
77     }
78 
source(&self) -> &R79     pub fn source(&self) -> &R { &self.source }
source_mut(&mut self) -> &mut R80     pub fn source_mut(&mut self) -> &mut R { &mut self.source }
81 
82     /// Unwraps this `EventReader`, returning the underlying reader.
83     ///
84     /// Note that this operation is destructive; unwrapping the reader and wrapping it
85     /// again with `EventReader::new()` will create a fresh reader which will attempt
86     /// to parse an XML document from the beginning.
into_inner(self) -> R87     pub fn into_inner(self) -> R {
88         self.source
89     }
90 }
91 
92 impl<B: Read> Position for EventReader<B> {
93     /// Returns the position of the last event produced by the reader.
94     #[inline]
position(&self) -> TextPosition95     fn position(&self) -> TextPosition {
96         self.parser.position()
97     }
98 }
99 
100 impl<R: Read> IntoIterator for EventReader<R> {
101     type Item = Result<XmlEvent>;
102     type IntoIter = Events<R>;
103 
into_iter(self) -> Events<R>104     fn into_iter(self) -> Events<R> {
105         Events { reader: self, finished: false }
106     }
107 }
108 
/// An iterator over XML events created from some type implementing `Read`.
///
/// When the next item is an error or `XmlEvent::EndDocument`, it will be returned
/// by the iterator once, and then the iterator will stop producing events (unless
/// the parser reports that it is ignoring the end of the stream).
pub struct Events<R: Read> {
    /// The reader the events are pulled from.
    reader: EventReader<R>,
    /// Set once an error or `XmlEvent::EndDocument` has been yielded.
    finished: bool,
}
117 
118 impl<R: Read> Events<R> {
119     /// Unwraps the iterator, returning the internal `EventReader`.
120     #[inline]
into_inner(self) -> EventReader<R>121     pub fn into_inner(self) -> EventReader<R> {
122         self.reader
123     }
124 
source(&self) -> &R125     pub fn source(&self) -> &R { &self.reader.source }
source_mut(&mut self) -> &mut R126     pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
127 
128 }
129 
// Marker impl: `Events::next` returns `None` only after `finished` has been set,
// and `finished` is never reset afterwards.
// NOTE(review): this also assumes `is_ignoring_end_of_stream()` does not change
// between calls — confirm against the parser.
impl<R: Read> FusedIterator for Events<R> {
}
132 
133 impl<R: Read> Iterator for Events<R> {
134     type Item = Result<XmlEvent>;
135 
136     #[inline]
next(&mut self) -> Option<Result<XmlEvent>>137     fn next(&mut self) -> Option<Result<XmlEvent>> {
138         if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
139             None
140         } else {
141             let ev = self.reader.next();
142             if let Ok(XmlEvent::EndDocument) | Err(_) = ev {
143                 self.finished = true;
144             }
145             Some(ev)
146         }
147     }
148 }
149 
150 impl<'r> EventReader<&'r [u8]> {
151     /// A convenience method to create an `XmlReader` from a string slice.
152     #[inline]
153     #[must_use]
from_str(source: &'r str) -> EventReader<&'r [u8]>154     pub fn from_str(source: &'r str) -> EventReader<&'r [u8]> {
155         EventReader::new(source.as_bytes())
156     }
157 }
158