//! Contains the parser configuration structure.
2 use std::io::Read;
3 use std::collections::HashMap;
4 
5 use reader::EventReader;
6 
/// Parser configuration structure.
///
/// This structure contains various configuration options which affect
/// the behavior of the parser.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct ParserConfig {
    /// Whether whitespace in textual events should be removed. Default is false.
    ///
    /// When true, all standalone whitespace will be removed (this means no
    /// `Whitespace` events will be emitted), and leading and trailing whitespace
    /// from `Characters` events will be deleted. If a `Characters` event is empty
    /// after trimming, it will also be omitted from the output stream. This is
    /// possible, however, only if the `whitespace_to_characters` or
    /// `cdata_to_characters` options are set.
    ///
    /// This option does not affect CDATA events, unless the `cdata_to_characters`
    /// option is also set. In that case CDATA content will also be trimmed.
    pub trim_whitespace: bool,

    /// Whether whitespace should be converted to characters.
    /// Default is false.
    ///
    /// If true, `Characters` events with the same content will be emitted
    /// instead of `Whitespace` events. If `trim_whitespace` is also true, these
    /// events will be trimmed to nothing and, consequently, not emitted.
    pub whitespace_to_characters: bool,

    /// Whether CDATA should be converted to characters.
    /// Default is false.
    ///
    /// If true, `Characters` events with the same content will be emitted
    /// instead of `CData` events. If `trim_whitespace` is also true, these events
    /// will be trimmed. If the corresponding CDATA contained nothing but
    /// whitespace, the event will be omitted from the stream.
    pub cdata_to_characters: bool,

    /// Whether comments should be omitted. Default is true.
    ///
    /// If true, `Comment` events will not be emitted at all.
    pub ignore_comments: bool,

    /// Whether sequential `Characters` events should be merged.
    /// Default is true.
    ///
    /// If true, multiple sequential `Characters` events will be merged into
    /// a single event, that is, their data will be concatenated.
    ///
    /// Multiple sequential `Characters` events are only possible if either
    /// `cdata_to_characters` or `ignore_comments` is set. Otherwise character
    /// events will always be separated by other events.
    pub coalesce_characters: bool,

    /// A map of extra entities recognized by the parser. Default is an empty map.
    ///
    /// By default the XML parser recognizes the entities defined in the XML spec. Sometimes,
    /// however, it is convenient to make the parser recognize additional entities which
    /// are also not available through the DTD definitions (especially given that at the moment
    /// DTD parsing is not supported).
    pub extra_entities: HashMap<String, String>,

    /// Whether the parser should ignore the end of stream. Default is false.
    ///
    /// By default the parser will either error out when it encounters a premature end of
    /// stream or complete normally if the end of stream was expected. If you want to continue
    /// reading from a stream whose input is supplied progressively, you can set this option to
    /// true. In this case the parser will allow you to invoke the `next()` method even if a
    /// supposed end of stream has happened.
    ///
    /// Note that support for this functionality is incomplete; for example, the parser will
    /// fail if the premature end of stream happens inside PCDATA. Therefore, use this option
    /// at your own risk.
    pub ignore_end_of_stream: bool,

    /// Whether non-unicode entity references get replaced with the replacement character.
    /// Default is false.
    ///
    /// When true, any decimal or hexadecimal character reference that cannot be converted from a
    /// u32 to a char using [std::char::from_u32](https://doc.rust-lang.org/std/char/fn.from_u32.html)
    /// will be converted into the unicode REPLACEMENT CHARACTER (U+FFFD).
    pub replace_unknown_entity_references: bool,

    /// Whether whitespace at the root level of the document is ignored. Default is true.
    ///
    /// By default any whitespace that is not enclosed within at least one level of elements
    /// will be ignored. Setting this value to false will cause root level whitespace events
    /// to be emitted.
    pub ignore_root_level_whitespace: bool,
}
92 
93 impl ParserConfig {
94     /// Returns a new config with default values.
95     ///
96     /// You can tweak default values using builder-like pattern:
97     ///
98     /// ```rust
99     /// use xml::reader::ParserConfig;
100     ///
101     /// let config = ParserConfig::new()
102     ///     .trim_whitespace(true)
103     ///     .ignore_comments(true)
104     ///     .coalesce_characters(false);
105     /// ```
new() -> ParserConfig106     pub fn new() -> ParserConfig {
107         ParserConfig {
108             trim_whitespace: false,
109             whitespace_to_characters: false,
110             cdata_to_characters: false,
111             ignore_comments: true,
112             coalesce_characters: true,
113             extra_entities: HashMap::new(),
114             ignore_end_of_stream: false,
115             replace_unknown_entity_references: false,
116             ignore_root_level_whitespace: true,
117         }
118     }
119 
120     /// Creates an XML reader with this configuration.
121     ///
122     /// This is a convenience method for configuring and creating a reader at the same time:
123     ///
124     /// ```rust
125     /// use xml::reader::ParserConfig;
126     ///
127     /// let mut source: &[u8] = b"...";
128     ///
129     /// let reader = ParserConfig::new()
130     ///     .trim_whitespace(true)
131     ///     .ignore_comments(true)
132     ///     .coalesce_characters(false)
133     ///     .create_reader(&mut source);
134     /// ```
135     ///
136     /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
137     /// this configuration object.
138     #[inline]
create_reader<R: Read>(self, source: R) -> EventReader<R>139     pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
140         EventReader::new_with_config(source, self)
141     }
142 
143     /// Adds a new entity mapping and returns an updated config object.
144     ///
145     /// This is a convenience method for adding external entities mappings to the XML parser.
146     /// An example:
147     ///
148     /// ```rust
149     /// use xml::reader::ParserConfig;
150     ///
151     /// let mut source: &[u8] = b"...";
152     ///
153     /// let reader = ParserConfig::new()
154     ///     .add_entity("nbsp", " ")
155     ///     .add_entity("copy", "©")
156     ///     .add_entity("reg", "®")
157     ///     .create_reader(&mut source);
158     /// ```
add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig159     pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig {
160         self.extra_entities.insert(entity.into(), value.into());
161         self
162     }
163 }
164 
165 impl Default for ParserConfig {
166     #[inline]
default() -> ParserConfig167     fn default() -> ParserConfig {
168         ParserConfig::new()
169     }
170 }
171 
172 gen_setters! { ParserConfig,
173     trim_whitespace: val bool,
174     whitespace_to_characters: val bool,
175     cdata_to_characters: val bool,
176     ignore_comments: val bool,
177     coalesce_characters: val bool,
178     ignore_end_of_stream: val bool,
179     replace_unknown_entity_references: val bool,
180     ignore_root_level_whitespace: val bool
181 }
182