1 //! Contains parser configuration structure. 2 use std::io::Read; 3 use std::collections::HashMap; 4 5 use reader::EventReader; 6 7 /// Parser configuration structure. 8 /// 9 /// This structure contains various configuration options which affect 10 /// behavior of the parser. 11 #[derive(Clone, PartialEq, Eq, Debug)] 12 pub struct ParserConfig { 13 /// Whether or not should whitespace in textual events be removed. Default is false. 14 /// 15 /// When true, all standalone whitespace will be removed (this means no 16 /// `Whitespace` events will be emitted), and leading and trailing whitespace 17 /// from `Character` events will be deleted. If after trimming `Characters` 18 /// event will be empty, it will also be omitted from output stream. This is 19 /// possible, however, only if `whitespace_to_characters` or 20 /// `cdata_to_characters` options are set. 21 /// 22 /// This option does not affect CDATA events, unless `cdata_to_characters` 23 /// option is also set. In that case CDATA content will also be trimmed. 24 pub trim_whitespace: bool, 25 26 /// Whether or not should whitespace be converted to characters. 27 /// Default is false. 28 /// 29 /// If true, instead of `Whitespace` events `Characters` events with the 30 /// same content will be emitted. If `trim_whitespace` is also true, these 31 /// events will be trimmed to nothing and, consequently, not emitted. 32 pub whitespace_to_characters: bool, 33 34 /// Whether or not should CDATA be converted to characters. 35 /// Default is false. 36 /// 37 /// If true, instead of `CData` events `Characters` events with the same 38 /// content will be emitted. If `trim_whitespace` is also true, these events 39 /// will be trimmed. If corresponding CDATA contained nothing but whitespace, 40 /// this event will be omitted from the stream. 41 pub cdata_to_characters: bool, 42 43 /// Whether or not should comments be omitted. Default is true. 44 /// 45 /// If true, `Comment` events will not be emitted at all. 46 pub ignore_comments: bool, 47 48 /// Whether or not should sequential `Characters` events be merged. 49 /// Default is true. 50 /// 51 /// If true, multiple sequential `Characters` events will be merged into 52 /// a single event, that is, their data will be concatenated. 53 /// 54 /// Multiple sequential `Characters` events are only possible if either 55 /// `cdata_to_characters` or `ignore_comments` are set. Otherwise character 56 /// events will always be separated by other events. 57 pub coalesce_characters: bool, 58 59 /// A map of extra entities recognized by the parser. Default is an empty map. 60 /// 61 /// By default the XML parser recognizes the entities defined in the XML spec. Sometimes, 62 /// however, it is convenient to make the parser recognize additional entities which 63 /// are also not available through the DTD definitions (especially given that at the moment 64 /// DTD parsing is not supported). 65 pub extra_entities: HashMap<String, String>, 66 67 /// Whether or not the parser should ignore the end of stream. Default is false. 68 /// 69 /// By default the parser will either error out when it encounters a premature end of 70 /// stream or complete normally if the end of stream was expected. If you want to continue 71 /// reading from a stream whose input is supplied progressively, you can set this option to true. 72 /// In this case the parser will allow you to invoke the next() method even if a supposed end 73 /// of stream has happened. 74 /// 75 /// Note that support for this functionality is incomplete; for example, the parser will fail if 76 /// the premature end of stream happens inside PCDATA. Therefore, use this option at your own risk. 77 pub ignore_end_of_stream: bool, 78 79 /// Whether or not non-unicode entity references get replaced with the replacement character 80 /// 81 /// When true, any decimal or hexadecimal character reference that cannot be converted from a 82 /// u32 to a char using [std::char::from_u32](https://doc.rust-lang.org/std/char/fn.from_u32.html) 83 /// will be converted into the unicode REPLACEMENT CHARACTER (U+FFFD). 84 pub replace_unknown_entity_references: bool, 85 86 /// Whether or not whitespace at the root level of the document is ignored. Default is true. 87 /// 88 /// By default any whitespace that is not enclosed within at least one level of elements will be 89 /// ignored. Setting this value to false will cause root level whitespace events to be emitted. 90 pub ignore_root_level_whitespace: bool, 91 } 92 93 impl ParserConfig { 94 /// Returns a new config with default values. 95 /// 96 /// You can tweak default values using builder-like pattern: 97 /// 98 /// ```rust 99 /// use xml::reader::ParserConfig; 100 /// 101 /// let config = ParserConfig::new() 102 /// .trim_whitespace(true) 103 /// .ignore_comments(true) 104 /// .coalesce_characters(false); 105 /// ``` new() -> ParserConfig106 pub fn new() -> ParserConfig { 107 ParserConfig { 108 trim_whitespace: false, 109 whitespace_to_characters: false, 110 cdata_to_characters: false, 111 ignore_comments: true, 112 coalesce_characters: true, 113 extra_entities: HashMap::new(), 114 ignore_end_of_stream: false, 115 replace_unknown_entity_references: false, 116 ignore_root_level_whitespace: true, 117 } 118 } 119 120 /// Creates an XML reader with this configuration. 121 /// 122 /// This is a convenience method for configuring and creating a reader at the same time: 123 /// 124 /// ```rust 125 /// use xml::reader::ParserConfig; 126 /// 127 /// let mut source: &[u8] = b"..."; 128 /// 129 /// let reader = ParserConfig::new() 130 /// .trim_whitespace(true) 131 /// .ignore_comments(true) 132 /// .coalesce_characters(false) 133 /// .create_reader(&mut source); 134 /// ``` 135 /// 136 /// This method is exactly equivalent to calling `EventReader::new_with_config()` with 137 /// this configuration object. 138 #[inline] create_reader<R: Read>(self, source: R) -> EventReader<R>139 pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> { 140 EventReader::new_with_config(source, self) 141 } 142 143 /// Adds a new entity mapping and returns an updated config object. 144 /// 145 /// This is a convenience method for adding external entities mappings to the XML parser. 146 /// An example: 147 /// 148 /// ```rust 149 /// use xml::reader::ParserConfig; 150 /// 151 /// let mut source: &[u8] = b"..."; 152 /// 153 /// let reader = ParserConfig::new() 154 /// .add_entity("nbsp", " ") 155 /// .add_entity("copy", "©") 156 /// .add_entity("reg", "®") 157 /// .create_reader(&mut source); 158 /// ``` add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig159 pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig { 160 self.extra_entities.insert(entity.into(), value.into()); 161 self 162 } 163 } 164 165 impl Default for ParserConfig { 166 #[inline] default() -> ParserConfig167 fn default() -> ParserConfig { 168 ParserConfig::new() 169 } 170 } 171 172 gen_setters! { ParserConfig, 173 trim_whitespace: val bool, 174 whitespace_to_characters: val bool, 175 cdata_to_characters: val bool, 176 ignore_comments: val bool, 177 coalesce_characters: val bool, 178 ignore_end_of_stream: val bool, 179 replace_unknown_entity_references: val bool, 180 ignore_root_level_whitespace: val bool 181 } 182