• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Contains functions for performing XML special characters escaping.
2 
3 use std::borrow::Cow;
4 
/// Result of looking up one input character in an escaping table.
enum Value {
    /// The character needs no escaping and is emitted unchanged.
    Char(char),
    /// The character is replaced by this entity or character reference.
    Str(&'static str),
}

impl Value {
    /// Maps `c` to its escaped form for use inside an attribute value.
    ///
    /// The five markup characters (`<`, `>`, `"`, `'`, `&`) become their predefined entities.
    /// Tab, line feed and carriage return are written as character references (`&#x9;`,
    /// `&#xA;`, `&#xD;`): a conforming parser replaces each of these characters with a space
    /// when it appears literally in an attribute value, so emitting them verbatim would not
    /// survive a write/read round trip.
    ///
    /// Any other character is returned as `Value::Char(c)`.
    fn dispatch_for_attribute(c: char) -> Value {
        match c {
            '<' => Value::Str("&lt;"),
            '>' => Value::Str("&gt;"),
            '"' => Value::Str("&quot;"),
            '\'' => Value::Str("&apos;"),
            '&' => Value::Str("&amp;"),
            '\t' => Value::Str("&#x9;"),
            '\n' => Value::Str("&#xA;"),
            '\r' => Value::Str("&#xD;"),
            _ => Value::Char(c),
        }
    }

    /// Maps `c` to its escaped form for use inside PCDATA (element text).
    ///
    /// Only `<` and `&` are replaced; every other character, including `>` and both quote
    /// characters, is returned as `Value::Char(c)`.
    fn dispatch_for_pcdata(c: char) -> Value {
        match c {
            '<' => Value::Str("&lt;"),
            '&' => Value::Str("&amp;"),
            _ => Value::Char(c),
        }
    }
}
32 
33 enum Process<'a> {
34     Borrowed(&'a str),
35     Owned(String)
36 }
37 
38 impl<'a> Process<'a> {
process(&mut self, (i, next): (usize, Value))39     fn process(&mut self, (i, next): (usize, Value)) {
40         match next {
41             Value::Str(s) => match *self {
42                 Process::Owned(ref mut o) => o.push_str(s),
43                 Process::Borrowed(b) => {
44                     let mut r = String::with_capacity(b.len() + s.len());
45                     r.push_str(&b[..i]);
46                     r.push_str(s);
47                     *self = Process::Owned(r);
48                 }
49             },
50             Value::Char(c) => match *self {
51                 Process::Borrowed(_) => {}
52                 Process::Owned(ref mut o) => o.push(c)
53             }
54         }
55     }
56 
into_result(self) -> Cow<'a, str>57     fn into_result(self) -> Cow<'a, str> {
58         match self {
59             Process::Borrowed(b) => Cow::Borrowed(b),
60             Process::Owned(o) => Cow::Owned(o)
61         }
62     }
63 }
64 
65 impl<'a> Extend<(usize, Value)> for Process<'a> {
extend<I: IntoIterator<Item=(usize, Value)>>(&mut self, it: I)66     fn extend<I: IntoIterator<Item=(usize, Value)>>(&mut self, it: I) {
67         for v in it.into_iter() {
68             self.process(v);
69         }
70     }
71 }
72 
escape_str(s: &str, dispatch: fn(char) -> Value) -> Cow<str>73 fn escape_str(s: &str, dispatch: fn(char) -> Value) -> Cow<str> {
74     let mut p = Process::Borrowed(s);
75     p.extend(s.char_indices().map(|(ind, c)| (ind, dispatch(c))));
76     p.into_result()
77 }
78 
79 /// Performs escaping of common XML characters inside an attribute value.
80 ///
81 /// This function replaces several important markup characters with their
82 /// entity equivalents:
83 ///
84 /// * `<` → `&lt;`
85 /// * `>` → `&gt;`
86 /// * `"` → `&quot;`
87 /// * `'` → `&apos;`
88 /// * `&` → `&amp;`
89 ///
90 /// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
91 ///
92 /// Does not perform allocations if the given string does not contain escapable characters.
93 #[inline]
escape_str_attribute(s: &str) -> Cow<str>94 pub fn escape_str_attribute(s: &str) -> Cow<str> {
95     escape_str(s, Value::dispatch_for_attribute)
96 }
97 
98 /// Performs escaping of common XML characters inside PCDATA.
99 ///
100 /// This function replaces several important markup characters with their
101 /// entity equivalents:
102 ///
103 /// * `<` → `&lt;`
104 /// * `&` → `&amp;`
105 ///
106 /// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
107 ///
108 /// Does not perform allocations if the given string does not contain escapable characters.
109 #[inline]
escape_str_pcdata(s: &str) -> Cow<str>110 pub fn escape_str_pcdata(s: &str) -> Cow<str> {
111     escape_str(s, Value::dispatch_for_pcdata)
112 }
113 
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::{escape_str_attribute, escape_str_pcdata};

    /// Asserts that `cow` still borrows its input, i.e. that no allocation took place.
    fn assert_borrowed(cow: Cow<str>, expected: &str) {
        match cow {
            Cow::Borrowed(s) => assert_eq!(s, expected),
            Cow::Owned(s) => panic!("expected a borrowed result, got owned {:?}", s),
        }
    }

    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }

    #[test]
    fn test_attribute_escapes_markup_characters() {
        assert_eq!(
            escape_str_attribute("<>\"'&"),
            "&lt;&gt;&quot;&apos;&amp;"
        );
    }

    #[test]
    fn test_attribute_escapes_line_breaks() {
        assert_eq!(escape_str_attribute("a\nb\rc"), "a&#xA;b&#xD;c");
    }

    #[test]
    fn test_pcdata_escapes_lt_and_amp_but_not_quotes() {
        assert_eq!(escape_str_pcdata("a < b && c"), "a &lt; b &amp;&amp; c");
        assert_eq!(escape_str_pcdata("\"quoted\" 'text'"), "\"quoted\" 'text'");
    }

    #[test]
    fn test_text_after_an_escape_is_preserved() {
        // Exercises the path where plain characters are appended to the owned buffer.
        assert_eq!(escape_str_attribute("a&b"), "a&amp;b");
        assert_eq!(escape_str_pcdata("x<y and more"), "x&lt;y and more");
    }

    #[test]
    fn test_unchanged_input_is_borrowed() {
        assert_borrowed(escape_str_attribute("plain text"), "plain text");
        assert_borrowed(escape_str_pcdata("plain text"), "plain text");
        assert_borrowed(escape_str_pcdata("\"quoted\""), "\"quoted\"");
    }

    #[test]
    fn test_empty_input() {
        assert_borrowed(escape_str_attribute(""), "");
        assert_borrowed(escape_str_pcdata(""), "");
    }
}
126 
127