• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 //! as used by HTML forms.
12 //!
//! Converts between a string (such as a URL’s query string)
14 //! and a sequence of (name, value) pairs.
15 
16 use percent_encoding::{percent_decode, percent_encode_byte};
17 use std::borrow::{Borrow, Cow};
18 use std::str;
19 
20 /// Convert a byte string in the `application/x-www-form-urlencoded` syntax
21 /// into a iterator of (name, value) pairs.
22 ///
23 /// Use `parse(input.as_bytes())` to parse a `&str` string.
24 ///
25 /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
26 /// converted to `[("#first", "%try%")]`.
27 #[inline]
parse(input: &[u8]) -> Parse<'_>28 pub fn parse(input: &[u8]) -> Parse<'_> {
29     Parse { input }
30 }
31 /// The return type of `parse()`.
32 #[derive(Copy, Clone)]
33 pub struct Parse<'a> {
34     input: &'a [u8],
35 }
36 
37 impl<'a> Iterator for Parse<'a> {
38     type Item = (Cow<'a, str>, Cow<'a, str>);
39 
next(&mut self) -> Option<Self::Item>40     fn next(&mut self) -> Option<Self::Item> {
41         loop {
42             if self.input.is_empty() {
43                 return None;
44             }
45             let mut split2 = self.input.splitn(2, |&b| b == b'&');
46             let sequence = split2.next().unwrap();
47             self.input = split2.next().unwrap_or(&[][..]);
48             if sequence.is_empty() {
49                 continue;
50             }
51             let mut split2 = sequence.splitn(2, |&b| b == b'=');
52             let name = split2.next().unwrap();
53             let value = split2.next().unwrap_or(&[][..]);
54             return Some((decode(name), decode(value)));
55         }
56     }
57 }
58 
decode(input: &[u8]) -> Cow<'_, str>59 fn decode(input: &[u8]) -> Cow<'_, str> {
60     let replaced = replace_plus(input);
61     decode_utf8_lossy(match percent_decode(&replaced).into() {
62         Cow::Owned(vec) => Cow::Owned(vec),
63         Cow::Borrowed(_) => replaced,
64     })
65 }
66 
/// Replace every b'+' with b' '.
///
/// Input without any `+` is returned borrowed, with no allocation;
/// otherwise an owned copy with the substitutions applied is returned.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    let spaced: Vec<u8> = input
        .iter()
        .map(|&byte| if byte == b'+' { b' ' } else { byte })
        .collect();
    Cow::Owned(spaced)
}
83 
84 impl<'a> Parse<'a> {
85     /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
into_owned(self) -> ParseIntoOwned<'a>86     pub fn into_owned(self) -> ParseIntoOwned<'a> {
87         ParseIntoOwned { inner: self }
88     }
89 }
90 
91 /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
92 pub struct ParseIntoOwned<'a> {
93     inner: Parse<'a>,
94 }
95 
96 impl<'a> Iterator for ParseIntoOwned<'a> {
97     type Item = (String, String);
98 
next(&mut self) -> Option<Self::Item>99     fn next(&mut self) -> Option<Self::Item> {
100         self.inner
101             .next()
102             .map(|(k, v)| (k.into_owned(), v.into_owned()))
103     }
104 }
105 
106 /// The [`application/x-www-form-urlencoded` byte serializer](
107 /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
108 ///
109 /// Return an iterator of `&str` slices.
byte_serialize(input: &[u8]) -> ByteSerialize<'_>110 pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
111     ByteSerialize { bytes: input }
112 }
113 
/// Return value of `byte_serialize()`.
///
/// Holds the bytes that have not been serialized yet.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    bytes: &'a [u8],
}
119 
/// Whether `byte` is emitted as-is by the byte serializer: an ASCII letter,
/// an ASCII digit, or one of `*`, `-`, `.`, `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
123 
124 impl<'a> Iterator for ByteSerialize<'a> {
125     type Item = &'a str;
126 
next(&mut self) -> Option<&'a str>127     fn next(&mut self) -> Option<&'a str> {
128         if let Some((&first, tail)) = self.bytes.split_first() {
129             if !byte_serialized_unchanged(first) {
130                 self.bytes = tail;
131                 return Some(if first == b' ' {
132                     "+"
133                 } else {
134                     percent_encode_byte(first)
135                 });
136             }
137             let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
138             let (unchanged_slice, remaining) = match position {
139                 // 1 for first_byte + i unchanged in tail
140                 Some(i) => self.bytes.split_at(1 + i),
141                 None => (self.bytes, &[][..]),
142             };
143             self.bytes = remaining;
144             // This unsafe is appropriate because we have already checked these
145             // bytes in byte_serialized_unchanged, which checks for a subset
146             // of UTF-8. So we know these bytes are valid UTF-8, and doing
147             // another UTF-8 check would be wasteful.
148             Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
149         } else {
150             None
151         }
152     }
153 
size_hint(&self) -> (usize, Option<usize>)154     fn size_hint(&self) -> (usize, Option<usize>) {
155         if self.bytes.is_empty() {
156             (0, Some(0))
157         } else {
158             (1, Some(self.bytes.len()))
159         }
160     }
161 }
162 
163 /// The [`application/x-www-form-urlencoded` serializer](
164 /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
165 pub struct Serializer<'a, T: Target> {
166     target: Option<T>,
167     start_position: usize,
168     encoding: EncodingOverride<'a>,
169 }
170 
/// A destination that a `Serializer` can append to.
pub trait Target {
    /// The value handed back to the caller by `finish`.
    type Finished;

    /// Borrow the underlying `String` so output can be appended to it.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and produce its `Finished` value.
    fn finish(self) -> Self::Finished;
}

/// An owned `String` is written to directly and returned as-is when finished.
impl Target for String {
    type Finished = Self;

    fn as_mut_string(&mut self) -> &mut String {
        self
    }

    fn finish(self) -> Self {
        self
    }
}

/// A mutable borrow of a `String` is written through, and the same borrow
/// is returned when finished.
impl<'a> Target for &'a mut String {
    type Finished = Self;

    fn as_mut_string(&mut self) -> &mut String {
        // Reborrow through the outer `&mut` to reach the borrowed `String`.
        let inner: &mut String = self;
        inner
    }

    fn finish(self) -> Self {
        self
    }
}
196 
197 impl<'a, T: Target> Serializer<'a, T> {
198     /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
199     ///
200     /// If the target is non-empty,
201     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
new(target: T) -> Self202     pub fn new(target: T) -> Self {
203         Self::for_suffix(target, 0)
204     }
205 
206     /// Create a new `application/x-www-form-urlencoded` serializer
207     /// for a suffix of the given target.
208     ///
209     /// If that suffix is non-empty,
210     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
for_suffix(mut target: T, start_position: usize) -> Self211     pub fn for_suffix(mut target: T, start_position: usize) -> Self {
212         if target.as_mut_string().len() < start_position {
213             panic!(
214                 "invalid length {} for target of length {}",
215                 start_position,
216                 target.as_mut_string().len()
217             );
218         }
219 
220         Serializer {
221             target: Some(target),
222             start_position,
223             encoding: None,
224         }
225     }
226 
227     /// Remove any existing name/value pair.
228     ///
229     /// Panics if called after `.finish()`.
clear(&mut self) -> &mut Self230     pub fn clear(&mut self) -> &mut Self {
231         string(&mut self.target).truncate(self.start_position);
232         self
233     }
234 
235     /// Set the character encoding to be used for names and values before percent-encoding.
encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self236     pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
237         self.encoding = new;
238         self
239     }
240 
241     /// Serialize and append a name/value pair.
242     ///
243     /// Panics if called after `.finish()`.
append_pair(&mut self, name: &str, value: &str) -> &mut Self244     pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
245         append_pair(
246             string(&mut self.target),
247             self.start_position,
248             self.encoding,
249             name,
250             value,
251         );
252         self
253     }
254 
255     /// Serialize and append a name of parameter without any value.
256     ///
257     /// Panics if called after `.finish()`.
append_key_only(&mut self, name: &str) -> &mut Self258     pub fn append_key_only(&mut self, name: &str) -> &mut Self {
259         append_key_only(
260             string(&mut self.target),
261             self.start_position,
262             self.encoding,
263             name,
264         );
265         self
266     }
267 
268     /// Serialize and append a number of name/value pairs.
269     ///
270     /// This simply calls `append_pair` repeatedly.
271     /// This can be more convenient, so the user doesn’t need to introduce a block
272     /// to limit the scope of `Serializer`’s borrow of its string.
273     ///
274     /// Panics if called after `.finish()`.
extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,275     pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
276     where
277         I: IntoIterator,
278         I::Item: Borrow<(K, V)>,
279         K: AsRef<str>,
280         V: AsRef<str>,
281     {
282         {
283             let string = string(&mut self.target);
284             for pair in iter {
285                 let &(ref k, ref v) = pair.borrow();
286                 append_pair(
287                     string,
288                     self.start_position,
289                     self.encoding,
290                     k.as_ref(),
291                     v.as_ref(),
292                 );
293             }
294         }
295         self
296     }
297 
298     /// Serialize and append a number of names without values.
299     ///
300     /// This simply calls `append_key_only` repeatedly.
301     /// This can be more convenient, so the user doesn’t need to introduce a block
302     /// to limit the scope of `Serializer`’s borrow of its string.
303     ///
304     /// Panics if called after `.finish()`.
extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<K>, K: AsRef<str>,305     pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
306     where
307         I: IntoIterator,
308         I::Item: Borrow<K>,
309         K: AsRef<str>,
310     {
311         {
312             let string = string(&mut self.target);
313             for key in iter {
314                 let k = key.borrow().as_ref();
315                 append_key_only(string, self.start_position, self.encoding, k);
316             }
317         }
318         self
319     }
320 
321     /// If this serializer was constructed with a string, take and return that string.
322     ///
323     /// ```rust
324     /// use form_urlencoded;
325     /// let encoded: String = form_urlencoded::Serializer::new(String::new())
326     ///     .append_pair("foo", "bar & baz")
327     ///     .append_pair("saison", "Été+hiver")
328     ///     .finish();
329     /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
330     /// ```
331     ///
332     /// Panics if called more than once.
finish(&mut self) -> T::Finished333     pub fn finish(&mut self) -> T::Finished {
334         self.target
335             .take()
336             .expect("url::form_urlencoded::Serializer double finish")
337             .finish()
338     }
339 }
340 
/// Push a `&` separator onto `string` when something has already been
/// written past `start_position`.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_serialized_content = start_position < string.len();
    if has_serialized_content {
        string.push('&');
    }
}
346 
string<T: Target>(target: &mut Option<T>) -> &mut String347 fn string<T: Target>(target: &mut Option<T>) -> &mut String {
348     target
349         .as_mut()
350         .expect("url::form_urlencoded::Serializer finished")
351         .as_mut_string()
352 }
353 
append_pair( string: &mut String, start_position: usize, encoding: EncodingOverride<'_>, name: &str, value: &str, )354 fn append_pair(
355     string: &mut String,
356     start_position: usize,
357     encoding: EncodingOverride<'_>,
358     name: &str,
359     value: &str,
360 ) {
361     append_separator_if_needed(string, start_position);
362     append_encoded(name, string, encoding);
363     string.push('=');
364     append_encoded(value, string, encoding);
365 }
366 
append_key_only( string: &mut String, start_position: usize, encoding: EncodingOverride, name: &str, )367 fn append_key_only(
368     string: &mut String,
369     start_position: usize,
370     encoding: EncodingOverride,
371     name: &str,
372 ) {
373     append_separator_if_needed(string, start_position);
374     append_encoded(name, string, encoding);
375 }
376 
append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>)377 fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
378     string.extend(byte_serialize(&encode(encoding, s)))
379 }
380 
encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]>381 pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
382     if let Some(o) = encoding_override {
383         return o(input);
384     }
385     input.as_bytes().into()
386 }
387 
/// Convert bytes to a string, replacing invalid UTF-8 sequences with
/// U+FFFD REPLACEMENT CHARACTER.
///
/// Borrowed input stays borrowed when it is already valid UTF-8.
/// Owned input always produces an owned string; when the bytes are valid
/// UTF-8 their buffer is reused rather than copied.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` takes over the vector's buffer, so this
            // path needs neither a copy nor an `unsafe` unchecked conversion.
            Ok(valid) => Cow::Owned(valid),
            // Invalid UTF-8: the error still holds the original bytes; run the
            // lossy conversion on them to insert the replacement characters.
            Err(invalid) => Cow::Owned(String::from_utf8_lossy(invalid.as_bytes()).into_owned()),
        },
    }
}
415 
/// An optional encoder applied to names and values before they are
/// byte-serialized: a borrowed function from a string to the bytes to serialize.
///
/// `None` means the string's own UTF-8 bytes are used.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
417