1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 //! as used by HTML forms.
12 //!
//! Converts between a string (such as a URL’s query string)
14 //! and a sequence of (name, value) pairs.
15
16 #[macro_use]
17 extern crate matches;
18
19 use percent_encoding::{percent_decode, percent_encode_byte};
20 use std::borrow::{Borrow, Cow};
21 use std::str;
22
23 /// Convert a byte string in the `application/x-www-form-urlencoded` syntax
24 /// into a iterator of (name, value) pairs.
25 ///
26 /// Use `parse(input.as_bytes())` to parse a `&str` string.
27 ///
28 /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
29 /// converted to `[("#first", "%try%")]`.
30 #[inline]
parse(input: &[u8]) -> Parse<'_>31 pub fn parse(input: &[u8]) -> Parse<'_> {
32 Parse { input }
33 }
/// The return type of `parse()`: a lazy iterator over the (name, value)
/// pairs of an `application/x-www-form-urlencoded` byte string.
///
/// The iterator only holds a borrowed slice, so copying it is cheap; a copy
/// resumes from the same position as the value it was copied from.
#[derive(Copy, Clone, Debug)]
pub struct Parse<'a> {
    /// The part of the input that has not been consumed yet.
    input: &'a [u8],
}
39
40 impl<'a> Iterator for Parse<'a> {
41 type Item = (Cow<'a, str>, Cow<'a, str>);
42
next(&mut self) -> Option<Self::Item>43 fn next(&mut self) -> Option<Self::Item> {
44 loop {
45 if self.input.is_empty() {
46 return None;
47 }
48 let mut split2 = self.input.splitn(2, |&b| b == b'&');
49 let sequence = split2.next().unwrap();
50 self.input = split2.next().unwrap_or(&[][..]);
51 if sequence.is_empty() {
52 continue;
53 }
54 let mut split2 = sequence.splitn(2, |&b| b == b'=');
55 let name = split2.next().unwrap();
56 let value = split2.next().unwrap_or(&[][..]);
57 return Some((decode(name), decode(value)));
58 }
59 }
60 }
61
decode(input: &[u8]) -> Cow<'_, str>62 fn decode(input: &[u8]) -> Cow<'_, str> {
63 let replaced = replace_plus(input);
64 decode_utf8_lossy(match percent_decode(&replaced).into() {
65 Cow::Owned(vec) => Cow::Owned(vec),
66 Cow::Borrowed(_) => replaced,
67 })
68 }
69
/// Replace every b'+' with b' '.
///
/// Returns the input borrowed when it contains no `+`, and a modified copy
/// otherwise.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    let spaced: Vec<u8> = input
        .iter()
        .map(|&byte| if byte == b'+' { b' ' } else { byte })
        .collect();
    Cow::Owned(spaced)
}
86
87 impl<'a> Parse<'a> {
88 /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
into_owned(self) -> ParseIntoOwned<'a>89 pub fn into_owned(self) -> ParseIntoOwned<'a> {
90 ParseIntoOwned { inner: self }
91 }
92 }
93
94 /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
95 pub struct ParseIntoOwned<'a> {
96 inner: Parse<'a>,
97 }
98
99 impl<'a> Iterator for ParseIntoOwned<'a> {
100 type Item = (String, String);
101
next(&mut self) -> Option<Self::Item>102 fn next(&mut self) -> Option<Self::Item> {
103 self.inner
104 .next()
105 .map(|(k, v)| (k.into_owned(), v.into_owned()))
106 }
107 }
108
109 /// The [`application/x-www-form-urlencoded` byte serializer](
110 /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
111 ///
112 /// Return an iterator of `&str` slices.
byte_serialize(input: &[u8]) -> ByteSerialize<'_>113 pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
114 ByteSerialize { bytes: input }
115 }
116
/// Return value of `byte_serialize()`: an iterator over the `&str` pieces
/// of the serialized output.
///
/// The iterator only holds a borrowed slice, so cloning it is cheap; a clone
/// resumes from the same position as the value it was cloned from.
#[derive(Clone, Debug)]
pub struct ByteSerialize<'a> {
    /// The bytes that have not been serialized yet.
    bytes: &'a [u8],
}
122
/// Whether `byte` is emitted as-is by the byte serializer.
///
/// That is the case for ASCII letters, ASCII digits, and the four
/// punctuation bytes `*`, `-`, `.` and `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
126
127 impl<'a> Iterator for ByteSerialize<'a> {
128 type Item = &'a str;
129
next(&mut self) -> Option<&'a str>130 fn next(&mut self) -> Option<&'a str> {
131 if let Some((&first, tail)) = self.bytes.split_first() {
132 if !byte_serialized_unchanged(first) {
133 self.bytes = tail;
134 return Some(if first == b' ' {
135 "+"
136 } else {
137 percent_encode_byte(first)
138 });
139 }
140 let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
141 let (unchanged_slice, remaining) = match position {
142 // 1 for first_byte + i unchanged in tail
143 Some(i) => self.bytes.split_at(1 + i),
144 None => (self.bytes, &[][..]),
145 };
146 self.bytes = remaining;
147 // This unsafe is appropriate because we have already checked these
148 // bytes in byte_serialized_unchanged, which checks for a subset
149 // of UTF-8. So we know these bytes are valid UTF-8, and doing
150 // another UTF-8 check would be wasteful.
151 Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
152 } else {
153 None
154 }
155 }
156
size_hint(&self) -> (usize, Option<usize>)157 fn size_hint(&self) -> (usize, Option<usize>) {
158 if self.bytes.is_empty() {
159 (0, Some(0))
160 } else {
161 (1, Some(self.bytes.len()))
162 }
163 }
164 }
165
166 /// The [`application/x-www-form-urlencoded` serializer](
167 /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
168 pub struct Serializer<'a, T: Target> {
169 target: Option<T>,
170 start_position: usize,
171 encoding: EncodingOverride<'a>,
172 }
173
/// An output destination for a `Serializer`: something that gives access to
/// a `String` to append to and that can be turned into a final value.
pub trait Target {
    /// The value handed back to the caller by `finish`.
    type Finished;

    /// Borrow the string that serialized output is appended to.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and return its finished form.
    fn finish(self) -> Self::Finished;
}
179
180 impl Target for String {
as_mut_string(&mut self) -> &mut String181 fn as_mut_string(&mut self) -> &mut String {
182 self
183 }
finish(self) -> Self184 fn finish(self) -> Self {
185 self
186 }
187 type Finished = Self;
188 }
189
190 impl<'a> Target for &'a mut String {
as_mut_string(&mut self) -> &mut String191 fn as_mut_string(&mut self) -> &mut String {
192 &mut **self
193 }
finish(self) -> Self194 fn finish(self) -> Self {
195 self
196 }
197 type Finished = Self;
198 }
199
200 impl<'a, T: Target> Serializer<'a, T> {
201 /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
202 ///
203 /// If the target is non-empty,
204 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
new(target: T) -> Self205 pub fn new(target: T) -> Self {
206 Self::for_suffix(target, 0)
207 }
208
209 /// Create a new `application/x-www-form-urlencoded` serializer
210 /// for a suffix of the given target.
211 ///
212 /// If that suffix is non-empty,
213 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
for_suffix(mut target: T, start_position: usize) -> Self214 pub fn for_suffix(mut target: T, start_position: usize) -> Self {
215 if target.as_mut_string().len() < start_position {
216 panic!(
217 "invalid length {} for target of length {}",
218 start_position,
219 target.as_mut_string().len()
220 );
221 }
222
223 Serializer {
224 target: Some(target),
225 start_position,
226 encoding: None,
227 }
228 }
229
230 /// Remove any existing name/value pair.
231 ///
232 /// Panics if called after `.finish()`.
clear(&mut self) -> &mut Self233 pub fn clear(&mut self) -> &mut Self {
234 string(&mut self.target).truncate(self.start_position);
235 self
236 }
237
238 /// Set the character encoding to be used for names and values before percent-encoding.
encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self239 pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
240 self.encoding = new;
241 self
242 }
243
244 /// Serialize and append a name/value pair.
245 ///
246 /// Panics if called after `.finish()`.
append_pair(&mut self, name: &str, value: &str) -> &mut Self247 pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
248 append_pair(
249 string(&mut self.target),
250 self.start_position,
251 self.encoding,
252 name,
253 value,
254 );
255 self
256 }
257
258 /// Serialize and append a name of parameter without any value.
259 ///
260 /// Panics if called after `.finish()`.
append_key_only(&mut self, name: &str) -> &mut Self261 pub fn append_key_only(&mut self, name: &str) -> &mut Self {
262 append_key_only(
263 string(&mut self.target),
264 self.start_position,
265 self.encoding,
266 name,
267 );
268 self
269 }
270
271 /// Serialize and append a number of name/value pairs.
272 ///
273 /// This simply calls `append_pair` repeatedly.
274 /// This can be more convenient, so the user doesn’t need to introduce a block
275 /// to limit the scope of `Serializer`’s borrow of its string.
276 ///
277 /// Panics if called after `.finish()`.
extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,278 pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
279 where
280 I: IntoIterator,
281 I::Item: Borrow<(K, V)>,
282 K: AsRef<str>,
283 V: AsRef<str>,
284 {
285 {
286 let string = string(&mut self.target);
287 for pair in iter {
288 let &(ref k, ref v) = pair.borrow();
289 append_pair(
290 string,
291 self.start_position,
292 self.encoding,
293 k.as_ref(),
294 v.as_ref(),
295 );
296 }
297 }
298 self
299 }
300
301 /// Serialize and append a number of names without values.
302 ///
303 /// This simply calls `append_key_only` repeatedly.
304 /// This can be more convenient, so the user doesn’t need to introduce a block
305 /// to limit the scope of `Serializer`’s borrow of its string.
306 ///
307 /// Panics if called after `.finish()`.
extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<K>, K: AsRef<str>,308 pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
309 where
310 I: IntoIterator,
311 I::Item: Borrow<K>,
312 K: AsRef<str>,
313 {
314 {
315 let string = string(&mut self.target);
316 for key in iter {
317 let k = key.borrow().as_ref();
318 append_key_only(string, self.start_position, self.encoding, k);
319 }
320 }
321 self
322 }
323
324 /// If this serializer was constructed with a string, take and return that string.
325 ///
326 /// ```rust
327 /// use form_urlencoded;
328 /// let encoded: String = form_urlencoded::Serializer::new(String::new())
329 /// .append_pair("foo", "bar & baz")
330 /// .append_pair("saison", "Été+hiver")
331 /// .finish();
332 /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
333 /// ```
334 ///
335 /// Panics if called more than once.
finish(&mut self) -> T::Finished336 pub fn finish(&mut self) -> T::Finished {
337 self.target
338 .take()
339 .expect("url::form_urlencoded::Serializer double finish")
340 .finish()
341 }
342 }
343
/// Append a `&` separator when the serialized part of `string` (everything
/// from byte `start_position` onwards) is not empty.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_content = string.len() > start_position;
    if has_content {
        string.push('&');
    }
}
349
string<T: Target>(target: &mut Option<T>) -> &mut String350 fn string<T: Target>(target: &mut Option<T>) -> &mut String {
351 target
352 .as_mut()
353 .expect("url::form_urlencoded::Serializer finished")
354 .as_mut_string()
355 }
356
append_pair( string: &mut String, start_position: usize, encoding: EncodingOverride<'_>, name: &str, value: &str, )357 fn append_pair(
358 string: &mut String,
359 start_position: usize,
360 encoding: EncodingOverride<'_>,
361 name: &str,
362 value: &str,
363 ) {
364 append_separator_if_needed(string, start_position);
365 append_encoded(name, string, encoding);
366 string.push('=');
367 append_encoded(value, string, encoding);
368 }
369
append_key_only( string: &mut String, start_position: usize, encoding: EncodingOverride, name: &str, )370 fn append_key_only(
371 string: &mut String,
372 start_position: usize,
373 encoding: EncodingOverride,
374 name: &str,
375 ) {
376 append_separator_if_needed(string, start_position);
377 append_encoded(name, string, encoding);
378 }
379
append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>)380 fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
381 string.extend(byte_serialize(&encode(encoding, s)))
382 }
383
encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]>384 pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
385 if let Some(o) = encoding_override {
386 return o(input);
387 }
388 input.as_bytes().into()
389 }
390
/// Convert bytes to a string, replacing invalid UTF-8 sequences the way
/// `String::from_utf8_lossy` does, without copying bytes that are already
/// valid UTF-8.
///
/// Borrowed input stays borrowed when it is valid; owned input always comes
/// back owned, re-using its allocation when it is valid.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    let bytes = match input {
        Cow::Borrowed(slice) => return String::from_utf8_lossy(slice),
        Cow::Owned(vec) => vec,
    };

    match String::from_utf8_lossy(&bytes) {
        // Invalid sequences were replaced, so a new string was allocated.
        Cow::Owned(repaired) => Cow::Owned(repaired),
        Cow::Borrowed(utf8) => {
            // `from_utf8_lossy` only returns `Cow::Borrowed` when it had
            // nothing to replace, i.e. when `bytes` is valid UTF-8; for
            // invalid input it has to allocate a new string to hold the
            // replacement characters.

            // Confirm in debug builds that the borrow is our own buffer.
            debug_assert!(std::ptr::eq(utf8.as_bytes(), bytes.as_slice()));

            // SAFETY: `bytes` was just validated as UTF-8 (see above). We own
            // the buffer, so it is re-used rather than copied.
            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
        }
    }
}
419
/// An optional function that converts a string to bytes in some character
/// encoding. The serializer applies it to names and values before
/// byte-serializing them; with `None`, the UTF-8 bytes of the string are used.
pub type EncodingOverride<'a> = Option<&'a dyn for<'s> Fn(&'s str) -> Cow<'s, [u8]>>;
421