• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 Nicholas Allegra (comex).
2 // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
3 // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
4 // copied, modified, or distributed except according to those terms.
5 
6 //! Same idea as (but implementation not directly based on) the Python shlex module.  However, this
7 //! implementation does not support any of the Python module's customization because it makes
8 //! parsing slower and is fairly useless.  You only get the default settings of shlex.split, which
9 //! mimic the POSIX shell:
10 //! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
11 //!
12 //! This implementation also deviates from the Python version in not treating `\r` specially, which
13 //! I believe is more compliant.
14 //!
15 //! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
16 //! directly as a micro-optimization.
17 //!
18 //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
19 //! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
20 
21 #![cfg_attr(not(feature = "std"), no_std)]
22 
23 extern crate alloc;
24 use alloc::vec::Vec;
25 use alloc::borrow::Cow;
26 use alloc::string::String;
27 #[cfg(test)]
28 use alloc::vec;
29 #[cfg(test)]
30 use alloc::borrow::ToOwned;
31 
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
///
/// Each call to `next` yields one fully unquoted word as an owned `String`.
#[derive(Clone, Debug)]
pub struct Shlex<'a> {
    /// Remaining, not yet consumed, bytes of the input string.
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash.  Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}
44 
45 impl<'a> Shlex<'a> {
new(in_str: &'a str) -> Self46     pub fn new(in_str: &'a str) -> Self {
47         Shlex {
48             in_iter: in_str.bytes(),
49             line_no: 1,
50             had_error: false,
51         }
52     }
53 
parse_word(&mut self, mut ch: u8) -> Option<String>54     fn parse_word(&mut self, mut ch: u8) -> Option<String> {
55         let mut result: Vec<u8> = Vec::new();
56         loop {
57             match ch as char {
58                 '"' => if let Err(()) = self.parse_double(&mut result) {
59                     self.had_error = true;
60                     return None;
61                 },
62                 '\'' => if let Err(()) = self.parse_single(&mut result) {
63                     self.had_error = true;
64                     return None;
65                 },
66                 '\\' => if let Some(ch2) = self.next_char() {
67                     if ch2 != '\n' as u8 { result.push(ch2); }
68                 } else {
69                     self.had_error = true;
70                     return None;
71                 },
72                 ' ' | '\t' | '\n' => { break; },
73                 _ => { result.push(ch as u8); },
74             }
75             if let Some(ch2) = self.next_char() { ch = ch2; } else { break; }
76         }
77         unsafe { Some(String::from_utf8_unchecked(result)) }
78     }
79 
parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()>80     fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
81         loop {
82             if let Some(ch2) = self.next_char() {
83                 match ch2 as char {
84                     '\\' => {
85                         if let Some(ch3) = self.next_char() {
86                             match ch3 as char {
87                                 // \$ => $
88                                 '$' | '`' | '"' | '\\' => { result.push(ch3); },
89                                 // \<newline> => nothing
90                                 '\n' => {},
91                                 // \x => =x
92                                 _ => { result.push('\\' as u8); result.push(ch3); }
93                             }
94                         } else {
95                             return Err(());
96                         }
97                     },
98                     '"' => { return Ok(()); },
99                     _ => { result.push(ch2); },
100                 }
101             } else {
102                 return Err(());
103             }
104         }
105     }
106 
parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()>107     fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
108         loop {
109             if let Some(ch2) = self.next_char() {
110                 match ch2 as char {
111                     '\'' => { return Ok(()); },
112                     _ => { result.push(ch2); },
113                 }
114             } else {
115                 return Err(());
116             }
117         }
118     }
119 
next_char(&mut self) -> Option<u8>120     fn next_char(&mut self) -> Option<u8> {
121         let res = self.in_iter.next();
122         if res == Some('\n' as u8) { self.line_no += 1; }
123         res
124     }
125 }
126 
127 impl<'a> Iterator for Shlex<'a> {
128     type Item = String;
next(&mut self) -> Option<String>129     fn next(&mut self) -> Option<String> {
130         if let Some(mut ch) = self.next_char() {
131             // skip initial whitespace
132             loop {
133                 match ch as char {
134                     ' ' | '\t' | '\n' => {},
135                     '#' => {
136                         while let Some(ch2) = self.next_char() {
137                             if ch2 as char == '\n' { break; }
138                         }
139                     },
140                     _ => { break; }
141                 }
142                 if let Some(ch2) = self.next_char() { ch = ch2; } else { return None; }
143             }
144             self.parse_word(ch)
145         } else { // no initial character
146             None
147         }
148     }
149 
150 }
151 
152 /// Convenience function that consumes the whole string at once.  Returns None if the input was
153 /// erroneous.
split(in_str: &str) -> Option<Vec<String>>154 pub fn split(in_str: &str) -> Option<Vec<String>> {
155     let mut shl = Shlex::new(in_str);
156     let res = shl.by_ref().collect();
157     if shl.had_error { None } else { Some(res) }
158 }
159 
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * The empty string becomes `""`.
/// * A word containing any shell-significant character (operators, quotes, whitespace, glob
///   characters, `#`, `~`, `=`, `%`, or the brace-expansion characters `{` and `}`) is wrapped
///   in double quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped inside.
/// * Any other word is returned unchanged, borrowed from the input.
pub fn quote(in_str: &str) -> Cow<str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }

    let needs_quoting = in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\'' |
        b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%' |
        // Braces trigger brace expansion in several shells, so they must not be left bare.
        b'{' | b'}' => true,
        _ => false,
    });
    if !needs_quoting {
        return in_str.into();
    }

    // Two quotes plus the word itself; escapes may grow it a little further.
    let mut out = String::with_capacity(in_str.len() + 2);
    out.push('"');
    for c in in_str.chars() {
        // These keep their special meaning inside double quotes and need a backslash.
        match c {
            '$' | '`' | '"' | '\\' => out.push('\\'),
            _ => {}
        }
        out.push(c);
    }
    out.push('"');
    out.into()
}
184 
185 /// Convenience function that consumes an iterable of words and turns it into a single string,
186 /// quoting words when necessary. Consecutive words will be separated by a single space.
join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String187 pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
188     words.into_iter()
189         .map(quote)
190         .collect::<Vec<_>>()
191         .join(" ")
192 }
193 
/// Table of `(input, expected)` pairs for `test_split`; `None` marks an input that must be
/// rejected as erroneous.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    // Plain words, whitespace separation and double quotes glued to a word.
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    // Backslash-newline is a line continuation, inside and outside double quotes.
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    // Backslashes are literal inside single quotes, so `\'` does not escape the quote.
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    // Input ending after a backslash or inside a quotation is an error.
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    // `#` starts a comment only at the beginning of a word.
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    // More literal backslashes inside single quotes.
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
217 
/// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and checks the exact outcome.
#[test]
fn test_split() {
    for &(input, expected) in SPLIT_TEST_ITEMS {
        // Turn the borrowed expectation into the owned shape `split` returns.
        let expected: Option<Vec<String>> =
            expected.map(|words| words.iter().map(|word| (*word).to_owned()).collect());
        assert_eq!(split(input), expected);
    }
}
224 
/// Checks that `line_no` follows the newlines consumed so far.
///
/// The word sequence is asserted explicitly so the line-number checks can never be skipped.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    assert_eq!(sh.line_no, 1);

    // Reading "foo" consumes the leading newline and the newline that terminates the word.
    assert_eq!(sh.next(), Some("foo".to_owned()));
    assert_eq!(sh.line_no, 3);

    // "bar" runs to the end of input without any further newline.
    assert_eq!(sh.next(), Some("bar".to_owned()));
    assert_eq!(sh.line_no, 3);

    assert_eq!(sh.next(), None);
    assert!(!sh.had_error);
}
234 
/// Checks `quote` on a plain word, a word with a space, a lone double quote and the empty word.
#[test]
fn test_quote() {
    let cases = [
        ("foobar", "foobar"),
        ("foo bar", "\"foo bar\""),
        ("\"", "\"\\\"\""),
        ("", "\"\""),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(quote(input), expected);
    }
}
242 
/// Checks `join` on an empty list, an empty word, plain words and a word that needs quoting.
#[test]
fn test_join() {
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "\"\""),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "\"foo bar\" baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
250