• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::{
2     error::{ParseError, Reason},
3     expression::{ExprNode, Expression, ExpressionReq, Operator},
4     lexer::{Lexer, Token},
5     LicenseItem, LicenseReq, ParseMode,
6 };
7 use smallvec::SmallVec;
8 
9 impl Expression {
10     /// Given a license expression, attempts to parse and validate it as a valid
11     /// SPDX expression. Uses `ParseMode::Strict`.
12     ///
13     /// The validation can fail for many reasons:
14     /// * The expression contains invalid characters
15     /// * An unknown/invalid license or exception identifier was found. Only
16     ///     [SPDX short identifiers](https://spdx.org/ids) are allowed
17     /// * The expression contained unbalanced parentheses
18     /// * A license or exception immediately follows another license or exception, without
19     ///     a valid AND, OR, or WITH operator separating them
20     /// * An AND, OR, or WITH doesn't have a license or `)` preceding it
21     ///
22     /// ```
23     /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap();
24     /// ```
parse(original: &str) -> Result<Self, ParseError>25     pub fn parse(original: &str) -> Result<Self, ParseError> {
26         Self::parse_mode(original, ParseMode::STRICT)
27     }
28 
29     /// Canonicalizes the input expression into a form that can be parsed with
30     /// [`ParseMode::STRICT`]
31     ///
32     /// ## Transforms
33     ///
34     /// 1. '/' is replaced with ' OR '
35     /// 1. Lower-cased operators ('or', 'and', 'with') are upper-cased
36     /// 1. '+' is tranformed to `-or-later` for GNU licenses
37     /// 1. Invalid/imprecise license identifiers (eg. `apache2`) are replaced
38     ///     with their valid identifiers
39     ///
40     /// If the provided expression is not modified then `None` is returned
41     ///
42     /// Note that this only does fixup of otherwise valid expressions, passing
43     /// the resulting string to [`Expression::parse`] can still result in
44     /// additional parse errors, eg. unbalanced parentheses
45     ///
46     /// ```
47     /// assert_eq!(spdx::Expression::canonicalize("apache with LLVM-exception/gpl-3.0+").unwrap().unwrap(), "Apache-2.0 WITH LLVM-exception OR GPL-3.0-or-later");
48     /// ```
canonicalize(original: &str) -> Result<Option<String>, ParseError>49     pub fn canonicalize(original: &str) -> Result<Option<String>, ParseError> {
50         let mut can = String::with_capacity(original.len());
51 
52         let lexer = Lexer::new_mode(original, ParseMode::LAX);
53 
54         // Keep track if the last license id is a GNU license that uses the -or-later
55         // convention rather than the + like all other licenses
56         let mut last_is_gnu = false;
57         for tok in lexer {
58             let tok = tok?;
59 
60             match tok.token {
61                 Token::Spdx(id) => {
62                     last_is_gnu = id.is_gnu();
63                     can.push_str(id.name);
64                 }
65                 Token::And => can.push_str(" AND "),
66                 Token::Or => can.push_str(" OR "),
67                 Token::With => can.push_str(" WITH "),
68                 Token::Plus => {
69                     if last_is_gnu {
70                         can.push_str("-or-later");
71                     } else {
72                         can.push('+');
73                     }
74                 }
75                 Token::OpenParen => can.push('('),
76                 Token::CloseParen => can.push(')'),
77                 Token::Exception(exc) => can.push_str(exc.name),
78                 Token::LicenseRef { doc_ref, lic_ref } => {
79                     if let Some(dr) = doc_ref {
80                         can.push_str("DocumentRef-");
81                         can.push_str(dr);
82                         can.push(':');
83                     }
84 
85                     can.push_str("LicenseRef-");
86                     can.push_str(lic_ref);
87                 }
88             }
89         }
90 
91         Ok((can != original).then_some(can))
92     }
93 
94     /// Parses an expression with the specified `ParseMode`. With
95     /// `ParseMode::Lax` it permits some non-SPDX syntax, such as imprecise
96     /// license names and "/" used instead of "OR" in exprssions.
97     ///
98     /// ```
99     /// spdx::Expression::parse_mode(
100     ///     "mit/Apache-2.0 WITH LLVM-exception",
101     ///     spdx::ParseMode::LAX
102     /// ).unwrap();
103     /// ```
parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError>104     pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> {
105         // Operator precedence in SPDX 2.1
106         // +
107         // WITH
108         // AND
109         // OR
110         #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
111         enum Op {
112             //Plus,
113             //With,
114             And,
115             Or,
116             Open,
117         }
118 
119         struct OpAndSpan {
120             op: Op,
121             span: std::ops::Range<usize>,
122         }
123 
124         let lexer = Lexer::new_mode(original, mode);
125         let mut op_stack = SmallVec::<[OpAndSpan; 3]>::new();
126         let mut expr_queue = SmallVec::<[ExprNode; 5]>::new();
127 
128         // Keep track of the last token to simplify validation of the token stream
129         let mut last_token: Option<Token<'_>> = None;
130 
131         let apply_op = |op: OpAndSpan, q: &mut SmallVec<[ExprNode; 5]>| {
132             let op = match op.op {
133                 Op::And => Operator::And,
134                 Op::Or => Operator::Or,
135                 Op::Open => unreachable!(),
136             };
137 
138             q.push(ExprNode::Op(op));
139             Ok(())
140         };
141 
142         let make_err_for_token = |last_token: Option<Token<'_>>, span: std::ops::Range<usize>| {
143             let expected: &[&str] = match last_token {
144                 None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>", "("],
145                 Some(Token::CloseParen) => &["AND", "OR"],
146                 Some(Token::Exception(_)) => &["AND", "OR", ")"],
147                 Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"],
148                 Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"],
149                 Some(Token::With) => &["<exception>"],
150             };
151 
152             Err(ParseError {
153                 original: original.to_owned(),
154                 span,
155                 reason: Reason::Unexpected(expected),
156             })
157         };
158 
159         // Basic implementation of the https://en.wikipedia.org/wiki/Shunting-yard_algorithm
160         'outer: for tok in lexer {
161             let lt = tok?;
162             match &lt.token {
163                 Token::Spdx(id) => match last_token {
164                     None | Some(Token::And | Token::Or | Token::OpenParen) => {
165                         expr_queue.push(ExprNode::Req(ExpressionReq {
166                             req: LicenseReq::from(*id),
167                             span: lt.span.start as u32..lt.span.end as u32,
168                         }));
169                     }
170                     _ => return make_err_for_token(last_token, lt.span),
171                 },
172                 Token::LicenseRef { doc_ref, lic_ref } => match last_token {
173                     None | Some(Token::And | Token::Or | Token::OpenParen) => {
174                         expr_queue.push(ExprNode::Req(ExpressionReq {
175                             req: LicenseReq {
176                                 license: LicenseItem::Other {
177                                     doc_ref: doc_ref.map(String::from),
178                                     lic_ref: String::from(*lic_ref),
179                                 },
180                                 exception: None,
181                             },
182                             span: lt.span.start as u32..lt.span.end as u32,
183                         }));
184                     }
185                     _ => return make_err_for_token(last_token, lt.span),
186                 },
187                 Token::Plus => match last_token {
188                     Some(Token::Spdx(_)) => match expr_queue.last_mut().unwrap() {
189                         ExprNode::Req(ExpressionReq {
190                             req:
191                                 LicenseReq {
192                                     license: LicenseItem::Spdx { or_later, id },
193                                     ..
194                                 },
195                             ..
196                         }) => {
197                             // Handle GNU licenses differently, as they should *NOT* be used with the `+`
198                             if !mode.allow_postfix_plus_on_gpl && id.is_gnu() {
199                                 return Err(ParseError {
200                                     original: original.to_owned(),
201                                     span: lt.span,
202                                     reason: Reason::GnuNoPlus,
203                                 });
204                             }
205 
206                             *or_later = true;
207                         }
208                         _ => unreachable!(),
209                     },
210                     _ => return make_err_for_token(last_token, lt.span),
211                 },
212                 Token::With => match last_token {
213                     Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {}
214                     _ => return make_err_for_token(last_token, lt.span),
215                 },
216                 Token::Or | Token::And => match last_token {
217                     Some(
218                         Token::Spdx(_)
219                         | Token::LicenseRef { .. }
220                         | Token::CloseParen
221                         | Token::Exception(_)
222                         | Token::Plus,
223                     ) => {
224                         let new_op = match lt.token {
225                             Token::Or => Op::Or,
226                             Token::And => Op::And,
227                             _ => unreachable!(),
228                         };
229 
230                         while let Some(op) = op_stack.last() {
231                             match &op.op {
232                                 Op::Open => break,
233                                 top => {
234                                     if *top < new_op {
235                                         let top = op_stack.pop().unwrap();
236 
237                                         match top.op {
238                                             Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
239                                             Op::Open => unreachable!(),
240                                         }
241                                     } else {
242                                         break;
243                                     }
244                                 }
245                             }
246                         }
247 
248                         op_stack.push(OpAndSpan {
249                             op: new_op,
250                             span: lt.span,
251                         });
252                     }
253                     _ => return make_err_for_token(last_token, lt.span),
254                 },
255                 Token::OpenParen => match last_token {
256                     None | Some(Token::And | Token::Or | Token::OpenParen) => {
257                         op_stack.push(OpAndSpan {
258                             op: Op::Open,
259                             span: lt.span,
260                         });
261                     }
262                     _ => return make_err_for_token(last_token, lt.span),
263                 },
264                 Token::CloseParen => {
265                     match last_token {
266                         Some(
267                             Token::Spdx(_)
268                             | Token::LicenseRef { .. }
269                             | Token::Plus
270                             | Token::Exception(_)
271                             | Token::CloseParen,
272                         ) => {
273                             while let Some(top) = op_stack.pop() {
274                                 match top.op {
275                                     Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
276                                     Op::Open => {
277                                         // This is the only place we go back to the top of the outer loop,
278                                         // so make sure we correctly record this token
279                                         last_token = Some(Token::CloseParen);
280                                         continue 'outer;
281                                     }
282                                 }
283                             }
284 
285                             // We didn't have an opening parentheses if we get here
286                             return Err(ParseError {
287                                 original: original.to_owned(),
288                                 span: lt.span,
289                                 reason: Reason::UnopenedParens,
290                             });
291                         }
292                         _ => return make_err_for_token(last_token, lt.span),
293                     }
294                 }
295                 Token::Exception(exc) => match last_token {
296                     Some(Token::With) => match expr_queue.last_mut() {
297                         Some(ExprNode::Req(lic)) => {
298                             lic.req.exception = Some(*exc);
299                         }
300                         _ => unreachable!(),
301                     },
302                     _ => return make_err_for_token(last_token, lt.span),
303                 },
304             }
305 
306             last_token = Some(lt.token);
307         }
308 
309         // Validate that the terminating token is valid
310         match last_token {
311             Some(
312                 Token::Spdx(_)
313                 | Token::LicenseRef { .. }
314                 | Token::Exception(_)
315                 | Token::CloseParen
316                 | Token::Plus,
317             ) => {}
318             // We have to have at least one valid license requirement
319             None => {
320                 return Err(ParseError {
321                     original: original.to_owned(),
322                     span: 0..original.len(),
323                     reason: Reason::Empty,
324                 });
325             }
326             Some(_) => return make_err_for_token(last_token, original.len()..original.len()),
327         }
328 
329         while let Some(top) = op_stack.pop() {
330             match top.op {
331                 Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
332                 Op::Open => {
333                     return Err(ParseError {
334                         original: original.to_owned(),
335                         span: top.span,
336                         reason: Reason::UnclosedParens,
337                     });
338                 }
339             }
340         }
341 
342         // TODO: Investigate using https://github.com/oli-obk/quine-mc_cluskey to simplify
343         // expressions, but not really critical. Just cool.
344 
345         Ok(Expression {
346             original: original.to_owned(),
347             expr: expr_queue,
348         })
349     }
350 }
351