1 use crate::{ 2 error::{ParseError, Reason}, 3 expression::{ExprNode, Expression, ExpressionReq, Operator}, 4 lexer::{Lexer, Token}, 5 LicenseItem, LicenseReq, ParseMode, 6 }; 7 use smallvec::SmallVec; 8 9 impl Expression { 10 /// Given a license expression, attempts to parse and validate it as a valid 11 /// SPDX expression. Uses `ParseMode::Strict`. 12 /// 13 /// The validation can fail for many reasons: 14 /// * The expression contains invalid characters 15 /// * An unknown/invalid license or exception identifier was found. Only 16 /// [SPDX short identifiers](https://spdx.org/ids) are allowed 17 /// * The expression contained unbalanced parentheses 18 /// * A license or exception immediately follows another license or exception, without 19 /// a valid AND, OR, or WITH operator separating them 20 /// * An AND, OR, or WITH doesn't have a license or `)` preceding it 21 /// 22 /// ``` 23 /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap(); 24 /// ``` parse(original: &str) -> Result<Self, ParseError>25 pub fn parse(original: &str) -> Result<Self, ParseError> { 26 Self::parse_mode(original, ParseMode::STRICT) 27 } 28 29 /// Canonicalizes the input expression into a form that can be parsed with 30 /// [`ParseMode::STRICT`] 31 /// 32 /// ## Transforms 33 /// 34 /// 1. '/' is replaced with ' OR ' 35 /// 1. Lower-cased operators ('or', 'and', 'with') are upper-cased 36 /// 1. '+' is tranformed to `-or-later` for GNU licenses 37 /// 1. Invalid/imprecise license identifiers (eg. `apache2`) are replaced 38 /// with their valid identifiers 39 /// 40 /// If the provided expression is not modified then `None` is returned 41 /// 42 /// Note that this only does fixup of otherwise valid expressions, passing 43 /// the resulting string to [`Expression::parse`] can still result in 44 /// additional parse errors, eg. unbalanced parentheses 45 /// 46 /// ``` 47 /// assert_eq!(spdx::Expression::canonicalize("apache with LLVM-exception/gpl-3.0+").unwrap().unwrap(), "Apache-2.0 WITH LLVM-exception OR GPL-3.0-or-later"); 48 /// ``` canonicalize(original: &str) -> Result<Option<String>, ParseError>49 pub fn canonicalize(original: &str) -> Result<Option<String>, ParseError> { 50 let mut can = String::with_capacity(original.len()); 51 52 let lexer = Lexer::new_mode(original, ParseMode::LAX); 53 54 // Keep track if the last license id is a GNU license that uses the -or-later 55 // convention rather than the + like all other licenses 56 let mut last_is_gnu = false; 57 for tok in lexer { 58 let tok = tok?; 59 60 match tok.token { 61 Token::Spdx(id) => { 62 last_is_gnu = id.is_gnu(); 63 can.push_str(id.name); 64 } 65 Token::And => can.push_str(" AND "), 66 Token::Or => can.push_str(" OR "), 67 Token::With => can.push_str(" WITH "), 68 Token::Plus => { 69 if last_is_gnu { 70 can.push_str("-or-later"); 71 } else { 72 can.push('+'); 73 } 74 } 75 Token::OpenParen => can.push('('), 76 Token::CloseParen => can.push(')'), 77 Token::Exception(exc) => can.push_str(exc.name), 78 Token::LicenseRef { doc_ref, lic_ref } => { 79 if let Some(dr) = doc_ref { 80 can.push_str("DocumentRef-"); 81 can.push_str(dr); 82 can.push(':'); 83 } 84 85 can.push_str("LicenseRef-"); 86 can.push_str(lic_ref); 87 } 88 } 89 } 90 91 Ok((can != original).then_some(can)) 92 } 93 94 /// Parses an expression with the specified `ParseMode`. With 95 /// `ParseMode::Lax` it permits some non-SPDX syntax, such as imprecise 96 /// license names and "/" used instead of "OR" in exprssions. 97 /// 98 /// ``` 99 /// spdx::Expression::parse_mode( 100 /// "mit/Apache-2.0 WITH LLVM-exception", 101 /// spdx::ParseMode::LAX 102 /// ).unwrap(); 103 /// ``` parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError>104 pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> { 105 // Operator precedence in SPDX 2.1 106 // + 107 // WITH 108 // AND 109 // OR 110 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] 111 enum Op { 112 //Plus, 113 //With, 114 And, 115 Or, 116 Open, 117 } 118 119 struct OpAndSpan { 120 op: Op, 121 span: std::ops::Range<usize>, 122 } 123 124 let lexer = Lexer::new_mode(original, mode); 125 let mut op_stack = SmallVec::<[OpAndSpan; 3]>::new(); 126 let mut expr_queue = SmallVec::<[ExprNode; 5]>::new(); 127 128 // Keep track of the last token to simplify validation of the token stream 129 let mut last_token: Option<Token<'_>> = None; 130 131 let apply_op = |op: OpAndSpan, q: &mut SmallVec<[ExprNode; 5]>| { 132 let op = match op.op { 133 Op::And => Operator::And, 134 Op::Or => Operator::Or, 135 Op::Open => unreachable!(), 136 }; 137 138 q.push(ExprNode::Op(op)); 139 Ok(()) 140 }; 141 142 let make_err_for_token = |last_token: Option<Token<'_>>, span: std::ops::Range<usize>| { 143 let expected: &[&str] = match last_token { 144 None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>", "("], 145 Some(Token::CloseParen) => &["AND", "OR"], 146 Some(Token::Exception(_)) => &["AND", "OR", ")"], 147 Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"], 148 Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"], 149 Some(Token::With) => &["<exception>"], 150 }; 151 152 Err(ParseError { 153 original: original.to_owned(), 154 span, 155 reason: Reason::Unexpected(expected), 156 }) 157 }; 158 159 // Basic implementation of the https://en.wikipedia.org/wiki/Shunting-yard_algorithm 160 'outer: for tok in lexer { 161 let lt = tok?; 162 match <.token { 163 Token::Spdx(id) => match last_token { 164 None | Some(Token::And | Token::Or | Token::OpenParen) => { 165 expr_queue.push(ExprNode::Req(ExpressionReq { 166 req: LicenseReq::from(*id), 167 span: lt.span.start as u32..lt.span.end as u32, 168 })); 169 } 170 _ => return make_err_for_token(last_token, lt.span), 171 }, 172 Token::LicenseRef { doc_ref, lic_ref } => match last_token { 173 None | Some(Token::And | Token::Or | Token::OpenParen) => { 174 expr_queue.push(ExprNode::Req(ExpressionReq { 175 req: LicenseReq { 176 license: LicenseItem::Other { 177 doc_ref: doc_ref.map(String::from), 178 lic_ref: String::from(*lic_ref), 179 }, 180 exception: None, 181 }, 182 span: lt.span.start as u32..lt.span.end as u32, 183 })); 184 } 185 _ => return make_err_for_token(last_token, lt.span), 186 }, 187 Token::Plus => match last_token { 188 Some(Token::Spdx(_)) => match expr_queue.last_mut().unwrap() { 189 ExprNode::Req(ExpressionReq { 190 req: 191 LicenseReq { 192 license: LicenseItem::Spdx { or_later, id }, 193 .. 194 }, 195 .. 196 }) => { 197 // Handle GNU licenses differently, as they should *NOT* be used with the `+` 198 if !mode.allow_postfix_plus_on_gpl && id.is_gnu() { 199 return Err(ParseError { 200 original: original.to_owned(), 201 span: lt.span, 202 reason: Reason::GnuNoPlus, 203 }); 204 } 205 206 *or_later = true; 207 } 208 _ => unreachable!(), 209 }, 210 _ => return make_err_for_token(last_token, lt.span), 211 }, 212 Token::With => match last_token { 213 Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {} 214 _ => return make_err_for_token(last_token, lt.span), 215 }, 216 Token::Or | Token::And => match last_token { 217 Some( 218 Token::Spdx(_) 219 | Token::LicenseRef { .. } 220 | Token::CloseParen 221 | Token::Exception(_) 222 | Token::Plus, 223 ) => { 224 let new_op = match lt.token { 225 Token::Or => Op::Or, 226 Token::And => Op::And, 227 _ => unreachable!(), 228 }; 229 230 while let Some(op) = op_stack.last() { 231 match &op.op { 232 Op::Open => break, 233 top => { 234 if *top < new_op { 235 let top = op_stack.pop().unwrap(); 236 237 match top.op { 238 Op::And | Op::Or => apply_op(top, &mut expr_queue)?, 239 Op::Open => unreachable!(), 240 } 241 } else { 242 break; 243 } 244 } 245 } 246 } 247 248 op_stack.push(OpAndSpan { 249 op: new_op, 250 span: lt.span, 251 }); 252 } 253 _ => return make_err_for_token(last_token, lt.span), 254 }, 255 Token::OpenParen => match last_token { 256 None | Some(Token::And | Token::Or | Token::OpenParen) => { 257 op_stack.push(OpAndSpan { 258 op: Op::Open, 259 span: lt.span, 260 }); 261 } 262 _ => return make_err_for_token(last_token, lt.span), 263 }, 264 Token::CloseParen => { 265 match last_token { 266 Some( 267 Token::Spdx(_) 268 | Token::LicenseRef { .. } 269 | Token::Plus 270 | Token::Exception(_) 271 | Token::CloseParen, 272 ) => { 273 while let Some(top) = op_stack.pop() { 274 match top.op { 275 Op::And | Op::Or => apply_op(top, &mut expr_queue)?, 276 Op::Open => { 277 // This is the only place we go back to the top of the outer loop, 278 // so make sure we correctly record this token 279 last_token = Some(Token::CloseParen); 280 continue 'outer; 281 } 282 } 283 } 284 285 // We didn't have an opening parentheses if we get here 286 return Err(ParseError { 287 original: original.to_owned(), 288 span: lt.span, 289 reason: Reason::UnopenedParens, 290 }); 291 } 292 _ => return make_err_for_token(last_token, lt.span), 293 } 294 } 295 Token::Exception(exc) => match last_token { 296 Some(Token::With) => match expr_queue.last_mut() { 297 Some(ExprNode::Req(lic)) => { 298 lic.req.exception = Some(*exc); 299 } 300 _ => unreachable!(), 301 }, 302 _ => return make_err_for_token(last_token, lt.span), 303 }, 304 } 305 306 last_token = Some(lt.token); 307 } 308 309 // Validate that the terminating token is valid 310 match last_token { 311 Some( 312 Token::Spdx(_) 313 | Token::LicenseRef { .. } 314 | Token::Exception(_) 315 | Token::CloseParen 316 | Token::Plus, 317 ) => {} 318 // We have to have at least one valid license requirement 319 None => { 320 return Err(ParseError { 321 original: original.to_owned(), 322 span: 0..original.len(), 323 reason: Reason::Empty, 324 }); 325 } 326 Some(_) => return make_err_for_token(last_token, original.len()..original.len()), 327 } 328 329 while let Some(top) = op_stack.pop() { 330 match top.op { 331 Op::And | Op::Or => apply_op(top, &mut expr_queue)?, 332 Op::Open => { 333 return Err(ParseError { 334 original: original.to_owned(), 335 span: top.span, 336 reason: Reason::UnclosedParens, 337 }); 338 } 339 } 340 } 341 342 // TODO: Investigate using https://github.com/oli-obk/quine-mc_cluskey to simplify 343 // expressions, but not really critical. Just cool. 344 345 Ok(Expression { 346 original: original.to_owned(), 347 expr: expr_queue, 348 }) 349 } 350 } 351