1 // Copyright (C) 2025 The Android Open Source Project 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 use std::{ 16 collections::{BTreeMap, BTreeSet}, 17 ffi::{OsStr, OsString}, 18 }; 19 20 use crate::{ 21 license_file_finder::is_findable, util::strip_punctuation, CrateLicenseSpecialCase, Error, 22 License, 23 }; 24 25 #[derive(Debug)] 26 pub(crate) struct ParsedLicense { 27 // The SPDX identifier, e.g. "Apache-2.0" 28 licensee: spdx::Licensee, 29 // The processed text of the license (lowercased, punctuation stripped, etc.) used for matching. 30 processed_text: Option<String>, 31 // A set of file names, any of which unambiguously identify the license. 32 file_names: BTreeSet<OsString>, 33 } 34 35 impl ParsedLicense { licensee(&self) -> &spdx::Licensee36 pub fn licensee(&self) -> &spdx::Licensee { 37 &self.licensee 38 } processed_text(&self) -> Option<&str>39 pub fn processed_text(&self) -> Option<&str> { 40 self.processed_text.as_deref() 41 } file_names(&self) -> &BTreeSet<OsString>42 pub fn file_names(&self) -> &BTreeSet<OsString> { 43 &self.file_names 44 } is_substring_of(&self, other: &str) -> bool45 pub fn is_substring_of(&self, other: &str) -> bool { 46 self.processed_text.as_ref().is_some_and(|text| other.contains(text.as_str())) 47 } 48 } 49 50 impl TryFrom<&License> for ParsedLicense { 51 type Error = crate::Error; 52 try_from(value: &License) -> Result<Self, Self::Error>53 fn try_from(value: &License) -> Result<Self, Self::Error> { 54 if value.text.is_none() && value.file_names.is_empty() { 55 return Err(Error::LicenseWithoutTextOrFileNames(value.name.to_string())); 56 } 57 58 let processed_text = value.text.map(strip_punctuation); 59 if processed_text.as_ref().is_some_and(String::is_empty) { 60 return Err(Error::EmptyLicenseText(value.name.to_string())); 61 } 62 63 let mut file_names = BTreeSet::new(); 64 for license_file in value.file_names { 65 if !is_findable(OsStr::new(license_file)) { 66 return Err(Error::LicenseFileNotFindable( 67 license_file.to_string(), 68 value.name.to_string(), 69 )); 70 } 71 file_names.insert(OsString::from(license_file.to_uppercase())); 72 } 73 74 Ok(ParsedLicense { 75 licensee: spdx::Licensee::parse(value.name)?, 76 processed_text: value.text.map(strip_punctuation), 77 file_names, 78 }) 79 } 80 } 81 82 #[derive(Debug)] 83 pub(crate) struct ParsedCrateLicenseSpecialCase { 84 // The name of the crate. 85 crate_name: &'static str, 86 // The incorrect or missing license expression in Cargo.toml 87 cargo_toml_license: Option<&'static str>, 88 // The corrected license expression. 89 corrected_license_expr: spdx::Expression, 90 } 91 92 impl TryFrom<&CrateLicenseSpecialCase> for ParsedCrateLicenseSpecialCase { 93 type Error = crate::Error; 94 try_from(value: &CrateLicenseSpecialCase) -> Result<Self, Self::Error>95 fn try_from(value: &CrateLicenseSpecialCase) -> Result<Self, Self::Error> { 96 Ok(ParsedCrateLicenseSpecialCase { 97 crate_name: value.crate_name, 98 cargo_toml_license: value.cargo_toml_license, 99 corrected_license_expr: spdx::Expression::parse(value.corrected_license_expr)?, 100 }) 101 } 102 } 103 104 #[derive(Debug)] 105 pub(crate) struct CrateLicenseSpecialCases { 106 special_cases: BTreeMap<&'static str, ParsedCrateLicenseSpecialCase>, 107 } 108 109 impl CrateLicenseSpecialCases { get_corrected_license( &self, crate_name: &str, cargo_toml_license: Option<&str>, ) -> Result<spdx::Expression, Error>110 pub fn get_corrected_license( 111 &self, 112 crate_name: &str, 113 cargo_toml_license: Option<&str>, 114 ) -> Result<spdx::Expression, Error> { 115 // Check special cases. 116 if let Some(special_case) = self.special_cases.get(crate_name) { 117 if special_case.cargo_toml_license != cargo_toml_license { 118 return Err(Error::LicenseExpressionSpecialCaseMismatch { 119 crate_name: crate_name.to_string(), 120 expected_license: special_case 121 .cargo_toml_license 122 .unwrap_or("<None>") 123 .to_string(), 124 cargo_toml_license: cargo_toml_license.unwrap_or("<None>").to_string(), 125 }); 126 } 127 return Ok(special_case.corrected_license_expr.clone()); 128 } 129 // Default. Look at the license field in Cargo.toml, and treat '/' as OR. 130 if let Some(lic) = cargo_toml_license { 131 Ok(spdx::Expression::parse(&lic.replace('/', " OR "))?) 132 } else { 133 Err(Error::MissingLicenseField(crate_name.to_string())) 134 } 135 } 136 } 137 138 impl TryFrom<&[CrateLicenseSpecialCase]> for CrateLicenseSpecialCases { 139 type Error = crate::Error; 140 try_from(value: &[CrateLicenseSpecialCase]) -> Result<Self, Self::Error>141 fn try_from(value: &[CrateLicenseSpecialCase]) -> Result<Self, Self::Error> { 142 // BTreeMap::from() doesn't care about duplicate keys, but having multiple special cases 143 // for the same crate is an error. 144 let mut special_cases = BTreeMap::new(); 145 for special_case in value { 146 let parsed_special_case = ParsedCrateLicenseSpecialCase::try_from(special_case)?; 147 if special_cases.insert(parsed_special_case.crate_name, parsed_special_case).is_some() { 148 return Err(Error::DuplicateCrateLicenseSpecialCase( 149 special_case.crate_name.to_string(), 150 )); 151 } 152 } 153 154 Ok(CrateLicenseSpecialCases { special_cases }) 155 } 156 } 157 158 #[cfg(test)] 159 mod parsed_license_tests { 160 use super::*; 161 162 #[test] invalid_license_name()163 fn invalid_license_name() { 164 assert!(matches!( 165 ParsedLicense::try_from(&License { name: "foo", text: None, file_names: &["LICENSE"] }), 166 Err(Error::LicenseParseError(_)) 167 )); 168 } 169 170 #[test] missing_both_text_and_file_name()171 fn missing_both_text_and_file_name() { 172 assert!(matches!( 173 ParsedLicense::try_from(&License { name: "MIT", text: None, file_names: &[] }), 174 Err(Error::LicenseWithoutTextOrFileNames(_)) 175 )); 176 } 177 178 #[test] empty_license_text()179 fn empty_license_text() { 180 assert!(matches!( 181 ParsedLicense::try_from(&License { 182 name: "MIT", 183 text: Some(" "), 184 file_names: &["LICENSE-MIT"] 185 }), 186 Err(Error::EmptyLicenseText(_)) 187 )); 188 } 189 190 #[test] is_substring_of()191 fn is_substring_of() { 192 let license: ParsedLicense = ParsedLicense::try_from(&License { 193 name: "MIT", 194 text: Some("foo"), 195 file_names: &["LICENSE-MIT"], 196 }) 197 .unwrap(); 198 assert!(license.is_substring_of("foobar")); 199 assert!(!license.is_substring_of("asdf")); 200 let license: ParsedLicense = ParsedLicense::try_from(&License { 201 name: "MIT", 202 text: None, 203 file_names: &["LICENSE-MIT"], 204 }) 205 .unwrap(); 206 assert!( 207 !license.is_substring_of("blah"), 208 "Missing license text is not a substring of anything" 209 ); 210 } 211 } 212 213 #[cfg(test)] 214 mod crate_license_special_case_tests { 215 use super::*; 216 217 #[test] unparseable_special_case()218 fn unparseable_special_case() { 219 assert!(matches!( 220 ParsedCrateLicenseSpecialCase::try_from(&CrateLicenseSpecialCase { 221 crate_name: "foo", 222 cargo_toml_license: None, 223 corrected_license_expr: "foo" 224 }), 225 Err(Error::LicenseParseError(_)) 226 )) 227 } 228 229 #[test] duplicate_special_cases()230 fn duplicate_special_cases() { 231 assert!(matches!( 232 CrateLicenseSpecialCases::try_from( 233 [ 234 CrateLicenseSpecialCase { 235 crate_name: "foo", 236 cargo_toml_license: None, 237 corrected_license_expr: "MIT" 238 }, 239 CrateLicenseSpecialCase { 240 crate_name: "foo", 241 cargo_toml_license: None, 242 corrected_license_expr: "MIT" 243 } 244 ] 245 .as_slice() 246 ), 247 Err(Error::DuplicateCrateLicenseSpecialCase(_)) 248 )); 249 } 250 251 #[test] get_corrected_license_expr()252 fn get_corrected_license_expr() { 253 let special_cases = CrateLicenseSpecialCases::try_from( 254 [CrateLicenseSpecialCase { 255 crate_name: "foo", 256 cargo_toml_license: None, 257 corrected_license_expr: "MIT", 258 }] 259 .as_slice(), 260 ) 261 .unwrap(); 262 assert_eq!( 263 special_cases.get_corrected_license("bar", Some("Apache-2.0")).unwrap(), 264 spdx::Expression::parse("Apache-2.0").unwrap(), 265 "No special case" 266 ); 267 assert_eq!( 268 special_cases.get_corrected_license("foo", None).unwrap(), 269 spdx::Expression::parse("MIT").unwrap(), 270 "Special case match" 271 ); 272 assert!( 273 matches!( 274 special_cases.get_corrected_license("foo", Some("MIT")), 275 Err(Error::LicenseExpressionSpecialCaseMismatch { 276 crate_name: _, 277 expected_license: _, 278 cargo_toml_license: _ 279 }) 280 ), 281 "Special case mismatch" 282 ); 283 assert_eq!( 284 special_cases.get_corrected_license("bar", Some("Apache-2.0/MIT")).unwrap(), 285 spdx::Expression::parse("Apache-2.0 OR MIT").unwrap(), 286 "/ treated as OR" 287 ); 288 } 289 } 290