• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2025 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 use std::{
16     collections::{BTreeMap, BTreeSet},
17     ffi::OsString,
18     path::Path,
19     sync::LazyLock,
20 };
21 
22 use itertools::Itertools;
23 use spdx::Licensee;
24 use textdistance::str::ratcliff_obershelp;
25 
26 use crate::{
27     license_data::{CRATE_LICENSE_SPECIAL_CASES, LICENSES, LICENSE_PREFERENCE},
28     util::{normalize_filename, strip_punctuation},
29     CrateLicenseSpecialCase, CrateLicenseSpecialCases, Error, License, LicenseTerms, ParsedLicense,
30 };
31 
32 #[derive(Debug)]
33 pub(crate) struct Licenses {
34     licenses: BTreeMap<Licensee, ParsedLicense>,
35     license_preference: Vec<Licensee>,
36     crate_license_special_cases: CrateLicenseSpecialCases,
37     license_file_names: BTreeMap<OsString, Licensee>,
38 }
39 
40 impl Licenses {
new( raw_licenses: &'static [License], license_preference: &[&str], crate_license_special_cases: &'static [CrateLicenseSpecialCase], ) -> Result<Licenses, Error>41     fn new(
42         raw_licenses: &'static [License],
43         license_preference: &[&str],
44         crate_license_special_cases: &'static [CrateLicenseSpecialCase],
45     ) -> Result<Licenses, Error> {
46         if raw_licenses.is_empty() {
47             return Err(Error::NoLicenses);
48         }
49 
50         let mut licenses = BTreeMap::new();
51         let mut license_file_names = BTreeMap::new();
52         for license in raw_licenses {
53             let parsed = ParsedLicense::try_from(license)?;
54             let licensee = parsed.licensee().clone();
55             for file_name in parsed.file_names() {
56                 if let Some(other) = license_file_names.insert(file_name.clone(), licensee.clone())
57                 {
58                     return Err(Error::DuplicateLicenseFileName {
59                         file_name: file_name.to_string_lossy().into_owned(),
60                         license: parsed.licensee().to_string(),
61                         other_license: other.to_string(),
62                     });
63                 }
64             }
65             if licenses.insert(licensee.clone(), parsed).is_some() {
66                 return Err(Error::DuplicateLicense(licensee.to_string()));
67             }
68         }
69 
70         let mut ranked_licenses = Vec::new();
71         for pref in license_preference {
72             let licensee = Licensee::parse(pref)?;
73             if !licenses.contains_key(&licensee) {
74                 return Err(Error::LicensePreferenceForUnknownLicense(pref.to_string()));
75             }
76             ranked_licenses.push(licensee);
77         }
78         let unranked_licenses = licenses
79             .keys()
80             .filter_map(|l| if !ranked_licenses.contains(l) { Some(l.clone()) } else { None })
81             .collect::<Vec<_>>();
82         let license_preference = ranked_licenses.into_iter().chain(unranked_licenses).collect();
83 
84         let licenses = Licenses {
85             licenses,
86             license_preference,
87             crate_license_special_cases: crate_license_special_cases.try_into()?,
88             license_file_names,
89         };
90         licenses.validate()?;
91         Ok(licenses)
92     }
93 
validate(&self) -> Result<(), Error>94     fn validate(&self) -> Result<(), Error> {
95         for (licensee, license) in &self.licenses {
96             // The license text can't be a substring of any other license text.
97             for (other_licensee, other_license) in &self.licenses {
98                 if licensee != other_licensee
99                     && license
100                         .processed_text()
101                         .is_some_and(|text| other_license.is_substring_of(text))
102                 {
103                     return Err(Error::AmbiguousLicenseText(
104                         other_license.licensee().to_string(),
105                         license.licensee().to_string(),
106                     ));
107                 }
108             }
109         }
110 
111         Ok(())
112     }
113 
114     /// Evaluate the SPDX license expression from Cargo.toml for a given crate.
115     /// Slashes such as "MIT/Apache-2.0" are interpreted as OR.
116     /// A limited set of exceptions are applied for crates where the license terms are
117     /// known to be missing or incorrect.
evaluate_crate_license( &self, crate_name: &str, cargo_toml_license: Option<&str>, ) -> Result<LicenseTerms, Error>118     pub fn evaluate_crate_license(
119         &self,
120         crate_name: &str,
121         cargo_toml_license: Option<&str>,
122     ) -> Result<LicenseTerms, Error> {
123         LicenseTerms::try_from(
124             self.crate_license_special_cases
125                 .get_corrected_license(crate_name, cargo_toml_license)?,
126             &self.license_preference,
127         )
128     }
129 
classify_file_name(&self, file: impl AsRef<Path>) -> Option<&Licensee>130     pub fn classify_file_name(&self, file: impl AsRef<Path>) -> Option<&Licensee> {
131         self.license_file_names.get(&normalize_filename(file))
132     }
133 
134     /// Classify file contents by exact substring match on the license text.
classify_file_contents(&self, contents: &str) -> BTreeSet<Licensee>135     pub fn classify_file_contents(&self, contents: &str) -> BTreeSet<Licensee> {
136         let contents = strip_punctuation(contents);
137 
138         let mut matches = BTreeSet::new();
139         for license in self.licenses.values() {
140             if license.is_substring_of(contents.as_str()) {
141                 matches.insert(license.licensee().clone());
142             }
143         }
144         matches
145     }
146 
classify_file_contents_fuzzy(&self, contents: &str) -> Option<Licensee>147     pub fn classify_file_contents_fuzzy(&self, contents: &str) -> Option<Licensee> {
148         let contents = strip_punctuation(contents);
149 
150         // Fuzzy match. This is expensive, so start with licenses that are closest in length to the file,
151         // and only return a single match at most.
152         for license in
153             self.licenses.values().filter(|l| l.processed_text().is_some()).sorted_by(|a, b| {
154                 let mut ra = a.processed_text().unwrap().len() as f32 / contents.len() as f32;
155                 let mut rb = b.processed_text().unwrap().len() as f32 / contents.len() as f32;
156                 if ra > 1.0 {
157                     ra = 1.0 / ra;
158                 }
159                 if rb > 1.0 {
160                     rb = 1.0 / rb;
161                 }
162                 rb.partial_cmp(&ra).unwrap()
163             })
164         {
165             if let Some(processed_text) = license.processed_text() {
166                 let similarity = ratcliff_obershelp(contents.as_str(), processed_text);
167                 if similarity > 0.95 {
168                     return Some(license.licensee().clone());
169                 }
170             }
171         }
172 
173         None
174     }
175 }
176 
177 pub(crate) static LICENSE_DATA: LazyLock<Licenses> = LazyLock::new(|| {
178     Licenses::new(LICENSES, LICENSE_PREFERENCE, CRATE_LICENSE_SPECIAL_CASES).unwrap()
179 });
180 
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use super::*;

    /// The built-in tables load, and every built-in license has a preference rank.
    #[test]
    fn static_data_sanity_test() {
        let known = LICENSE_DATA.licenses.len();
        assert_eq!(LICENSES.len(), known);
        assert_eq!(LICENSE_DATA.license_preference.len(), known);
    }

    /// A well-formed table with a partial preference list is accepted.
    #[test]
    fn basic() {
        let result = Licenses::new(
            &[
                License { name: "Apache-2.0", text: None, file_names: &["LICENSE-APACHE"] },
                License { name: "MIT", text: None, file_names: &["LICENSE-MIT"] },
                License { name: "BSD-3-Clause", text: None, file_names: &["LICENSE-BSD-3-Clause"] },
            ],
            &["Apache-2.0", "MIT"],
            &[],
        );
        assert!(result.is_ok());
    }

    /// An empty license table is rejected.
    #[test]
    fn no_licenses() {
        let result = Licenses::new(&[], &[], &[]);
        assert!(matches!(result, Err(Error::NoLicenses)));
    }

    /// The same license listed twice is rejected.
    #[test]
    fn duplicate_license() {
        let result = Licenses::new(
            &[
                License { name: "MIT", text: None, file_names: &["LICENSE-foo"] },
                License { name: "MIT", text: None, file_names: &["LICENSE-bar"] },
            ],
            &[],
            &[],
        );
        assert!(matches!(result, Err(Error::DuplicateLicense(_))));
    }

    /// A license text contained in another license text is rejected.
    #[test]
    fn license_text_substrings() {
        let result = Licenses::new(
            &[
                License { name: "Apache-2.0", text: Some("foo"), file_names: &[] },
                License { name: "MIT", text: Some("foobar"), file_names: &[] },
            ],
            &[],
            &[],
        );
        assert!(matches!(result, Err(Error::AmbiguousLicenseText(_, _))));
    }

    /// Two licenses claiming the same file name are rejected.
    #[test]
    fn duplicate_license_file_names() {
        let result = Licenses::new(
            &[
                License { name: "Apache-2.0", text: None, file_names: &["LICENSE"] },
                License { name: "MIT", text: None, file_names: &["LICENSE"] },
            ],
            &[],
            &[],
        );
        assert!(matches!(result, Err(Error::DuplicateLicenseFileName { .. })));
    }

    /// The file name "foo" is rejected as not findable.
    #[test]
    fn unfindable_license_file() {
        let result =
            Licenses::new(&[License { name: "MIT", text: None, file_names: &["foo"] }], &[], &[]);
        assert!(matches!(result, Err(Error::LicenseFileNotFindable(_, _))));
    }

    /// Preferences must parse as licenses and must refer to a known license.
    #[test]
    fn preference_for_unknown_license() {
        let unparseable = Licenses::new(
            &[License { name: "MIT", text: None, file_names: &["LICENSE-MIT"] }],
            &["foo"],
            &[],
        );
        assert!(matches!(unparseable, Err(Error::LicenseParseError(_))));

        let unknown = Licenses::new(
            &[License { name: "MIT", text: None, file_names: &["LICENSE-MIT"] }],
            &["Apache-2.0"],
            &[],
        );
        assert!(matches!(unknown, Err(Error::LicensePreferenceForUnknownLicense(_))));
    }

    /// An OR expression resolves to the preferred license; an unknown license and a missing
    /// license field are errors.
    #[test]
    fn evaluate_crate_license() {
        let licenses = Licenses::new(
            &[
                License { name: "Apache-2.0", text: None, file_names: &["LICENSE-APACHE"] },
                License { name: "MIT", text: None, file_names: &["LICENSE-MIT"] },
            ],
            &["Apache-2.0", "MIT"],
            &[],
        )
        .unwrap();

        let terms = licenses.evaluate_crate_license("foo", Some("Apache-2.0 OR MIT")).unwrap();
        let apache = Licensee::parse("Apache-2.0").unwrap().into_req();
        let mit = Licensee::parse("MIT").unwrap().into_req();
        assert_eq!(
            terms,
            LicenseTerms { required: BTreeSet::from([apache]), not_required: BTreeSet::from([mit]) }
        );

        let unknown = licenses.evaluate_crate_license("foo", Some("BSD-3-Clause"));
        assert!(matches!(unknown, Err(Error::MinimizeError(_))), "Unknown license");

        let missing = licenses.evaluate_crate_license("foo", None);
        assert!(
            matches!(missing, Err(Error::MissingLicenseField(_))),
            "No license and no special case"
        );
    }
}
319