• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2024 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //! A crate for finding license files in crates that satisfy their SPDX license expressions.
16 
17 use std::{
18     collections::{BTreeMap, BTreeSet},
19     path::{Path, PathBuf},
20 };
21 
22 use file_classifier::Classifier;
23 use license_data::{CrateLicenseSpecialCase, License};
24 use license_terms::LicenseTerms;
25 use licenses::LICENSE_DATA;
26 use parsed_license_data::{CrateLicenseSpecialCases, ParsedLicense};
27 use spdx::LicenseReq;
28 
29 mod file_classifier;
30 mod license_data;
31 mod license_data_tests;
32 mod license_file_finder;
33 mod license_terms;
34 mod licenses;
35 mod parsed_license_data;
36 mod util;
37 
38 /// Error types for the 'license_checker' crate.
39 #[derive(thiserror::Error, Debug)]
40 pub enum Error {
41     /// Couldn't convert filesystem path to a string for globbing.
42     #[error("Couldn't convert filesystem path {0} to a string for globbing.")]
43     PathToString(PathBuf),
44     /// Glob error
45     #[error(transparent)]
46     GlobError(#[from] glob::GlobError),
47     /// Glob pattern error
48     #[error(transparent)]
49     PatternError(#[from] glob::PatternError),
50     /// Error stripping prefix from path
51     #[error(transparent)]
52     StripPrefixError(#[from] std::path::StripPrefixError),
53     /// License expression special case doesn't match what's in Cargo.toml
54     #[error("Found a license expression special case for crate {crate_name} but the Cargo.toml license field doesn't match. Expected '{expected_license}', found '{cargo_toml_license}'")]
55     LicenseExpressionSpecialCaseMismatch {
56         /// The name of the crate
57         crate_name: String,
58         /// The expected license expression in special case
59         expected_license: String,
60         /// The actual license expression in Cargo.toml
61         cargo_toml_license: String,
62     },
63     /// The crate doesn't have a license field in Cargo.toml, and no special case was found for this crate
64     #[error("Crate {0} doesn't have a license field in Cargo.toml, and no special case was found for this crate")]
65     MissingLicenseField(String),
66     /// Error parsing SPDX license expression
67     #[error(transparent)]
68     LicenseParseError(#[from] spdx::ParseError),
69     /// Error minimizing SPDX expression
70     #[error(transparent)]
71     MinimizeError(#[from] spdx::expression::MinimizeError),
72     /// Failed to read file
73     #[error("Failed to read {0}: {1}")]
74     FileReadError(PathBuf, std::io::Error),
75     /// The set of known licenses is empty.
76     #[error("The set of known licenses is empty")]
77     NoLicenses,
78     /// License with neither text nor file names.
79     #[error("License {0} has neither text nor file names")]
80     LicenseWithoutTextOrFileNames(String),
81     /// Duplicate license
82     #[error("Duplicate license {0}")]
83     DuplicateLicense(String),
84     /// Duplicate crate license special case
85     #[error("Duplicate license special case for crate {0}")]
86     DuplicateCrateLicenseSpecialCase(String),
87     /// The license text is empty.
88     #[error("The license text for {0} is empty")]
89     EmptyLicenseText(String),
90     /// License text is ambiguous because it is a substring of another license text.
91     #[error("The license text for {0} is a substring of the license text for {0}")]
92     AmbiguousLicenseText(String, String),
93     /// Duplicate license file name
94     #[error("The file name {file_name} matches multiple licenses: {license} and {other_license}")]
95     DuplicateLicenseFileName {
96         /// The name of the license file.
97         file_name: String,
98         /// The license associated with the filename.
99         license: String,
100         /// The other license that is also associated with the same filename.
101         other_license: String,
102     },
103     /// License file name not findable by any known license file glob patterns.
104     #[error(
105         "The license file name {0} for {1} is not findable by any known license file glob patterns"
106     )]
107     LicenseFileNotFindable(String, String),
108     /// Inexact license file name not findable by any known license file glob patterns.
109     #[error(
110         "The inexact license file name {0} is not findable by any known license file glob patterns"
111     )]
112     InexactLicenseFileNotFindable(String),
113     /// The list of license preferences contains an unknown license
114     #[error("The license preference list contains unknown license {0}")]
115     LicensePreferenceForUnknownLicense(String),
116 }
117 
/// The result of license file verification, containing a set of acceptable licenses, and the
/// corresponding license files, if present.
///
/// Each license requirement appears in at most one of `unsatisfied` and `satisfied`; the
/// remaining maps describe license files that were found but are not needed to satisfy the
/// crate's license terms.
#[derive(Debug)]
pub struct LicenseState {
    /// Unsatisfied licenses. These are licenses that are required by evaluation of the SPDX
    /// license expression in Cargo.toml, but for which no matching license file was found.
    pub unsatisfied: BTreeSet<LicenseReq>,
    /// Licenses for which a license file was found, and the path to that file.
    pub satisfied: BTreeMap<LicenseReq, PathBuf>,
    /// License files which are unneeded. That is, they are for license terms we are not
    /// required to follow, such as LICENSE-MIT in the case of "Apache-2.0 OR MIT".
    pub unneeded: BTreeMap<LicenseReq, PathBuf>,
    /// Unexpected license files. They don't correspond to any terms in Cargo.toml, and
    /// indicate that the stated license terms may be incorrect.
    pub unexpected: BTreeMap<LicenseReq, PathBuf>,
}
134 
135 impl LicenseState {
from(terms: LicenseTerms, file_classifiers: &Vec<Classifier>) -> LicenseState136     fn from(terms: LicenseTerms, file_classifiers: &Vec<Classifier>) -> LicenseState {
137         let mut state = LicenseState {
138             unsatisfied: terms.required,
139             satisfied: BTreeMap::new(),
140             unneeded: BTreeMap::new(),
141             unexpected: BTreeMap::new(),
142         };
143         let not_required = terms.not_required;
144 
145         for classifier in file_classifiers {
146             if let Some(licensee) = classifier.by_name() {
147                 let req = licensee.clone().into_req();
148                 if state.unsatisfied.remove(&req) {
149                     state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
150                 } else if !state.satisfied.contains_key(&req) {
151                     if not_required.contains(&req) {
152                         state.unneeded.insert(req.clone(), classifier.file_path().to_owned());
153                     } else {
154                         state.unexpected.insert(req.clone(), classifier.file_path().to_owned());
155                     }
156                 }
157             }
158         }
159 
160         if !state.unsatisfied.is_empty() {
161             for classifier in file_classifiers {
162                 for licensee in classifier.by_content() {
163                     let req = licensee.clone().into_req();
164                     if state.unsatisfied.remove(&req) {
165                         state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
166                     } else if !state.satisfied.contains_key(&req) && !not_required.contains(&req) {
167                         state.unexpected.insert(req.clone(), classifier.file_path().to_owned());
168                     }
169                 }
170                 if classifier.by_content().len() == 1 {
171                     let req = classifier.by_content().first().unwrap().clone().into_req();
172                     if !state.satisfied.contains_key(&req) && not_required.contains(&req) {
173                         state.unneeded.insert(req.clone(), classifier.file_path().to_owned());
174                     }
175                 }
176             }
177         }
178 
179         if !state.unsatisfied.is_empty() {
180             for classifier in file_classifiers {
181                 if classifier.by_name().is_some() || !classifier.by_content().is_empty() {
182                     continue;
183                 }
184                 if let Some(licensee) = classifier.by_content_fuzzy() {
185                     let req = licensee.clone().into_req();
186                     if state.unsatisfied.remove(&req) {
187                         state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
188                         if state.unsatisfied.is_empty() {
189                             break;
190                         }
191                     }
192                 }
193             }
194         }
195 
196         state
197     }
198 }
199 
200 /// Evaluates the license expression for a crate at a given path and returns a minimal set of
201 /// acceptable licenses, and whether we could find a matching license file for each one.
202 ///
203 /// Returns an error if the licensing for the crate requires us to adopt unacceptable licenses.
find_licenses( crate_path: impl AsRef<Path>, crate_name: &str, cargo_toml_license: Option<&str>, ) -> Result<LicenseState, Error>204 pub fn find_licenses(
205     crate_path: impl AsRef<Path>,
206     crate_name: &str,
207     cargo_toml_license: Option<&str>,
208 ) -> Result<LicenseState, Error> {
209     let crate_path = crate_path.as_ref();
210 
211     let terms = LICENSE_DATA.evaluate_crate_license(crate_name, cargo_toml_license)?;
212     Ok(LicenseState::from(
213         terms,
214         &Classifier::new_vec(
215             crate_path.to_owned(),
216             license_file_finder::find_license_files(crate_path)?,
217         )?,
218     ))
219 }
220 
#[cfg(test)]
mod tests {
    use super::*;

    /// License requirements that the assertions below compare against.
    mod license_req {
        use spdx::{LicenseReq, Licensee};

        /// Parses a single SPDX license identifier into a requirement.
        fn parse(id: &str) -> LicenseReq {
            Licensee::parse(id).unwrap().into_req()
        }

        pub(super) fn apache() -> LicenseReq {
            parse("Apache-2.0")
        }

        pub(super) fn mit() -> LicenseReq {
            parse("MIT")
        }
    }

    /// License terms evaluated from SPDX expressions for a placeholder crate named "foo".
    mod license_terms {
        use crate::{LicenseTerms, LICENSE_DATA};

        /// Evaluates `expression` as the Cargo.toml license of the placeholder crate.
        fn evaluate(expression: &str) -> LicenseTerms {
            LICENSE_DATA.evaluate_crate_license("foo", Some(expression)).unwrap()
        }

        pub(super) fn apache() -> LicenseTerms {
            evaluate("Apache-2.0")
        }

        pub(super) fn apache_or_mit() -> LicenseTerms {
            evaluate("Apache-2.0 OR MIT")
        }

        pub(super) fn apache_or_bsd() -> LicenseTerms {
            evaluate("Apache-2.0 OR BSD-3-Clause")
        }
    }

    /// Classifiers for in-memory license files, built from a file name and file contents.
    mod classifiers {
        use crate::Classifier;

        /// A classifier for a file with the given name and no contents.
        fn empty_file(file_name: &'static str) -> Classifier {
            Classifier::new(file_name, String::new())
        }

        /// An empty file named `LICENSE-APACHE`.
        pub(super) fn apache_by_name() -> Classifier {
            empty_file("LICENSE-APACHE")
        }

        /// A file named `LICENSE` holding the Apache-2.0 license text.
        pub(super) fn apache_by_content() -> Classifier {
            Classifier::new("LICENSE", include_str!("licenses/Apache-2.0.txt").to_string())
        }

        /// An empty file named `LICENSE`.
        pub(super) fn unknown() -> Classifier {
            empty_file("LICENSE")
        }

        /// An empty file named `LICENSE-MIT`.
        pub(super) fn mit_by_name() -> Classifier {
            empty_file("LICENSE-MIT")
        }

        /// A file named `LICENSE` holding the Apache-2.0 and MIT texts joined by a separator.
        pub(super) fn apache_and_mit_concatenated() -> Classifier {
            let texts = [include_str!("licenses/Apache-2.0.txt"), include_str!("licenses/MIT.txt")];
            Classifier::new("LICENSE", texts.join("\n\n\n-----\n\n\n"))
        }

        /// A file named `LICENSE` holding the BSD-3-Clause variant from the test data.
        pub(super) fn bsd_fuzzy() -> Classifier {
            Classifier::new(
                "LICENSE",
                include_str!("testdata/BSD-3-Clause-bindgen.txt").to_string(),
            )
        }

        /// An empty file named `LICENSE-BSD`.
        pub(super) fn bsd_inexact() -> Classifier {
            empty_file("LICENSE-BSD")
        }
    }

    /// Builds the map expected when `req` is associated only with the file behind `classifier`.
    fn only(req: LicenseReq, classifier: Classifier) -> BTreeMap<LicenseReq, PathBuf> {
        BTreeMap::from([(req, classifier.file_path().to_owned())])
    }

    /// Asserts that Apache-2.0 was satisfied by the `LICENSE-APACHE` file and that nothing is
    /// left unsatisfied, unneeded, or unexpected.
    #[track_caller]
    fn assert_only_apache_by_name(state: &LicenseState) {
        assert_eq!(state.satisfied, only(license_req::apache(), classifiers::apache_by_name()));
        assert!(state.unsatisfied.is_empty());
        assert!(state.unneeded.is_empty());
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn basic() {
        let terms = license_terms::apache_or_mit();
        let files = vec![classifiers::apache_by_name(), classifiers::mit_by_name()];
        let state = LicenseState::from(terms, &files);

        assert_eq!(state.satisfied, only(license_req::apache(), classifiers::apache_by_name()));
        assert!(state.unsatisfied.is_empty());
        assert_eq!(state.unneeded, only(license_req::mit(), classifiers::mit_by_name()));
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn unsatisfied() {
        let terms = license_terms::apache_or_mit();
        let state = LicenseState::from(terms, &vec![]);

        assert!(state.satisfied.is_empty());
        assert_eq!(state.unsatisfied, BTreeSet::from([license_req::apache()]));
        assert!(state.unneeded.is_empty());
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn unexpected() {
        let terms = license_terms::apache();
        let files = vec![classifiers::apache_by_name(), classifiers::mit_by_name()];
        let state = LicenseState::from(terms, &files);

        assert_eq!(state.satisfied, only(license_req::apache(), classifiers::apache_by_name()));
        assert!(state.unsatisfied.is_empty());
        assert!(state.unneeded.is_empty());
        assert_eq!(state.unexpected, only(license_req::mit(), classifiers::mit_by_name()));
    }

    #[test]
    fn name_preferred_to_content() {
        let terms = license_terms::apache();
        let files = vec![classifiers::apache_by_content(), classifiers::apache_by_name()];
        let state = LicenseState::from(terms, &files);

        assert_only_apache_by_name(&state);
    }

    #[test]
    fn unknown_files_not_reported() {
        let terms = license_terms::apache();
        let files = vec![classifiers::apache_by_name(), classifiers::unknown()];
        let state = LicenseState::from(terms, &files);

        assert_only_apache_by_name(&state);
    }

    #[test]
    fn concatenated_licenses_not_reported_as_unexpected() {
        let terms = license_terms::apache_or_mit();
        let files = vec![classifiers::apache_and_mit_concatenated()];
        let state = LicenseState::from(terms, &files);

        assert_eq!(
            state.satisfied,
            only(license_req::apache(), classifiers::apache_and_mit_concatenated())
        );
        assert!(state.unsatisfied.is_empty());
        assert!(state.unneeded.is_empty());
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn fuzzy_classifications_not_reported_as_unneeded_or_unexpected() {
        // BSD-3-Clause is an alternative that was not chosen.
        let terms = license_terms::apache_or_bsd();
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(terms, &files);
        assert_only_apache_by_name(&state);

        // BSD-3-Clause is not mentioned in the license terms at all.
        let terms = license_terms::apache();
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(terms, &files);
        assert_only_apache_by_name(&state);
    }

    // NOTE(review): the test name says inexact names are "reported", yet both scenarios assert
    // that `unneeded` and `unexpected` are empty. The second scenario also uses `bsd_fuzzy()`
    // rather than `bsd_inexact()`, which makes it a repeat of the second scenario of the fuzzy
    // test above. Confirm the intended behavior before changing either the name or the fixtures.
    #[test]
    fn inexact_names_reported_as_unneeded_and_unexpected() {
        let terms = license_terms::apache_or_bsd();
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_inexact()];
        let state = LicenseState::from(terms, &files);
        assert_only_apache_by_name(&state);

        let terms = license_terms::apache();
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(terms, &files);
        assert_only_apache_by_name(&state);
    }
}
463