1 // Copyright (C) 2024 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 //! A crate for finding license files in crates that satisfy their SPDX license expressions.
16
17 use std::{
18 collections::{BTreeMap, BTreeSet},
19 path::{Path, PathBuf},
20 };
21
22 use file_classifier::Classifier;
23 use license_data::{CrateLicenseSpecialCase, License};
24 use license_terms::LicenseTerms;
25 use licenses::LICENSE_DATA;
26 use parsed_license_data::{CrateLicenseSpecialCases, ParsedLicense};
27 use spdx::LicenseReq;
28
29 mod file_classifier;
30 mod license_data;
31 mod license_data_tests;
32 mod license_file_finder;
33 mod license_terms;
34 mod licenses;
35 mod parsed_license_data;
36 mod util;
37
38 /// Error types for the 'license_checker' crate.
39 #[derive(thiserror::Error, Debug)]
40 pub enum Error {
41 /// Couldn't convert filesystem path to a string for globbing.
42 #[error("Couldn't convert filesystem path {0} to a string for globbing.")]
43 PathToString(PathBuf),
44 /// Glob error
45 #[error(transparent)]
46 GlobError(#[from] glob::GlobError),
47 /// Glob pattern error
48 #[error(transparent)]
49 PatternError(#[from] glob::PatternError),
50 /// Error stripping prefix from path
51 #[error(transparent)]
52 StripPrefixError(#[from] std::path::StripPrefixError),
53 /// License expression special case doesn't match what's in Cargo.toml
54 #[error("Found a license expression special case for crate {crate_name} but the Cargo.toml license field doesn't match. Expected '{expected_license}', found '{cargo_toml_license}'")]
55 LicenseExpressionSpecialCaseMismatch {
56 /// The name of the crate
57 crate_name: String,
58 /// The expected license expression in special case
59 expected_license: String,
60 /// The actual license expression in Cargo.toml
61 cargo_toml_license: String,
62 },
63 /// The crate doesn't have a license field in Cargo.toml, and no special case was found for this crate
64 #[error("Crate {0} doesn't have a license field in Cargo.toml, and no special case was found for this crate")]
65 MissingLicenseField(String),
66 /// Error parsing SPDX license expression
67 #[error(transparent)]
68 LicenseParseError(#[from] spdx::ParseError),
69 /// Error minimizing SPDX expression
70 #[error(transparent)]
71 MinimizeError(#[from] spdx::expression::MinimizeError),
72 /// Failed to read file
73 #[error("Failed to read {0}: {1}")]
74 FileReadError(PathBuf, std::io::Error),
75 /// The set of known licenses is empty.
76 #[error("The set of known licenses is empty")]
77 NoLicenses,
78 /// License with neither text nor file names.
79 #[error("License {0} has neither text nor file names")]
80 LicenseWithoutTextOrFileNames(String),
81 /// Duplicate license
82 #[error("Duplicate license {0}")]
83 DuplicateLicense(String),
84 /// Duplicate crate license special case
85 #[error("Duplicate license special case for crate {0}")]
86 DuplicateCrateLicenseSpecialCase(String),
87 /// The license text is empty.
88 #[error("The license text for {0} is empty")]
89 EmptyLicenseText(String),
90 /// License text is ambiguous because it is a substring of another license text.
91 #[error("The license text for {0} is a substring of the license text for {0}")]
92 AmbiguousLicenseText(String, String),
93 /// Duplicate license file name
94 #[error("The file name {file_name} matches multiple licenses: {license} and {other_license}")]
95 DuplicateLicenseFileName {
96 /// The name of the license file.
97 file_name: String,
98 /// The license associated with the filename.
99 license: String,
100 /// The other license that is also associated with the same filename.
101 other_license: String,
102 },
103 /// License file name not findable by any known license file glob patterns.
104 #[error(
105 "The license file name {0} for {1} is not findable by any known license file glob patterns"
106 )]
107 LicenseFileNotFindable(String, String),
108 /// Inexact license file name not findable by any known license file glob patterns.
109 #[error(
110 "The inexact license file name {0} is not findable by any known license file glob patterns"
111 )]
112 InexactLicenseFileNotFindable(String),
113 /// The list of license preferences contains an unknown license
114 #[error("The license preference list contains unknown license {0}")]
115 LicensePreferenceForUnknownLicense(String),
116 }
117
/// The result of license file verification, containing a set of acceptable licenses, and the
/// corresponding license files, if present.
///
/// Every map is keyed by the license requirement and stores the path of the license file that
/// was classified as that license.
#[derive(Debug)]
pub struct LicenseState {
    /// Unsatisfied licenses. These are licenses that are required by evaluation of the SPDX
    /// license expression in Cargo.toml, but for which no matching license file was found.
    pub unsatisfied: BTreeSet<LicenseReq>,
    /// Licenses for which a license file was found, and the path to that file.
    pub satisfied: BTreeMap<LicenseReq, PathBuf>,
    /// License files which are unneeded. That is, they are for license terms we are not
    /// required to follow, such as LICENSE-MIT in the case of "Apache-2.0 OR MIT".
    pub unneeded: BTreeMap<LicenseReq, PathBuf>,
    /// Unexpected license files. They don't correspond to any terms in Cargo.toml, and
    /// indicate that the stated license terms may be incorrect.
    pub unexpected: BTreeMap<LicenseReq, PathBuf>,
}
134
135 impl LicenseState {
from(terms: LicenseTerms, file_classifiers: &Vec<Classifier>) -> LicenseState136 fn from(terms: LicenseTerms, file_classifiers: &Vec<Classifier>) -> LicenseState {
137 let mut state = LicenseState {
138 unsatisfied: terms.required,
139 satisfied: BTreeMap::new(),
140 unneeded: BTreeMap::new(),
141 unexpected: BTreeMap::new(),
142 };
143 let not_required = terms.not_required;
144
145 for classifier in file_classifiers {
146 if let Some(licensee) = classifier.by_name() {
147 let req = licensee.clone().into_req();
148 if state.unsatisfied.remove(&req) {
149 state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
150 } else if !state.satisfied.contains_key(&req) {
151 if not_required.contains(&req) {
152 state.unneeded.insert(req.clone(), classifier.file_path().to_owned());
153 } else {
154 state.unexpected.insert(req.clone(), classifier.file_path().to_owned());
155 }
156 }
157 }
158 }
159
160 if !state.unsatisfied.is_empty() {
161 for classifier in file_classifiers {
162 for licensee in classifier.by_content() {
163 let req = licensee.clone().into_req();
164 if state.unsatisfied.remove(&req) {
165 state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
166 } else if !state.satisfied.contains_key(&req) && !not_required.contains(&req) {
167 state.unexpected.insert(req.clone(), classifier.file_path().to_owned());
168 }
169 }
170 if classifier.by_content().len() == 1 {
171 let req = classifier.by_content().first().unwrap().clone().into_req();
172 if !state.satisfied.contains_key(&req) && not_required.contains(&req) {
173 state.unneeded.insert(req.clone(), classifier.file_path().to_owned());
174 }
175 }
176 }
177 }
178
179 if !state.unsatisfied.is_empty() {
180 for classifier in file_classifiers {
181 if classifier.by_name().is_some() || !classifier.by_content().is_empty() {
182 continue;
183 }
184 if let Some(licensee) = classifier.by_content_fuzzy() {
185 let req = licensee.clone().into_req();
186 if state.unsatisfied.remove(&req) {
187 state.satisfied.insert(req.clone(), classifier.file_path().to_owned());
188 if state.unsatisfied.is_empty() {
189 break;
190 }
191 }
192 }
193 }
194 }
195
196 state
197 }
198 }
199
200 /// Evaluates the license expression for a crate at a given path and returns a minimal set of
201 /// acceptable licenses, and whether we could find a matching license file for each one.
202 ///
203 /// Returns an error if the licensing for the crate requires us to adopt unacceptable licenses.
find_licenses( crate_path: impl AsRef<Path>, crate_name: &str, cargo_toml_license: Option<&str>, ) -> Result<LicenseState, Error>204 pub fn find_licenses(
205 crate_path: impl AsRef<Path>,
206 crate_name: &str,
207 cargo_toml_license: Option<&str>,
208 ) -> Result<LicenseState, Error> {
209 let crate_path = crate_path.as_ref();
210
211 let terms = LICENSE_DATA.evaluate_crate_license(crate_name, cargo_toml_license)?;
212 Ok(LicenseState::from(
213 terms,
214 &Classifier::new_vec(
215 crate_path.to_owned(),
216 license_file_finder::find_license_files(crate_path)?,
217 )?,
218 ))
219 }
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected `LicenseReq` keys used in the assertions below.
    mod license_req {
        use spdx::{LicenseReq, Licensee};

        /// Parses an SPDX license identifier and converts it into a `LicenseReq`.
        fn req_for(spdx_id: &str) -> LicenseReq {
            Licensee::parse(spdx_id).unwrap().into_req()
        }

        /// The requirement for `Apache-2.0`.
        pub(super) fn apache() -> LicenseReq {
            req_for("Apache-2.0")
        }

        /// The requirement for `MIT`.
        pub(super) fn mit() -> LicenseReq {
            req_for("MIT")
        }
    }

    /// License terms evaluated from fixed SPDX expressions for a crate named "foo".
    mod license_terms {
        use crate::{LicenseTerms, LICENSE_DATA};

        /// Evaluates `expression` as the Cargo.toml license of the crate "foo".
        fn evaluate(expression: &str) -> LicenseTerms {
            LICENSE_DATA.evaluate_crate_license("foo", Some(expression)).unwrap()
        }

        /// Terms for "Apache-2.0".
        pub(super) fn apache() -> LicenseTerms {
            evaluate("Apache-2.0")
        }

        /// Terms for "Apache-2.0 OR MIT".
        pub(super) fn apache_or_mit() -> LicenseTerms {
            evaluate("Apache-2.0 OR MIT")
        }

        /// Terms for "Apache-2.0 OR BSD-3-Clause".
        pub(super) fn apache_or_bsd() -> LicenseTerms {
            evaluate("Apache-2.0 OR BSD-3-Clause")
        }
    }

    /// File classifier fixtures. The function names state how each file is meant to be
    /// recognised; the classification itself is done by `Classifier`.
    mod classifiers {
        use crate::Classifier;

        /// Divider placed between the two license texts of the concatenated fixture.
        const LICENSE_DIVIDER: &str = "\n\n\n-----\n\n\n";

        /// Builds a classifier for a file called `file_name` with empty content.
        fn empty_file(file_name: &'static str) -> Classifier {
            Classifier::new(file_name, String::new())
        }

        /// An empty file named LICENSE-APACHE.
        pub(super) fn apache_by_name() -> Classifier {
            empty_file("LICENSE-APACHE")
        }

        /// A file named LICENSE holding the Apache-2.0 license text.
        pub(super) fn apache_by_content() -> Classifier {
            Classifier::new("LICENSE", String::from(include_str!("licenses/Apache-2.0.txt")))
        }

        /// An empty file named LICENSE.
        pub(super) fn unknown() -> Classifier {
            empty_file("LICENSE")
        }

        /// An empty file named LICENSE-MIT.
        pub(super) fn mit_by_name() -> Classifier {
            empty_file("LICENSE-MIT")
        }

        /// A file named LICENSE holding the Apache-2.0 text followed by the MIT text.
        pub(super) fn apache_and_mit_concatenated() -> Classifier {
            let combined_text =
                [include_str!("licenses/Apache-2.0.txt"), include_str!("licenses/MIT.txt")]
                    .join(LICENSE_DIVIDER);
            Classifier::new("LICENSE", combined_text)
        }

        /// A file named LICENSE holding the bindgen variant of the BSD-3-Clause text.
        pub(super) fn bsd_fuzzy() -> Classifier {
            Classifier::new(
                "LICENSE",
                String::from(include_str!("testdata/BSD-3-Clause-bindgen.txt")),
            )
        }

        /// An empty file named LICENSE-BSD.
        pub(super) fn bsd_inexact() -> Classifier {
            empty_file("LICENSE-BSD")
        }
    }

    /// Builds the one-entry map expected when `classifier`'s file is recorded under `req`.
    fn single_entry(req: LicenseReq, classifier: Classifier) -> BTreeMap<LicenseReq, PathBuf> {
        BTreeMap::from([(req, classifier.file_path().to_owned())])
    }

    /// Asserts that Apache-2.0 is satisfied by the file of `satisfied_by` and that the
    /// unsatisfied, unneeded and unexpected buckets are all empty.
    #[track_caller]
    fn assert_only_apache_satisfied(state: &LicenseState, satisfied_by: Classifier) {
        assert_eq!(state.satisfied, single_entry(license_req::apache(), satisfied_by));
        assert!(state.unsatisfied.is_empty());
        assert!(state.unneeded.is_empty());
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn basic() {
        let files = vec![classifiers::apache_by_name(), classifiers::mit_by_name()];
        let state = LicenseState::from(license_terms::apache_or_mit(), &files);

        let expected_satisfied =
            single_entry(license_req::apache(), classifiers::apache_by_name());
        let expected_unneeded = single_entry(license_req::mit(), classifiers::mit_by_name());

        assert_eq!(state.satisfied, expected_satisfied);
        assert!(state.unsatisfied.is_empty());
        assert_eq!(state.unneeded, expected_unneeded);
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn unsatisfied() {
        let no_files = vec![];
        let state = LicenseState::from(license_terms::apache_or_mit(), &no_files);

        assert!(state.satisfied.is_empty());
        assert_eq!(state.unsatisfied, BTreeSet::from([license_req::apache()]));
        assert!(state.unneeded.is_empty());
        assert!(state.unexpected.is_empty());
    }

    #[test]
    fn unexpected() {
        let files = vec![classifiers::apache_by_name(), classifiers::mit_by_name()];
        let state = LicenseState::from(license_terms::apache(), &files);

        let expected_satisfied =
            single_entry(license_req::apache(), classifiers::apache_by_name());
        let expected_unexpected = single_entry(license_req::mit(), classifiers::mit_by_name());

        assert_eq!(state.satisfied, expected_satisfied);
        assert!(state.unsatisfied.is_empty());
        assert!(state.unneeded.is_empty());
        assert_eq!(state.unexpected, expected_unexpected);
    }

    #[test]
    fn name_preferred_to_content() {
        // The content-classified file comes first, yet the name-classified one must win.
        let files = vec![classifiers::apache_by_content(), classifiers::apache_by_name()];
        let state = LicenseState::from(license_terms::apache(), &files);

        assert_only_apache_satisfied(&state, classifiers::apache_by_name());
    }

    #[test]
    fn unknown_files_not_reported() {
        let files = vec![classifiers::apache_by_name(), classifiers::unknown()];
        let state = LicenseState::from(license_terms::apache(), &files);

        assert_only_apache_satisfied(&state, classifiers::apache_by_name());
    }

    #[test]
    fn concatenated_licenses_not_reported_as_unexpected() {
        let files = vec![classifiers::apache_and_mit_concatenated()];
        let state = LicenseState::from(license_terms::apache_or_mit(), &files);

        assert_only_apache_satisfied(&state, classifiers::apache_and_mit_concatenated());
    }

    #[test]
    fn fuzzy_classifications_not_reported_as_unneeded_or_unexpected() {
        // BSD-3-Clause is an allowed alternative here: the fuzzy match must not be "unneeded".
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(license_terms::apache_or_bsd(), &files);
        assert_only_apache_satisfied(&state, classifiers::apache_by_name());

        // BSD-3-Clause is not part of the terms here: the fuzzy match must not be "unexpected".
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(license_terms::apache(), &files);
        assert_only_apache_satisfied(&state, classifiers::apache_by_name());
    }

    // NOTE(review): the test name says inexact names are *reported* as unneeded and unexpected,
    // but both halves assert that those buckets are empty, and the second half uses `bsd_fuzzy`
    // rather than `bsd_inexact` (identical to the second half of the fuzzy test above). This
    // looks like a copy-paste leftover; confirm the intended expectations before changing it.
    #[test]
    fn inexact_names_reported_as_unneeded_and_unexpected() {
        let files = vec![classifiers::apache_by_name(), classifiers::bsd_inexact()];
        let state = LicenseState::from(license_terms::apache_or_bsd(), &files);
        assert_only_apache_satisfied(&state, classifiers::apache_by_name());

        let files = vec![classifiers::apache_by_name(), classifiers::bsd_fuzzy()];
        let state = LicenseState::from(license_terms::apache(), &files);
        assert_only_apache_satisfied(&state, classifiers::apache_by_name());
    }
}
463