1// Copyright 2021 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package compliance 16 17import ( 18 "bufio" 19 "crypto/md5" 20 "fmt" 21 "io" 22 "io/fs" 23 "net/url" 24 "path/filepath" 25 "regexp" 26 "sort" 27 "strings" 28) 29 30const ( 31 noProjectName = "\u2205" 32) 33 34var ( 35 nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`) 36 descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`) 37 versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`) 38 licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`) 39) 40 41// NoticeIndex transforms license metadata into license text hashes, library 42// names, and install paths indexing them for fast lookup/iteration. 43type NoticeIndex struct { 44 // lg identifies the license graph to which the index applies. 45 lg *LicenseGraph 46 // rs identifies the set of resolutions upon which the index is based. 47 rs ResolutionSet 48 // shipped identifies the set of target nodes shipped directly or as derivative works. 49 shipped *TargetNodeSet 50 // rootFS locates the root of the file system from which to read the files. 51 rootFS fs.FS 52 // hash maps license text filenames to content hashes 53 hash map[string]hash 54 // text maps content hashes to content 55 text map[hash][]byte 56 // hashLibInstall maps hashes to libraries to install paths. 57 hashLibInstall map[hash]map[string]map[string]struct{} 58 // installHashLib maps install paths to libraries to hashes. 59 installHashLib map[string]map[hash]map[string]struct{} 60 // libHash maps libraries to hashes. 61 libHash map[string]map[hash]struct{} 62 // targetHash maps target nodes to hashes. 63 targetHashes map[*TargetNode]map[hash]struct{} 64 // projectName maps project directory names to project name text. 65 projectName map[string]string 66 // files lists all the files accessed during indexing 67 files []string 68} 69 70// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs` 71// using the files rooted at `rootFS`. 72func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) { 73 if rs == nil { 74 rs = ResolveNotices(lg) 75 } 76 ni := &NoticeIndex{ 77 lg: lg, 78 rs: rs, 79 shipped: ShippedNodes(lg), 80 rootFS: rootFS, 81 hash: make(map[string]hash), 82 text: make(map[hash][]byte), 83 hashLibInstall: make(map[hash]map[string]map[string]struct{}), 84 installHashLib: make(map[string]map[hash]map[string]struct{}), 85 libHash: make(map[string]map[hash]struct{}), 86 targetHashes: make(map[*TargetNode]map[hash]struct{}), 87 projectName: make(map[string]string), 88 } 89 90 // index adds all license texts for `tn` to the index. 91 index := func(tn *TargetNode) (map[hash]struct{}, error) { 92 if hashes, ok := ni.targetHashes[tn]; ok { 93 return hashes, nil 94 } 95 hashes := make(map[hash]struct{}) 96 for _, text := range tn.LicenseTexts() { 97 fname := strings.SplitN(text, ":", 2)[0] 98 if _, ok := ni.hash[fname]; !ok { 99 err := ni.addText(fname) 100 if err != nil { 101 return nil, err 102 } 103 } 104 hash := ni.hash[fname] 105 if _, ok := hashes[hash]; !ok { 106 hashes[hash] = struct{}{} 107 } 108 } 109 ni.targetHashes[tn] = hashes 110 return hashes, nil 111 } 112 113 link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) { 114 for h := range hashes { 115 libName := ni.getLibName(tn, h) 116 if _, ok := ni.libHash[libName]; !ok { 117 ni.libHash[libName] = make(map[hash]struct{}) 118 } 119 if _, ok := ni.hashLibInstall[h]; !ok { 120 ni.hashLibInstall[h] = make(map[string]map[string]struct{}) 121 } 122 if _, ok := ni.libHash[libName][h]; !ok { 123 ni.libHash[libName][h] = struct{}{} 124 } 125 for _, installPath := range installPaths { 126 if _, ok := ni.installHashLib[installPath]; !ok { 127 ni.installHashLib[installPath] = make(map[hash]map[string]struct{}) 128 ni.installHashLib[installPath][h] = make(map[string]struct{}) 129 ni.installHashLib[installPath][h][libName] = struct{}{} 130 } else if _, ok = ni.installHashLib[installPath][h]; !ok { 131 ni.installHashLib[installPath][h] = make(map[string]struct{}) 132 ni.installHashLib[installPath][h][libName] = struct{}{} 133 } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok { 134 ni.installHashLib[installPath][h][libName] = struct{}{} 135 } 136 if _, ok := ni.hashLibInstall[h]; !ok { 137 ni.hashLibInstall[h] = make(map[string]map[string]struct{}) 138 ni.hashLibInstall[h][libName] = make(map[string]struct{}) 139 ni.hashLibInstall[h][libName][installPath] = struct{}{} 140 } else if _, ok = ni.hashLibInstall[h][libName]; !ok { 141 ni.hashLibInstall[h][libName] = make(map[string]struct{}) 142 ni.hashLibInstall[h][libName][installPath] = struct{}{} 143 } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok { 144 ni.hashLibInstall[h][libName][installPath] = struct{}{} 145 } 146 } 147 } 148 } 149 150 // returns error from walk below. 151 var err error 152 153 WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool { 154 if err != nil { 155 return false 156 } 157 if !ni.shipped.Contains(tn) { 158 return false 159 } 160 installPaths := getInstallPaths(tn, path) 161 var hashes map[hash]struct{} 162 hashes, err = index(tn) 163 if err != nil { 164 return false 165 } 166 link(tn, hashes, installPaths) 167 if tn.IsContainer() { 168 return true 169 } 170 171 for _, r := range rs.Resolutions(tn) { 172 hashes, err = index(r.actsOn) 173 if err != nil { 174 return false 175 } 176 link(r.actsOn, hashes, installPaths) 177 } 178 return false 179 }) 180 181 if err != nil { 182 return nil, err 183 } 184 185 return ni, nil 186} 187 188// Hashes returns an ordered channel of the hashed license texts. 189func (ni *NoticeIndex) Hashes() chan hash { 190 c := make(chan hash) 191 go func() { 192 libs := make([]string, 0, len(ni.libHash)) 193 for libName := range ni.libHash { 194 libs = append(libs, libName) 195 } 196 sort.Strings(libs) 197 hashes := make(map[hash]struct{}) 198 for _, libName := range libs { 199 hl := make([]hash, 0, len(ni.libHash[libName])) 200 for h := range ni.libHash[libName] { 201 if _, ok := hashes[h]; ok { 202 continue 203 } 204 hashes[h] = struct{}{} 205 hl = append(hl, h) 206 } 207 if len(hl) > 0 { 208 sort.Sort(hashList{ni, libName, "", &hl}) 209 for _, h := range hl { 210 c <- h 211 } 212 } 213 } 214 close(c) 215 }() 216 return c 217} 218 219// InputNoticeFiles returns the list of files that were hashed during IndexLicenseTexts. 220func (ni *NoticeIndex) InputNoticeFiles() []string { 221 files := append([]string(nil), ni.files...) 222 sort.Strings(files) 223 return files 224} 225 226// HashLibs returns the ordered array of library names using the license text 227// hashed as `h`. 228func (ni *NoticeIndex) HashLibs(h hash) []string { 229 libs := make([]string, 0, len(ni.hashLibInstall[h])) 230 for libName := range ni.hashLibInstall[h] { 231 libs = append(libs, libName) 232 } 233 sort.Strings(libs) 234 return libs 235} 236 237// HashLibInstalls returns the ordered array of install paths referencing 238// library `libName` using the license text hashed as `h`. 239func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string { 240 installs := make([]string, 0, len(ni.hashLibInstall[h][libName])) 241 for installPath := range ni.hashLibInstall[h][libName] { 242 installs = append(installs, installPath) 243 } 244 sort.Strings(installs) 245 return installs 246} 247 248// InstallPaths returns the ordered channel of indexed install paths. 249func (ni *NoticeIndex) InstallPaths() chan string { 250 c := make(chan string) 251 go func() { 252 paths := make([]string, 0, len(ni.installHashLib)) 253 for path := range ni.installHashLib { 254 paths = append(paths, path) 255 } 256 sort.Strings(paths) 257 for _, installPath := range paths { 258 c <- installPath 259 } 260 close(c) 261 }() 262 return c 263} 264 265// InstallHashes returns the ordered array of hashes attached to `installPath`. 266func (ni *NoticeIndex) InstallHashes(installPath string) []hash { 267 result := make([]hash, 0, len(ni.installHashLib[installPath])) 268 for h := range ni.installHashLib[installPath] { 269 result = append(result, h) 270 } 271 if len(result) > 0 { 272 sort.Sort(hashList{ni, "", installPath, &result}) 273 } 274 return result 275} 276 277// InstallHashLibs returns the ordered array of library names attached to 278// `installPath` as hash `h`. 279func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string { 280 result := make([]string, 0, len(ni.installHashLib[installPath][h])) 281 for libName := range ni.installHashLib[installPath][h] { 282 result = append(result, libName) 283 } 284 sort.Strings(result) 285 return result 286} 287 288// Libraries returns the ordered channel of indexed library names. 289func (ni *NoticeIndex) Libraries() chan string { 290 c := make(chan string) 291 go func() { 292 libs := make([]string, 0, len(ni.libHash)) 293 for lib := range ni.libHash { 294 libs = append(libs, lib) 295 } 296 sort.Strings(libs) 297 for _, lib := range libs { 298 c <- lib 299 } 300 close(c) 301 }() 302 return c 303} 304 305// HashText returns the file content of the license text hashed as `h`. 306func (ni *NoticeIndex) HashText(h hash) []byte { 307 return ni.text[h] 308} 309 310// getLibName returns the name of the library associated with `noticeFor`. 311func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) string { 312 for _, text := range noticeFor.LicenseTexts() { 313 if !strings.Contains(text, ":") { 314 if ni.hash[text].key != h.key { 315 continue 316 } 317 ln := ni.checkMetadataForLicenseText(noticeFor, text) 318 if len(ln) > 0 { 319 return ln 320 } 321 continue 322 } 323 324 fields := strings.SplitN(text, ":", 2) 325 fname, pname := fields[0], fields[1] 326 if ni.hash[fname].key != h.key { 327 continue 328 } 329 330 ln, err := url.QueryUnescape(pname) 331 if err != nil { 332 continue 333 } 334 return ln 335 } 336 // use name from METADATA if available 337 ln := ni.checkMetadata(noticeFor) 338 if len(ln) > 0 { 339 return ln 340 } 341 // use package_name: from license{} module if available 342 pn := noticeFor.PackageName() 343 if len(pn) > 0 { 344 return pn 345 } 346 for _, p := range noticeFor.Projects() { 347 if strings.HasPrefix(p, "prebuilts/") { 348 for _, licenseText := range noticeFor.LicenseTexts() { 349 if !strings.HasPrefix(licenseText, "prebuilts/") { 350 continue 351 } 352 if !strings.Contains(licenseText, ":") { 353 if ni.hash[licenseText].key != h.key { 354 continue 355 } 356 } else { 357 fields := strings.SplitN(licenseText, ":", 2) 358 fname := fields[0] 359 if ni.hash[fname].key != h.key { 360 continue 361 } 362 } 363 for r, prefix := range SafePrebuiltPrefixes { 364 match := r.FindString(licenseText) 365 if len(match) == 0 { 366 continue 367 } 368 strip := SafePathPrefixes[prefix] 369 if strip { 370 // strip entire prefix 371 match = licenseText[len(match):] 372 } else { 373 // strip from prebuilts/ until safe prefix 374 match = licenseText[len(match)-len(prefix):] 375 } 376 // remove LICENSE or NOTICE or other filename 377 li := strings.LastIndex(match, "/") 378 if li > 0 { 379 match = match[:li] 380 } 381 // remove *licenses/ path segment and subdirectory if in path 382 if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 { 383 match = match[:offsets[len(offsets)-1][0]] 384 li = strings.LastIndex(match, "/") 385 if li > 0 { 386 match = match[:li] 387 } 388 } 389 return match 390 } 391 break 392 } 393 } 394 for prefix, strip := range SafePathPrefixes { 395 if strings.HasPrefix(p, prefix) { 396 if strip { 397 return p[len(prefix):] 398 } else { 399 return p 400 } 401 } 402 } 403 } 404 // strip off [./]meta_lic from license metadata path and extract base name 405 n := noticeFor.name[:len(noticeFor.name)-9] 406 li := strings.LastIndex(n, "/") 407 if li > 0 { 408 n = n[li+1:] 409 } 410 fi := strings.Index(n, "@") 411 if fi > 0 { 412 n = n[:fi] 413 } 414 return n 415} 416 417// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`. 418func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string { 419 for _, p := range noticeFor.Projects() { 420 if name, ok := ni.projectName[p]; ok { 421 if name == noProjectName { 422 continue 423 } 424 return name 425 } 426 name, err := ni.checkMetadataFile(filepath.Join(p, "METADATA")) 427 if err != nil { 428 ni.projectName[p] = noProjectName 429 continue 430 } 431 if len(name) == 0 { 432 ni.projectName[p] = noProjectName 433 continue 434 } 435 ni.projectName[p] = name 436 return name 437 } 438 return "" 439} 440 441// checkMetadataForLicenseText 442func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) string { 443 p := "" 444 for _, proj := range noticeFor.Projects() { 445 if strings.HasPrefix(licenseText, proj) { 446 p = proj 447 } 448 } 449 if len(p) == 0 { 450 p = filepath.Dir(licenseText) 451 for { 452 fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git")) 453 if err == nil && fi.IsDir() { 454 break 455 } 456 if strings.Contains(p, "/") && p != "/" { 457 p = filepath.Dir(p) 458 continue 459 } 460 return "" 461 } 462 } 463 if name, ok := ni.projectName[p]; ok { 464 if name == noProjectName { 465 return "" 466 } 467 return name 468 } 469 name, err := ni.checkMetadataFile(filepath.Join(p, "METADATA")) 470 if err == nil && len(name) > 0 { 471 ni.projectName[p] = name 472 return name 473 } 474 ni.projectName[p] = noProjectName 475 return "" 476} 477 478// checkMetadataFile tries to look up a library name from a METADATA file at `path`. 479func (ni *NoticeIndex) checkMetadataFile(path string) (string, error) { 480 f, err := ni.rootFS.Open(path) 481 if err != nil { 482 return "", err 483 } 484 name := "" 485 description := "" 486 version := "" 487 s := bufio.NewScanner(f) 488 for s.Scan() { 489 line := s.Text() 490 m := nameRegexp.FindStringSubmatch(line) 491 if m != nil { 492 if 1 < len(m) && m[1] != "" { 493 name = m[1] 494 } 495 if version != "" { 496 break 497 } 498 continue 499 } 500 m = versionRegexp.FindStringSubmatch(line) 501 if m != nil { 502 if 1 < len(m) && m[1] != "" { 503 version = m[1] 504 } 505 if name != "" { 506 break 507 } 508 continue 509 } 510 m = descRegexp.FindStringSubmatch(line) 511 if m != nil { 512 if 1 < len(m) && m[1] != "" { 513 description = m[1] 514 } 515 } 516 } 517 _ = s.Err() 518 _ = f.Close() 519 if name != "" { 520 if version != "" { 521 if version[0] == 'v' || version[0] == 'V' { 522 return name + "_" + version, nil 523 } else { 524 return name + "_v_" + version, nil 525 } 526 } 527 return name, nil 528 } 529 if description != "" { 530 return description, nil 531 } 532 return "", nil 533} 534 535// addText reads and indexes the content of a license text file. 536func (ni *NoticeIndex) addText(file string) error { 537 f, err := ni.rootFS.Open(filepath.Clean(file)) 538 if err != nil { 539 return fmt.Errorf("error opening license text file %q: %w", file, err) 540 } 541 542 // read the file 543 text, err := io.ReadAll(f) 544 if err != nil { 545 return fmt.Errorf("error reading license text file %q: %w", file, err) 546 } 547 548 hash := hash{fmt.Sprintf("%x", md5.Sum(text))} 549 ni.hash[file] = hash 550 if _, alreadyPresent := ni.text[hash]; !alreadyPresent { 551 ni.text[hash] = text 552 } 553 554 ni.files = append(ni.files, file) 555 556 return nil 557} 558 559// getInstallPaths returns the names of the used dependencies mapped to their 560// installed locations. 561func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string { 562 if len(path) == 0 { 563 installs := attachesTo.Installed() 564 if 0 == len(installs) { 565 installs = attachesTo.Built() 566 } 567 return installs 568 } 569 570 var getInstalls func(path TargetEdgePath) []string 571 572 getInstalls = func(path TargetEdgePath) []string { 573 // deps contains the output targets from the dependencies in the path 574 var deps []string 575 if len(path) > 1 { 576 // recursively get the targets from the sub-path skipping 1 path segment 577 deps = getInstalls(path[1:]) 578 } else { 579 // stop recursion at 1 path segment 580 deps = path[0].Dependency().TargetFiles() 581 } 582 size := 0 583 prefixes := path[0].Target().TargetFiles() 584 installMap := path[0].Target().InstallMap() 585 sources := path[0].Target().Sources() 586 for _, dep := range deps { 587 found := false 588 for _, source := range sources { 589 if strings.HasPrefix(dep, source) { 590 found = true 591 break 592 } 593 } 594 if !found { 595 continue 596 } 597 for _, im := range installMap { 598 if strings.HasPrefix(dep, im.FromPath) { 599 size += len(prefixes) 600 break 601 } 602 } 603 } 604 605 installs := make([]string, 0, size) 606 for _, dep := range deps { 607 found := false 608 for _, source := range sources { 609 if strings.HasPrefix(dep, source) { 610 found = true 611 break 612 } 613 } 614 if !found { 615 continue 616 } 617 for _, im := range installMap { 618 if strings.HasPrefix(dep, im.FromPath) { 619 for _, prefix := range prefixes { 620 installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):]) 621 } 622 break 623 } 624 } 625 } 626 return installs 627 } 628 allInstalls := getInstalls(path) 629 installs := path[0].Target().Installed() 630 if len(installs) == 0 { 631 return allInstalls 632 } 633 result := make([]string, 0, len(allInstalls)) 634 for _, install := range allInstalls { 635 for _, prefix := range installs { 636 if strings.HasPrefix(install, prefix) { 637 result = append(result, install) 638 } 639 } 640 } 641 return result 642} 643 644// hash is an opaque string derived from md5sum. 645type hash struct { 646 key string 647} 648 649// String returns the hexadecimal representation of the hash. 650func (h hash) String() string { 651 return h.key 652} 653 654// hashList orders an array of hashes 655type hashList struct { 656 ni *NoticeIndex 657 libName string 658 installPath string 659 hashes *[]hash 660} 661 662// Len returns the count of elements in the slice. 663func (l hashList) Len() int { return len(*l.hashes) } 664 665// Swap rearranges 2 elements of the slice so that each occupies the other's 666// former position. 667func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] } 668 669// Less returns true when the `i`th element is lexicographically less than 670// the `j`th element. 671func (l hashList) Less(i, j int) bool { 672 var insti, instj int 673 if len(l.libName) > 0 { 674 insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName]) 675 instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName]) 676 } else { 677 libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i]) 678 libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j]) 679 libsis := strings.Join(libsi, " ") 680 libsjs := strings.Join(libsj, " ") 681 if libsis != libsjs { 682 return libsis < libsjs 683 } 684 } 685 if insti == instj { 686 leni := len(l.ni.text[(*l.hashes)[i]]) 687 lenj := len(l.ni.text[(*l.hashes)[j]]) 688 if leni == lenj { 689 // all else equal, just order by hash value 690 return (*l.hashes)[i].key < (*l.hashes)[j].key 691 } 692 // put shortest texts first within same # of installs 693 return leni < lenj 694 } 695 // reverse order of # installs so that most popular appears first 696 return instj < insti 697} 698