// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package compliance import ( "crypto/md5" "fmt" "io" "io/fs" "net/url" "path/filepath" "regexp" "sort" "strings" "android/soong/tools/compliance/projectmetadata" ) var ( licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`) ) // NoticeIndex transforms license metadata into license text hashes, library // names, and install paths indexing them for fast lookup/iteration. type NoticeIndex struct { // lg identifies the license graph to which the index applies. lg *LicenseGraph // pmix indexes project metadata pmix *projectmetadata.Index // rs identifies the set of resolutions upon which the index is based. rs ResolutionSet // shipped identifies the set of target nodes shipped directly or as derivative works. shipped TargetNodeSet // rootFS locates the root of the file system from which to read the files. rootFS fs.FS // hash maps license text filenames to content hashes hash map[string]hash // text maps content hashes to content text map[hash][]byte // hashLibInstall maps hashes to libraries to install paths. hashLibInstall map[hash]map[string]map[string]struct{} // installHashLib maps install paths to libraries to hashes. installHashLib map[string]map[hash]map[string]struct{} // libHash maps libraries to hashes. libHash map[string]map[hash]struct{} // targetHash maps target nodes to hashes. targetHashes map[*TargetNode]map[hash]struct{} // projectName maps project directory names to project name text. projectName map[string]string // files lists all the files accessed during indexing files []string } // IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs` // using the files rooted at `rootFS`. func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) { if rs == nil { rs = ResolveNotices(lg) } ni := &NoticeIndex{ lg: lg, pmix: projectmetadata.NewIndex(rootFS), rs: rs, shipped: ShippedNodes(lg), rootFS: rootFS, hash: make(map[string]hash), text: make(map[hash][]byte), hashLibInstall: make(map[hash]map[string]map[string]struct{}), installHashLib: make(map[string]map[hash]map[string]struct{}), libHash: make(map[string]map[hash]struct{}), targetHashes: make(map[*TargetNode]map[hash]struct{}), projectName: make(map[string]string), } // index adds all license texts for `tn` to the index. index := func(tn *TargetNode) (map[hash]struct{}, error) { if hashes, ok := ni.targetHashes[tn]; ok { return hashes, nil } hashes := make(map[hash]struct{}) for _, text := range tn.LicenseTexts() { fname := strings.SplitN(text, ":", 2)[0] if _, ok := ni.hash[fname]; !ok { err := ni.addText(fname) if err != nil { return nil, err } } hash := ni.hash[fname] if _, ok := hashes[hash]; !ok { hashes[hash] = struct{}{} } } ni.targetHashes[tn] = hashes return hashes, nil } link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) error { for h := range hashes { libName, err := ni.getLibName(tn, h) if err != nil { return err } if _, ok := ni.libHash[libName]; !ok { ni.libHash[libName] = make(map[hash]struct{}) } if _, ok := ni.hashLibInstall[h]; !ok { ni.hashLibInstall[h] = make(map[string]map[string]struct{}) } if _, ok := ni.libHash[libName][h]; !ok { ni.libHash[libName][h] = struct{}{} } for _, installPath := range installPaths { if _, ok := ni.installHashLib[installPath]; !ok { ni.installHashLib[installPath] = make(map[hash]map[string]struct{}) ni.installHashLib[installPath][h] = make(map[string]struct{}) ni.installHashLib[installPath][h][libName] = struct{}{} } else if _, ok = ni.installHashLib[installPath][h]; !ok { ni.installHashLib[installPath][h] = make(map[string]struct{}) ni.installHashLib[installPath][h][libName] = struct{}{} } else if _, ok = ni.installHashLib[installPath][h][libName]; !ok { ni.installHashLib[installPath][h][libName] = struct{}{} } if _, ok := ni.hashLibInstall[h]; !ok { ni.hashLibInstall[h] = make(map[string]map[string]struct{}) ni.hashLibInstall[h][libName] = make(map[string]struct{}) ni.hashLibInstall[h][libName][installPath] = struct{}{} } else if _, ok = ni.hashLibInstall[h][libName]; !ok { ni.hashLibInstall[h][libName] = make(map[string]struct{}) ni.hashLibInstall[h][libName][installPath] = struct{}{} } else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok { ni.hashLibInstall[h][libName][installPath] = struct{}{} } } } return nil } cacheMetadata := func(tn *TargetNode) { ni.pmix.MetadataForProjects(tn.Projects()...) } // returns error from walk below. var err error WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool { if err != nil { return false } if !ni.shipped.Contains(tn) { return false } go cacheMetadata(tn) installPaths := getInstallPaths(tn, path) var hashes map[hash]struct{} hashes, err = index(tn) if err != nil { return false } err = link(tn, hashes, installPaths) if err != nil { return false } if tn.IsContainer() { return true } for _, r := range rs.Resolutions(tn) { hashes, err = index(r.actsOn) if err != nil { return false } err = link(r.actsOn, hashes, installPaths) if err != nil { return false } } return false }) if err != nil { return nil, err } return ni, nil } // Hashes returns an ordered channel of the hashed license texts. func (ni *NoticeIndex) Hashes() chan hash { c := make(chan hash) go func() { libs := make([]string, 0, len(ni.libHash)) for libName := range ni.libHash { libs = append(libs, libName) } sort.Strings(libs) hashes := make(map[hash]struct{}) for _, libName := range libs { hl := make([]hash, 0, len(ni.libHash[libName])) for h := range ni.libHash[libName] { if _, ok := hashes[h]; ok { continue } hashes[h] = struct{}{} hl = append(hl, h) } if len(hl) > 0 { sort.Sort(hashList{ni, libName, "", &hl}) for _, h := range hl { c <- h } } } close(c) }() return c } // InputFiles returns the complete list of files read during indexing. func (ni *NoticeIndex) InputFiles() []string { projectMeta := ni.pmix.AllMetadataFiles() files := make([]string, 0, len(ni.files) + len(ni.lg.targets) + len(projectMeta)) files = append(files, ni.files...) for f := range ni.lg.targets { files = append(files, f) } files = append(files, projectMeta...) return files } // HashLibs returns the ordered array of library names using the license text // hashed as `h`. func (ni *NoticeIndex) HashLibs(h hash) []string { libs := make([]string, 0, len(ni.hashLibInstall[h])) for libName := range ni.hashLibInstall[h] { libs = append(libs, libName) } sort.Strings(libs) return libs } // HashLibInstalls returns the ordered array of install paths referencing // library `libName` using the license text hashed as `h`. func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string { installs := make([]string, 0, len(ni.hashLibInstall[h][libName])) for installPath := range ni.hashLibInstall[h][libName] { installs = append(installs, installPath) } sort.Strings(installs) return installs } // InstallPaths returns the ordered channel of indexed install paths. func (ni *NoticeIndex) InstallPaths() chan string { c := make(chan string) go func() { paths := make([]string, 0, len(ni.installHashLib)) for path := range ni.installHashLib { paths = append(paths, path) } sort.Strings(paths) for _, installPath := range paths { c <- installPath } close(c) }() return c } // InstallHashes returns the ordered array of hashes attached to `installPath`. func (ni *NoticeIndex) InstallHashes(installPath string) []hash { result := make([]hash, 0, len(ni.installHashLib[installPath])) for h := range ni.installHashLib[installPath] { result = append(result, h) } if len(result) > 0 { sort.Sort(hashList{ni, "", installPath, &result}) } return result } // InstallHashLibs returns the ordered array of library names attached to // `installPath` as hash `h`. func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string { result := make([]string, 0, len(ni.installHashLib[installPath][h])) for libName := range ni.installHashLib[installPath][h] { result = append(result, libName) } sort.Strings(result) return result } // Libraries returns the ordered channel of indexed library names. func (ni *NoticeIndex) Libraries() chan string { c := make(chan string) go func() { libs := make([]string, 0, len(ni.libHash)) for lib := range ni.libHash { libs = append(libs, lib) } sort.Strings(libs) for _, lib := range libs { c <- lib } close(c) }() return c } // HashText returns the file content of the license text hashed as `h`. func (ni *NoticeIndex) HashText(h hash) []byte { return ni.text[h] } // getLibName returns the name of the library associated with `noticeFor`. func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) (string, error) { for _, text := range noticeFor.LicenseTexts() { if !strings.Contains(text, ":") { if ni.hash[text].key != h.key { continue } ln, err := ni.checkMetadataForLicenseText(noticeFor, text) if err != nil { return "", err } if len(ln) > 0 { return ln, nil } continue } fields := strings.SplitN(text, ":", 2) fname, pname := fields[0], fields[1] if ni.hash[fname].key != h.key { continue } ln, err := url.QueryUnescape(pname) if err != nil { continue } return ln, nil } // use name from METADATA if available ln, err := ni.checkMetadata(noticeFor) if err != nil { return "", err } if len(ln) > 0 { return ln, nil } // use package_name: from license{} module if available pn := noticeFor.PackageName() if len(pn) > 0 { return pn, nil } for _, p := range noticeFor.Projects() { if strings.HasPrefix(p, "prebuilts/") { for _, licenseText := range noticeFor.LicenseTexts() { if !strings.HasPrefix(licenseText, "prebuilts/") { continue } if !strings.Contains(licenseText, ":") { if ni.hash[licenseText].key != h.key { continue } } else { fields := strings.SplitN(licenseText, ":", 2) fname := fields[0] if ni.hash[fname].key != h.key { continue } } for _, safePrebuiltPrefix := range safePrebuiltPrefixes { match := safePrebuiltPrefix.re.FindString(licenseText) if len(match) == 0 { continue } if safePrebuiltPrefix.strip { // strip entire prefix match = licenseText[len(match):] } else { // strip from prebuilts/ until safe prefix match = licenseText[len(match)-len(safePrebuiltPrefix.prefix):] } // remove LICENSE or NOTICE or other filename li := strings.LastIndex(match, "/") if li > 0 { match = match[:li] } // remove *licenses/ path segment and subdirectory if in path if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 { match = match[:offsets[len(offsets)-1][0]] li = strings.LastIndex(match, "/") if li > 0 { match = match[:li] } } return match, nil } break } } for _, safePathPrefix := range safePathPrefixes { if strings.HasPrefix(p, safePathPrefix.prefix) { if safePathPrefix.strip { return p[len(safePathPrefix.prefix):], nil } else { return p, nil } } } } // strip off [./]meta_lic from license metadata path and extract base name n := noticeFor.name[:len(noticeFor.name)-9] li := strings.LastIndex(n, "/") if li > 0 { n = n[li+1:] } fi := strings.Index(n, "@") if fi > 0 { n = n[:fi] } return n, nil } // checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`. func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) (string, error) { pms, err := ni.pmix.MetadataForProjects(noticeFor.Projects()...) if err != nil { return "", err } for _, pm := range pms { name := pm.VersionedName() if name != "" { return name, nil } } return "", nil } // checkMetadataForLicenseText func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) (string, error) { p := "" for _, proj := range noticeFor.Projects() { if strings.HasPrefix(licenseText, proj) { p = proj } } if len(p) == 0 { p = filepath.Dir(licenseText) for { fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git")) if err == nil && fi.IsDir() { break } if strings.Contains(p, "/") && p != "/" { p = filepath.Dir(p) continue } return "", nil } } pms, err := ni.pmix.MetadataForProjects(p) if err != nil { return "", err } if pms == nil { return "", nil } return pms[0].VersionedName(), nil } // addText reads and indexes the content of a license text file. func (ni *NoticeIndex) addText(file string) error { f, err := ni.rootFS.Open(filepath.Clean(file)) if err != nil { return fmt.Errorf("error opening license text file %q: %w", file, err) } // read the file text, err := io.ReadAll(f) if err != nil { return fmt.Errorf("error reading license text file %q: %w", file, err) } hash := hash{fmt.Sprintf("%x", md5.Sum(text))} ni.hash[file] = hash if _, alreadyPresent := ni.text[hash]; !alreadyPresent { ni.text[hash] = text } ni.files = append(ni.files, file) return nil } // getInstallPaths returns the names of the used dependencies mapped to their // installed locations. func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string { if len(path) == 0 { installs := attachesTo.Installed() if 0 == len(installs) { installs = attachesTo.Built() } return installs } var getInstalls func(path TargetEdgePath) []string getInstalls = func(path TargetEdgePath) []string { // deps contains the output targets from the dependencies in the path var deps []string if len(path) > 1 { // recursively get the targets from the sub-path skipping 1 path segment deps = getInstalls(path[1:]) } else { // stop recursion at 1 path segment deps = path[0].Dependency().TargetFiles() } size := 0 prefixes := path[0].Target().TargetFiles() installMap := path[0].Target().InstallMap() sources := path[0].Target().Sources() for _, dep := range deps { found := false for _, source := range sources { if strings.HasPrefix(dep, source) { found = true break } } if !found { continue } for _, im := range installMap { if strings.HasPrefix(dep, im.FromPath) { size += len(prefixes) break } } } installs := make([]string, 0, size) for _, dep := range deps { found := false for _, source := range sources { if strings.HasPrefix(dep, source) { found = true break } } if !found { continue } for _, im := range installMap { if strings.HasPrefix(dep, im.FromPath) { for _, prefix := range prefixes { installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):]) } break } } } return installs } allInstalls := getInstalls(path) installs := path[0].Target().Installed() if len(installs) == 0 { return allInstalls } result := make([]string, 0, len(allInstalls)) for _, install := range allInstalls { for _, prefix := range installs { if strings.HasPrefix(install, prefix) { result = append(result, install) } } } return result } // hash is an opaque string derived from md5sum. type hash struct { key string } // String returns the hexadecimal representation of the hash. func (h hash) String() string { return h.key } // hashList orders an array of hashes type hashList struct { ni *NoticeIndex libName string installPath string hashes *[]hash } // Len returns the count of elements in the slice. func (l hashList) Len() int { return len(*l.hashes) } // Swap rearranges 2 elements of the slice so that each occupies the other's // former position. func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] } // Less returns true when the `i`th element is lexicographically less than // the `j`th element. func (l hashList) Less(i, j int) bool { var insti, instj int if len(l.libName) > 0 { insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName]) instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName]) } else { libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i]) libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j]) libsis := strings.Join(libsi, " ") libsjs := strings.Join(libsj, " ") if libsis != libsjs { return libsis < libsjs } } if insti == instj { leni := len(l.ni.text[(*l.hashes)[i]]) lenj := len(l.ni.text[(*l.hashes)[j]]) if leni == lenj { // all else equal, just order by hash value return (*l.hashes)[i].key < (*l.hashes)[j].key } // put shortest texts first within same # of installs return leni < lenj } // reverse order of # installs so that most popular appears first return instj < insti }