• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18	"bufio"
19	"crypto/md5"
20	"fmt"
21	"io"
22	"io/fs"
23	"net/url"
24	"path/filepath"
25	"regexp"
26	"sort"
27	"strings"
28)
29
30const (
31	noProjectName = "\u2205"
32)
33
34var (
35	nameRegexp         = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
36	descRegexp         = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
37	versionRegexp      = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
38	licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
39)
40
41// NoticeIndex transforms license metadata into license text hashes, library
42// names, and install paths indexing them for fast lookup/iteration.
43type NoticeIndex struct {
44	// lg identifies the license graph to which the index applies.
45	lg *LicenseGraph
46	// rs identifies the set of resolutions upon which the index is based.
47	rs ResolutionSet
48	// shipped identifies the set of target nodes shipped directly or as derivative works.
49	shipped *TargetNodeSet
50	// rootFS locates the root of the file system from which to read the files.
51	rootFS fs.FS
52	// hash maps license text filenames to content hashes
53	hash map[string]hash
54	// text maps content hashes to content
55	text map[hash][]byte
56	// hashLibInstall maps hashes to libraries to install paths.
57	hashLibInstall map[hash]map[string]map[string]struct{}
58	// installHashLib maps install paths to libraries to hashes.
59	installHashLib map[string]map[hash]map[string]struct{}
60	// libHash maps libraries to hashes.
61	libHash map[string]map[hash]struct{}
62	// targetHash maps target nodes to hashes.
63	targetHashes map[*TargetNode]map[hash]struct{}
64	// projectName maps project directory names to project name text.
65	projectName map[string]string
66	// files lists all the files accessed during indexing
67	files []string
68}
69
70// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
71// using the files rooted at `rootFS`.
72func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
73	if rs == nil {
74		rs = ResolveNotices(lg)
75	}
76	ni := &NoticeIndex{
77		lg:             lg,
78		rs:             rs,
79		shipped:        ShippedNodes(lg),
80		rootFS:         rootFS,
81		hash:           make(map[string]hash),
82		text:           make(map[hash][]byte),
83		hashLibInstall: make(map[hash]map[string]map[string]struct{}),
84		installHashLib: make(map[string]map[hash]map[string]struct{}),
85		libHash:        make(map[string]map[hash]struct{}),
86		targetHashes:   make(map[*TargetNode]map[hash]struct{}),
87		projectName:    make(map[string]string),
88	}
89
90	// index adds all license texts for `tn` to the index.
91	index := func(tn *TargetNode) (map[hash]struct{}, error) {
92		if hashes, ok := ni.targetHashes[tn]; ok {
93			return hashes, nil
94		}
95		hashes := make(map[hash]struct{})
96		for _, text := range tn.LicenseTexts() {
97			fname := strings.SplitN(text, ":", 2)[0]
98			if _, ok := ni.hash[fname]; !ok {
99				err := ni.addText(fname)
100				if err != nil {
101					return nil, err
102				}
103			}
104			hash := ni.hash[fname]
105			if _, ok := hashes[hash]; !ok {
106				hashes[hash] = struct{}{}
107			}
108		}
109		ni.targetHashes[tn] = hashes
110		return hashes, nil
111	}
112
113	link := func(tn *TargetNode, hashes map[hash]struct{}, installPaths []string) {
114		for h := range hashes {
115			libName := ni.getLibName(tn, h)
116			if _, ok := ni.libHash[libName]; !ok {
117				ni.libHash[libName] = make(map[hash]struct{})
118			}
119			if _, ok := ni.hashLibInstall[h]; !ok {
120				ni.hashLibInstall[h] = make(map[string]map[string]struct{})
121			}
122			if _, ok := ni.libHash[libName][h]; !ok {
123				ni.libHash[libName][h] = struct{}{}
124			}
125			for _, installPath := range installPaths {
126				if _, ok := ni.installHashLib[installPath]; !ok {
127					ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
128					ni.installHashLib[installPath][h] = make(map[string]struct{})
129					ni.installHashLib[installPath][h][libName] = struct{}{}
130				} else if _, ok = ni.installHashLib[installPath][h]; !ok {
131					ni.installHashLib[installPath][h] = make(map[string]struct{})
132					ni.installHashLib[installPath][h][libName] = struct{}{}
133				} else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
134					ni.installHashLib[installPath][h][libName] = struct{}{}
135				}
136				if _, ok := ni.hashLibInstall[h]; !ok {
137					ni.hashLibInstall[h] = make(map[string]map[string]struct{})
138					ni.hashLibInstall[h][libName] = make(map[string]struct{})
139					ni.hashLibInstall[h][libName][installPath] = struct{}{}
140				} else if _, ok = ni.hashLibInstall[h][libName]; !ok {
141					ni.hashLibInstall[h][libName] = make(map[string]struct{})
142					ni.hashLibInstall[h][libName][installPath] = struct{}{}
143				} else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
144					ni.hashLibInstall[h][libName][installPath] = struct{}{}
145				}
146			}
147		}
148	}
149
150	// returns error from walk below.
151	var err error
152
153	WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
154		if err != nil {
155			return false
156		}
157		if !ni.shipped.Contains(tn) {
158			return false
159		}
160		installPaths := getInstallPaths(tn, path)
161		var hashes map[hash]struct{}
162		hashes, err = index(tn)
163		if err != nil {
164			return false
165		}
166		link(tn, hashes, installPaths)
167		if tn.IsContainer() {
168			return true
169		}
170
171		for _, r := range rs.Resolutions(tn) {
172			hashes, err = index(r.actsOn)
173			if err != nil {
174				return false
175			}
176			link(r.actsOn, hashes, installPaths)
177		}
178		return false
179	})
180
181	if err != nil {
182		return nil, err
183	}
184
185	return ni, nil
186}
187
188// Hashes returns an ordered channel of the hashed license texts.
189func (ni *NoticeIndex) Hashes() chan hash {
190	c := make(chan hash)
191	go func() {
192		libs := make([]string, 0, len(ni.libHash))
193		for libName := range ni.libHash {
194			libs = append(libs, libName)
195		}
196		sort.Strings(libs)
197		hashes := make(map[hash]struct{})
198		for _, libName := range libs {
199			hl := make([]hash, 0, len(ni.libHash[libName]))
200			for h := range ni.libHash[libName] {
201				if _, ok := hashes[h]; ok {
202					continue
203				}
204				hashes[h] = struct{}{}
205				hl = append(hl, h)
206			}
207			if len(hl) > 0 {
208				sort.Sort(hashList{ni, libName, "", &hl})
209				for _, h := range hl {
210					c <- h
211				}
212			}
213		}
214		close(c)
215	}()
216	return c
217}
218
219// InputNoticeFiles returns the list of files that were hashed during IndexLicenseTexts.
220func (ni *NoticeIndex) InputNoticeFiles() []string {
221	files := append([]string(nil), ni.files...)
222	sort.Strings(files)
223	return files
224}
225
226// HashLibs returns the ordered array of library names using the license text
227// hashed as `h`.
228func (ni *NoticeIndex) HashLibs(h hash) []string {
229	libs := make([]string, 0, len(ni.hashLibInstall[h]))
230	for libName := range ni.hashLibInstall[h] {
231		libs = append(libs, libName)
232	}
233	sort.Strings(libs)
234	return libs
235}
236
237// HashLibInstalls returns the ordered array of install paths referencing
238// library `libName` using the license text hashed as `h`.
239func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
240	installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
241	for installPath := range ni.hashLibInstall[h][libName] {
242		installs = append(installs, installPath)
243	}
244	sort.Strings(installs)
245	return installs
246}
247
248// InstallPaths returns the ordered channel of indexed install paths.
249func (ni *NoticeIndex) InstallPaths() chan string {
250	c := make(chan string)
251	go func() {
252		paths := make([]string, 0, len(ni.installHashLib))
253		for path := range ni.installHashLib {
254			paths = append(paths, path)
255		}
256		sort.Strings(paths)
257		for _, installPath := range paths {
258			c <- installPath
259		}
260		close(c)
261	}()
262	return c
263}
264
265// InstallHashes returns the ordered array of hashes attached to `installPath`.
266func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
267	result := make([]hash, 0, len(ni.installHashLib[installPath]))
268	for h := range ni.installHashLib[installPath] {
269		result = append(result, h)
270	}
271	if len(result) > 0 {
272		sort.Sort(hashList{ni, "", installPath, &result})
273	}
274	return result
275}
276
277// InstallHashLibs returns the ordered array of library names attached to
278// `installPath` as hash `h`.
279func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
280	result := make([]string, 0, len(ni.installHashLib[installPath][h]))
281	for libName := range ni.installHashLib[installPath][h] {
282		result = append(result, libName)
283	}
284	sort.Strings(result)
285	return result
286}
287
288// Libraries returns the ordered channel of indexed library names.
289func (ni *NoticeIndex) Libraries() chan string {
290	c := make(chan string)
291	go func() {
292		libs := make([]string, 0, len(ni.libHash))
293		for lib := range ni.libHash {
294			libs = append(libs, lib)
295		}
296		sort.Strings(libs)
297		for _, lib := range libs {
298			c <- lib
299		}
300		close(c)
301	}()
302	return c
303}
304
305// HashText returns the file content of the license text hashed as `h`.
306func (ni *NoticeIndex) HashText(h hash) []byte {
307	return ni.text[h]
308}
309
310// getLibName returns the name of the library associated with `noticeFor`.
311func (ni *NoticeIndex) getLibName(noticeFor *TargetNode, h hash) string {
312	for _, text := range noticeFor.LicenseTexts() {
313		if !strings.Contains(text, ":") {
314			if ni.hash[text].key != h.key {
315				continue
316			}
317			ln := ni.checkMetadataForLicenseText(noticeFor, text)
318			if len(ln) > 0 {
319				return ln
320			}
321			continue
322		}
323
324		fields := strings.SplitN(text, ":", 2)
325		fname, pname := fields[0], fields[1]
326		if ni.hash[fname].key != h.key {
327			continue
328		}
329
330		ln, err := url.QueryUnescape(pname)
331		if err != nil {
332			continue
333		}
334		return ln
335	}
336	// use name from METADATA if available
337	ln := ni.checkMetadata(noticeFor)
338	if len(ln) > 0 {
339		return ln
340	}
341	// use package_name: from license{} module if available
342	pn := noticeFor.PackageName()
343	if len(pn) > 0 {
344		return pn
345	}
346	for _, p := range noticeFor.Projects() {
347		if strings.HasPrefix(p, "prebuilts/") {
348			for _, licenseText := range noticeFor.LicenseTexts() {
349				if !strings.HasPrefix(licenseText, "prebuilts/") {
350					continue
351				}
352				if !strings.Contains(licenseText, ":") {
353					if ni.hash[licenseText].key != h.key {
354						continue
355					}
356				} else {
357					fields := strings.SplitN(licenseText, ":", 2)
358					fname := fields[0]
359					if ni.hash[fname].key != h.key {
360						continue
361					}
362				}
363				for r, prefix := range SafePrebuiltPrefixes {
364					match := r.FindString(licenseText)
365					if len(match) == 0 {
366						continue
367					}
368					strip := SafePathPrefixes[prefix]
369					if strip {
370						// strip entire prefix
371						match = licenseText[len(match):]
372					} else {
373						// strip from prebuilts/ until safe prefix
374						match = licenseText[len(match)-len(prefix):]
375					}
376					// remove LICENSE or NOTICE or other filename
377					li := strings.LastIndex(match, "/")
378					if li > 0 {
379						match = match[:li]
380					}
381					// remove *licenses/ path segment and subdirectory if in path
382					if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 {
383						match = match[:offsets[len(offsets)-1][0]]
384						li = strings.LastIndex(match, "/")
385						if li > 0 {
386							match = match[:li]
387						}
388					}
389					return match
390				}
391				break
392			}
393		}
394		for prefix, strip := range SafePathPrefixes {
395			if strings.HasPrefix(p, prefix) {
396				if strip {
397					return p[len(prefix):]
398				} else {
399					return p
400				}
401			}
402		}
403	}
404	// strip off [./]meta_lic from license metadata path and extract base name
405	n := noticeFor.name[:len(noticeFor.name)-9]
406	li := strings.LastIndex(n, "/")
407	if li > 0 {
408		n = n[li+1:]
409	}
410	fi := strings.Index(n, "@")
411	if fi > 0 {
412		n = n[:fi]
413	}
414	return n
415}
416
417// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
418func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
419	for _, p := range noticeFor.Projects() {
420		if name, ok := ni.projectName[p]; ok {
421			if name == noProjectName {
422				continue
423			}
424			return name
425		}
426		name, err := ni.checkMetadataFile(filepath.Join(p, "METADATA"))
427		if err != nil {
428			ni.projectName[p] = noProjectName
429			continue
430		}
431		if len(name) == 0 {
432			ni.projectName[p] = noProjectName
433			continue
434		}
435		ni.projectName[p] = name
436		return name
437	}
438	return ""
439}
440
441// checkMetadataForLicenseText
442func (ni *NoticeIndex) checkMetadataForLicenseText(noticeFor *TargetNode, licenseText string) string {
443	p := ""
444	for _, proj := range noticeFor.Projects() {
445		if strings.HasPrefix(licenseText, proj) {
446			p = proj
447		}
448	}
449	if len(p) == 0 {
450		p = filepath.Dir(licenseText)
451		for {
452			fi, err := fs.Stat(ni.rootFS, filepath.Join(p, ".git"))
453			if err == nil && fi.IsDir() {
454				break
455			}
456			if strings.Contains(p, "/") && p != "/" {
457				p = filepath.Dir(p)
458				continue
459			}
460			return ""
461		}
462	}
463	if name, ok := ni.projectName[p]; ok {
464		if name == noProjectName {
465			return ""
466		}
467		return name
468	}
469	name, err := ni.checkMetadataFile(filepath.Join(p, "METADATA"))
470	if err == nil && len(name) > 0 {
471		ni.projectName[p] = name
472		return name
473	}
474	ni.projectName[p] = noProjectName
475	return ""
476}
477
478// checkMetadataFile tries to look up a library name from a METADATA file at `path`.
479func (ni *NoticeIndex) checkMetadataFile(path string) (string, error) {
480	f, err := ni.rootFS.Open(path)
481	if err != nil {
482		return "", err
483	}
484	name := ""
485	description := ""
486	version := ""
487	s := bufio.NewScanner(f)
488	for s.Scan() {
489		line := s.Text()
490		m := nameRegexp.FindStringSubmatch(line)
491		if m != nil {
492			if 1 < len(m) && m[1] != "" {
493				name = m[1]
494			}
495			if version != "" {
496				break
497			}
498			continue
499		}
500		m = versionRegexp.FindStringSubmatch(line)
501		if m != nil {
502			if 1 < len(m) && m[1] != "" {
503				version = m[1]
504			}
505			if name != "" {
506				break
507			}
508			continue
509		}
510		m = descRegexp.FindStringSubmatch(line)
511		if m != nil {
512			if 1 < len(m) && m[1] != "" {
513				description = m[1]
514			}
515		}
516	}
517	_ = s.Err()
518	_ = f.Close()
519	if name != "" {
520		if version != "" {
521			if version[0] == 'v' || version[0] == 'V' {
522				return name + "_" + version, nil
523			} else {
524				return name + "_v_" + version, nil
525			}
526		}
527		return name, nil
528	}
529	if description != "" {
530		return description, nil
531	}
532	return "", nil
533}
534
535// addText reads and indexes the content of a license text file.
536func (ni *NoticeIndex) addText(file string) error {
537	f, err := ni.rootFS.Open(filepath.Clean(file))
538	if err != nil {
539		return fmt.Errorf("error opening license text file %q: %w", file, err)
540	}
541
542	// read the file
543	text, err := io.ReadAll(f)
544	if err != nil {
545		return fmt.Errorf("error reading license text file %q: %w", file, err)
546	}
547
548	hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
549	ni.hash[file] = hash
550	if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
551		ni.text[hash] = text
552	}
553
554	ni.files = append(ni.files, file)
555
556	return nil
557}
558
559// getInstallPaths returns the names of the used dependencies mapped to their
560// installed locations.
561func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
562	if len(path) == 0 {
563		installs := attachesTo.Installed()
564		if 0 == len(installs) {
565			installs = attachesTo.Built()
566		}
567		return installs
568	}
569
570	var getInstalls func(path TargetEdgePath) []string
571
572	getInstalls = func(path TargetEdgePath) []string {
573		// deps contains the output targets from the dependencies in the path
574		var deps []string
575		if len(path) > 1 {
576			// recursively get the targets from the sub-path skipping 1 path segment
577			deps = getInstalls(path[1:])
578		} else {
579			// stop recursion at 1 path segment
580			deps = path[0].Dependency().TargetFiles()
581		}
582		size := 0
583		prefixes := path[0].Target().TargetFiles()
584		installMap := path[0].Target().InstallMap()
585		sources := path[0].Target().Sources()
586		for _, dep := range deps {
587			found := false
588			for _, source := range sources {
589				if strings.HasPrefix(dep, source) {
590					found = true
591					break
592				}
593			}
594			if !found {
595				continue
596			}
597			for _, im := range installMap {
598				if strings.HasPrefix(dep, im.FromPath) {
599					size += len(prefixes)
600					break
601				}
602			}
603		}
604
605		installs := make([]string, 0, size)
606		for _, dep := range deps {
607			found := false
608			for _, source := range sources {
609				if strings.HasPrefix(dep, source) {
610					found = true
611					break
612				}
613			}
614			if !found {
615				continue
616			}
617			for _, im := range installMap {
618				if strings.HasPrefix(dep, im.FromPath) {
619					for _, prefix := range prefixes {
620						installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
621					}
622					break
623				}
624			}
625		}
626		return installs
627	}
628	allInstalls := getInstalls(path)
629	installs := path[0].Target().Installed()
630	if len(installs) == 0 {
631		return allInstalls
632	}
633	result := make([]string, 0, len(allInstalls))
634	for _, install := range allInstalls {
635		for _, prefix := range installs {
636			if strings.HasPrefix(install, prefix) {
637				result = append(result, install)
638			}
639		}
640	}
641	return result
642}
643
644// hash is an opaque string derived from md5sum.
645type hash struct {
646	key string
647}
648
649// String returns the hexadecimal representation of the hash.
650func (h hash) String() string {
651	return h.key
652}
653
654// hashList orders an array of hashes
655type hashList struct {
656	ni          *NoticeIndex
657	libName     string
658	installPath string
659	hashes      *[]hash
660}
661
662// Len returns the count of elements in the slice.
663func (l hashList) Len() int { return len(*l.hashes) }
664
665// Swap rearranges 2 elements of the slice so that each occupies the other's
666// former position.
667func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
668
669// Less returns true when the `i`th element is lexicographically less than
670// the `j`th element.
671func (l hashList) Less(i, j int) bool {
672	var insti, instj int
673	if len(l.libName) > 0 {
674		insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
675		instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
676	} else {
677		libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
678		libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
679		libsis := strings.Join(libsi, " ")
680		libsjs := strings.Join(libsj, " ")
681		if libsis != libsjs {
682			return libsis < libsjs
683		}
684	}
685	if insti == instj {
686		leni := len(l.ni.text[(*l.hashes)[i]])
687		lenj := len(l.ni.text[(*l.hashes)[j]])
688		if leni == lenj {
689			// all else equal, just order by hash value
690			return (*l.hashes)[i].key < (*l.hashes)[j].key
691		}
692		// put shortest texts first within same # of installs
693		return leni < lenj
694	}
695	// reverse order of # installs so that most popular appears first
696	return instj < insti
697}
698