• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18	"fmt"
19	"io"
20	"io/fs"
21	"os"
22	"strings"
23	"sync"
24
25	"android/soong/compliance/license_metadata_proto"
26
27	"google.golang.org/protobuf/encoding/prototext"
28)
29
// ConcurrentReaders is the size of the task pool used while reading license
// metadata; the pool limits resource usage such as the number of open files.
//
// ReadLicenseGraph rejects values less than 1.
var ConcurrentReaders = 5
34
// globalFS is an fs.FS whose Open hands the requested name straight to os.Open,
// so names are resolved by the operating system rather than relative to a root
// directory.
type globalFS struct{}

// Open opens the named file for reading using os.Open.
//
// On failure it returns a literal nil fs.File together with the error. Passing
// the *os.File result of os.Open through unchanged would wrap a nil pointer in
// a non-nil interface value, so a caller comparing the returned file against
// nil would be misled.
func (s globalFS) Open(name string) (fs.File, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	return f, nil
}

// FS is a ready-to-use globalFS value.
var FS globalFS
42
// GetFS returns a filesystem rooted at `outDir` for accessing the files beneath it.
//
// When `outDir` is empty the filesystem is rooted at the current directory (".").
func GetFS(outDir string) fs.FS {
	root := "."
	if outDir != "" {
		root = outDir
	}
	return os.DirFS(root)
}
50
51// result describes the outcome of reading and parsing a single license metadata file.
52type result struct {
53	// file identifies the path to the license metadata file
54	file string
55
56	// target contains the parsed metadata or nil if an error
57	target *TargetNode
58
59	// err is nil unless an error occurs
60	err error
61}
62
63// receiver coordinates the tasks for reading and parsing license metadata files.
64type receiver struct {
65	// lg accumulates the read metadata and becomes the final resulting LicenseGraph.
66	lg *LicenseGraph
67
68	// rootFS locates the root of the file system from which to read the files.
69	rootFS fs.FS
70
71	// stderr identifies the error output writer.
72	stderr io.Writer
73
74	// task provides a fixed-size task pool to limit concurrent open files etc.
75	task chan bool
76
77	// results returns one license metadata file result at a time.
78	results chan *result
79
80	// wg detects when done
81	wg sync.WaitGroup
82}
83
84// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
85//
86// `files` become the root files of the graph for top-down walks of the graph.
87func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
88	if len(files) == 0 {
89		return nil, fmt.Errorf("no license metadata to analyze")
90	}
91	if ConcurrentReaders < 1 {
92		return nil, fmt.Errorf("need at least one task in pool")
93	}
94
95	lg := newLicenseGraph()
96	for _, f := range files {
97		if strings.HasSuffix(f, "meta_lic") {
98			lg.rootFiles = append(lg.rootFiles, f)
99		} else {
100			lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
101		}
102	}
103
104	recv := &receiver{
105		lg:      lg,
106		rootFS:  rootFS,
107		stderr:  stderr,
108		task:    make(chan bool, ConcurrentReaders),
109		results: make(chan *result, ConcurrentReaders),
110		wg:      sync.WaitGroup{},
111	}
112	for i := 0; i < ConcurrentReaders; i++ {
113		recv.task <- true
114	}
115
116	readFiles := func() {
117		lg.mu.Lock()
118		// identify the metadata files to schedule reading tasks for
119		for _, f := range lg.rootFiles {
120			lg.targets[f] = nil
121		}
122		lg.mu.Unlock()
123
124		// schedule tasks to read the files
125		for _, f := range lg.rootFiles {
126			readFile(recv, f)
127		}
128
129		// schedule a task to wait until finished and close the channel.
130		go func() {
131			recv.wg.Wait()
132			close(recv.task)
133			close(recv.results)
134		}()
135	}
136	go readFiles()
137
138	// tasks to read license metadata files are scheduled; read and process results from channel
139	var err error
140	for recv.results != nil {
141		select {
142		case r, ok := <-recv.results:
143			if ok {
144				// handle errors by nil'ing ls, setting err, and clobbering results channel
145				if r.err != nil {
146					err = r.err
147					fmt.Fprintf(recv.stderr, "%s\n", err.Error())
148					lg = nil
149					recv.results = nil
150					continue
151				}
152
153				// record the parsed metadata (guarded by mutex)
154				recv.lg.mu.Lock()
155				lg.targets[r.target.name] = r.target
156				recv.lg.mu.Unlock()
157			} else {
158				// finished -- nil the results channel
159				recv.results = nil
160			}
161		}
162	}
163
164	if lg != nil {
165		esize := 0
166		for _, tn := range lg.targets {
167			esize += len(tn.proto.Deps)
168		}
169		lg.edges = make(TargetEdgeList, 0, esize)
170		for _, tn := range lg.targets {
171			tn.licenseConditions = LicenseConditionSetFromNames(tn, tn.proto.LicenseConditions...)
172			err = addDependencies(lg, tn)
173			if err != nil {
174				return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err)
175			}
176			tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
177		}
178	}
179	return lg, err
180
181}
182
183// targetNode contains the license metadata for a node in the license graph.
184type targetNode struct {
185	proto license_metadata_proto.LicenseMetadata
186
187	// name is the path to the metadata file.
188	name string
189
190	// lg is the license graph the node belongs to.
191	lg *LicenseGraph
192
193	// edges identifies the dependencies of the target.
194	edges TargetEdgeList
195
196	// licenseConditions identifies the set of license conditions originating at the target node.
197	licenseConditions LicenseConditionSet
198
199	// resolution identifies the set of conditions resolved by acting on the target node.
200	resolution LicenseConditionSet
201}
202
203// addDependencies converts the proto AnnotatedDependencies into `edges`
204func addDependencies(lg *LicenseGraph, tn *TargetNode) error {
205	tn.edges = make(TargetEdgeList, 0, len(tn.proto.Deps))
206	for _, ad := range tn.proto.Deps {
207		dependency := ad.GetFile()
208		if len(dependency) == 0 {
209			return fmt.Errorf("missing dependency name")
210		}
211		dtn, ok := lg.targets[dependency]
212		if !ok {
213			return fmt.Errorf("unknown dependency name %q", dependency)
214		}
215		if dtn == nil {
216			return fmt.Errorf("nil dependency for name %q", dependency)
217		}
218		annotations := newEdgeAnnotations()
219		for _, a := range ad.Annotations {
220			// look up a common constant annotation string from a small map
221			// instead of creating 1000's of copies of the same 3 strings.
222			if ann, ok := RecognizedAnnotations[a]; ok {
223				annotations.annotations[ann] = struct{}{}
224			}
225		}
226		edge := &TargetEdge{tn, dtn, annotations}
227		lg.edges = append(lg.edges, edge)
228		tn.edges = append(tn.edges, edge)
229	}
230	return nil
231}
232
233// readFile is a task to read and parse a single license metadata file, and to schedule
234// additional tasks for reading and parsing dependencies as necessary.
235func readFile(recv *receiver, file string) {
236	recv.wg.Add(1)
237	<-recv.task
238	go func() {
239		f, err := recv.rootFS.Open(file)
240		if err != nil {
241			recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
242			return
243		}
244
245		// read the file
246		data, err := io.ReadAll(f)
247		if err != nil {
248			recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
249			return
250		}
251		f.Close()
252
253		tn := &TargetNode{lg: recv.lg, name: file}
254
255		err = prototext.Unmarshal(data, &tn.proto)
256		if err != nil {
257			recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
258			return
259		}
260
261		// send result for this file and release task before scheduling dependencies,
262		// but do not signal done to WaitGroup until dependencies are scheduled.
263		recv.results <- &result{file, tn, nil}
264		recv.task <- true
265
266		// schedule tasks as necessary to read dependencies
267		for _, ad := range tn.proto.Deps {
268			dependency := ad.GetFile()
269			// decide, signal and record whether to schedule task in critical section
270			recv.lg.mu.Lock()
271			_, alreadyScheduled := recv.lg.targets[dependency]
272			if !alreadyScheduled {
273				recv.lg.targets[dependency] = nil
274			}
275			recv.lg.mu.Unlock()
276			// schedule task to read dependency file outside critical section
277			if !alreadyScheduled {
278				readFile(recv, dependency)
279			}
280		}
281
282		// signal task done after scheduling dependencies
283		recv.wg.Done()
284	}()
285}
286