1// Copyright 2021 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package compliance 16 17import ( 18 "fmt" 19 "io" 20 "io/fs" 21 "os" 22 "strings" 23 "sync" 24 25 "android/soong/compliance/license_metadata_proto" 26 27 "google.golang.org/protobuf/encoding/prototext" 28) 29 30var ( 31 // ConcurrentReaders is the size of the task pool for limiting resource usage e.g. open files. 32 ConcurrentReaders = 5 33) 34 35type globalFS struct{} 36 37func (s globalFS) Open(name string) (fs.File, error) { 38 return os.Open(name) 39} 40 41var FS globalFS 42 43// GetFS returns a filesystem for accessing files under the OUT_DIR environment variable. 44func GetFS(outDir string) fs.FS { 45 if len(outDir) > 0 { 46 return os.DirFS(outDir) 47 } 48 return os.DirFS(".") 49} 50 51// result describes the outcome of reading and parsing a single license metadata file. 52type result struct { 53 // file identifies the path to the license metadata file 54 file string 55 56 // target contains the parsed metadata or nil if an error 57 target *TargetNode 58 59 // err is nil unless an error occurs 60 err error 61} 62 63// receiver coordinates the tasks for reading and parsing license metadata files. 64type receiver struct { 65 // lg accumulates the read metadata and becomes the final resulting LicenseGraph. 66 lg *LicenseGraph 67 68 // rootFS locates the root of the file system from which to read the files. 69 rootFS fs.FS 70 71 // stderr identifies the error output writer. 72 stderr io.Writer 73 74 // task provides a fixed-size task pool to limit concurrent open files etc. 75 task chan bool 76 77 // results returns one license metadata file result at a time. 78 results chan *result 79 80 // wg detects when done 81 wg sync.WaitGroup 82} 83 84// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph. 85// 86// `files` become the root files of the graph for top-down walks of the graph. 87func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) { 88 if len(files) == 0 { 89 return nil, fmt.Errorf("no license metadata to analyze") 90 } 91 if ConcurrentReaders < 1 { 92 return nil, fmt.Errorf("need at least one task in pool") 93 } 94 95 lg := newLicenseGraph() 96 for _, f := range files { 97 if strings.HasSuffix(f, "meta_lic") { 98 lg.rootFiles = append(lg.rootFiles, f) 99 } else { 100 lg.rootFiles = append(lg.rootFiles, f+".meta_lic") 101 } 102 } 103 104 recv := &receiver{ 105 lg: lg, 106 rootFS: rootFS, 107 stderr: stderr, 108 task: make(chan bool, ConcurrentReaders), 109 results: make(chan *result, ConcurrentReaders), 110 wg: sync.WaitGroup{}, 111 } 112 for i := 0; i < ConcurrentReaders; i++ { 113 recv.task <- true 114 } 115 116 readFiles := func() { 117 lg.mu.Lock() 118 // identify the metadata files to schedule reading tasks for 119 for _, f := range lg.rootFiles { 120 lg.targets[f] = nil 121 } 122 lg.mu.Unlock() 123 124 // schedule tasks to read the files 125 for _, f := range lg.rootFiles { 126 readFile(recv, f) 127 } 128 129 // schedule a task to wait until finished and close the channel. 130 go func() { 131 recv.wg.Wait() 132 close(recv.task) 133 close(recv.results) 134 }() 135 } 136 go readFiles() 137 138 // tasks to read license metadata files are scheduled; read and process results from channel 139 var err error 140 for recv.results != nil { 141 select { 142 case r, ok := <-recv.results: 143 if ok { 144 // handle errors by nil'ing ls, setting err, and clobbering results channel 145 if r.err != nil { 146 err = r.err 147 fmt.Fprintf(recv.stderr, "%s\n", err.Error()) 148 lg = nil 149 recv.results = nil 150 continue 151 } 152 153 // record the parsed metadata (guarded by mutex) 154 recv.lg.mu.Lock() 155 lg.targets[r.target.name] = r.target 156 recv.lg.mu.Unlock() 157 } else { 158 // finished -- nil the results channel 159 recv.results = nil 160 } 161 } 162 } 163 164 if lg != nil { 165 esize := 0 166 for _, tn := range lg.targets { 167 esize += len(tn.proto.Deps) 168 } 169 lg.edges = make(TargetEdgeList, 0, esize) 170 for _, tn := range lg.targets { 171 tn.licenseConditions = LicenseConditionSetFromNames(tn, tn.proto.LicenseConditions...) 172 err = addDependencies(lg, tn) 173 if err != nil { 174 return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err) 175 } 176 tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{} 177 } 178 } 179 return lg, err 180 181} 182 183// targetNode contains the license metadata for a node in the license graph. 184type targetNode struct { 185 proto license_metadata_proto.LicenseMetadata 186 187 // name is the path to the metadata file. 188 name string 189 190 // lg is the license graph the node belongs to. 191 lg *LicenseGraph 192 193 // edges identifies the dependencies of the target. 194 edges TargetEdgeList 195 196 // licenseConditions identifies the set of license conditions originating at the target node. 197 licenseConditions LicenseConditionSet 198 199 // resolution identifies the set of conditions resolved by acting on the target node. 200 resolution LicenseConditionSet 201} 202 203// addDependencies converts the proto AnnotatedDependencies into `edges` 204func addDependencies(lg *LicenseGraph, tn *TargetNode) error { 205 tn.edges = make(TargetEdgeList, 0, len(tn.proto.Deps)) 206 for _, ad := range tn.proto.Deps { 207 dependency := ad.GetFile() 208 if len(dependency) == 0 { 209 return fmt.Errorf("missing dependency name") 210 } 211 dtn, ok := lg.targets[dependency] 212 if !ok { 213 return fmt.Errorf("unknown dependency name %q", dependency) 214 } 215 if dtn == nil { 216 return fmt.Errorf("nil dependency for name %q", dependency) 217 } 218 annotations := newEdgeAnnotations() 219 for _, a := range ad.Annotations { 220 // look up a common constant annotation string from a small map 221 // instead of creating 1000's of copies of the same 3 strings. 222 if ann, ok := RecognizedAnnotations[a]; ok { 223 annotations.annotations[ann] = struct{}{} 224 } 225 } 226 edge := &TargetEdge{tn, dtn, annotations} 227 lg.edges = append(lg.edges, edge) 228 tn.edges = append(tn.edges, edge) 229 } 230 return nil 231} 232 233// readFile is a task to read and parse a single license metadata file, and to schedule 234// additional tasks for reading and parsing dependencies as necessary. 235func readFile(recv *receiver, file string) { 236 recv.wg.Add(1) 237 <-recv.task 238 go func() { 239 f, err := recv.rootFS.Open(file) 240 if err != nil { 241 recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)} 242 return 243 } 244 245 // read the file 246 data, err := io.ReadAll(f) 247 if err != nil { 248 recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)} 249 return 250 } 251 f.Close() 252 253 tn := &TargetNode{lg: recv.lg, name: file} 254 255 err = prototext.Unmarshal(data, &tn.proto) 256 if err != nil { 257 recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)} 258 return 259 } 260 261 // send result for this file and release task before scheduling dependencies, 262 // but do not signal done to WaitGroup until dependencies are scheduled. 263 recv.results <- &result{file, tn, nil} 264 recv.task <- true 265 266 // schedule tasks as necessary to read dependencies 267 for _, ad := range tn.proto.Deps { 268 dependency := ad.GetFile() 269 // decide, signal and record whether to schedule task in critical section 270 recv.lg.mu.Lock() 271 _, alreadyScheduled := recv.lg.targets[dependency] 272 if !alreadyScheduled { 273 recv.lg.targets[dependency] = nil 274 } 275 recv.lg.mu.Unlock() 276 // schedule task to read dependency file outside critical section 277 if !alreadyScheduled { 278 readFile(recv, dependency) 279 } 280 } 281 282 // signal task done after scheduling dependencies 283 recv.wg.Done() 284 }() 285} 286