// Copyright 2024 The BoringSSL Authors // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above // copyright notice and this permission notice appear in all copies. // // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. package main import ( "bytes" "cmp" "crypto/sha256" "fmt" "os/exec" "slices" "strings" "sync" ) type treeEntryMode int const ( treeEntryRegular treeEntryMode = iota treeEntryExecutable treeEntrySymlink ) func (m treeEntryMode) String() string { switch m { case treeEntryRegular: return "regular file" case treeEntryExecutable: return "executable file" case treeEntrySymlink: return "symbolic link" } panic(fmt.Sprintf("unknown mode %d", m)) } type treeEntry struct { path string mode treeEntryMode sha256 []byte } func sortTree(tree []treeEntry) { slices.SortFunc(tree, func(a, b treeEntry) int { return cmp.Compare(a.path, b.path) }) } func compareTrees(got, want []treeEntry) error { // Check for duplicate files. for i := 0; i < len(got)-1; i++ { if got[i].path == got[i+1].path { return fmt.Errorf("duplicate file %q in archive", got[i].path) } } // Check for differences between the two trees. for i := 0; i < len(got) && i < len(want); i++ { if got[i].path == want[i].path { if got[i].mode != want[i].mode { return fmt.Errorf("file %q was a %s but should have been a %s", got[i].path, got[i].mode, want[i].mode) } if !bytes.Equal(got[i].sha256, want[i].sha256) { return fmt.Errorf("hash of %q was %x but should have been %x", got[i].path, got[i].sha256, want[i].sha256) } } else if got[i].path < want[i].path { return fmt.Errorf("unexpected file %q", got[i].path) } else { return fmt.Errorf("missing file %q", want[i].path) } } if len(want) < len(got) { return fmt.Errorf("unexpected file %q", got[len(want)].path) } if len(got) < len(want) { return fmt.Errorf("missing file %q", want[len(got)].path) } return nil } type gitTreeEntry struct { path string mode treeEntryMode objectName string } func gitListTree(treeish string) ([]gitTreeEntry, error) { var stdout, stderr bytes.Buffer cmd := exec.Command("git", "ls-tree", "-r", "-z", treeish) cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return nil, fmt.Errorf("error listing git tree %q: %w\n%s\n", treeish, err, stderr.String()) } lines := strings.Split(stdout.String(), "\x00") ret := make([]gitTreeEntry, 0, len(lines)) for _, line := range lines { if len(line) == 0 { continue } idx := strings.IndexByte(line, '\t') if idx < 0 { return nil, fmt.Errorf("could not parse ls-tree output %q", line) } info, path := line[:idx], line[idx+1:] infos := strings.Split(info, " ") if len(infos) != 3 { return nil, fmt.Errorf("could not parse ls-tree output %q", line) } perms, objectType, objectName := infos[0], infos[1], infos[2] if objectType != "blob" { return nil, fmt.Errorf("unexpected object type in ls-tree output %q", line) } var mode treeEntryMode switch perms { case "100644": mode = treeEntryRegular case "100755": mode = treeEntryExecutable case "120000": mode = treeEntrySymlink default: return nil, fmt.Errorf("unexpected file mode in ls-tree output %q", line) } ret = append(ret, gitTreeEntry{path: path, mode: mode, objectName: objectName}) } return ret, nil } func gitHashBlob(objectName string) ([]byte, error) { h := sha256.New() var stderr bytes.Buffer cmd := exec.Command("git", "cat-file", "blob", objectName) cmd.Stdout = h cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return nil, fmt.Errorf("error hashing git object %q: %w\n%s\n", objectName, err, stderr.String()) } return h.Sum(nil), nil } func gitHashTree(s *stepPrinter, treeish string) ([]treeEntry, error) { gitTree, err := gitListTree(treeish) if err != nil { return nil, err } s.setTotal(len(gitTree)) // Hashing objects one by one is slow, so parallelize. Ideally we could // just use the object name, but git uses SHA-1, so checking a SHA-265 // hash seems prudent. var workerErr error var workerLock sync.Mutex var wg sync.WaitGroup jobs := make(chan gitTreeEntry, *numWorkers) results := make(chan treeEntry, *numWorkers) for i := 0; i < *numWorkers; i++ { wg.Add(1) go func() { defer wg.Done() for job := range jobs { workerLock.Lock() shouldStop := workerErr != nil workerLock.Unlock() if shouldStop { break } sha256, err := gitHashBlob(job.objectName) if err != nil { workerLock.Lock() if workerErr == nil { workerErr = err } workerLock.Unlock() break } results <- treeEntry{path: job.path, mode: job.mode, sha256: sha256} } }() } go func() { for _, job := range gitTree { jobs <- job } close(jobs) wg.Wait() close(results) }() tree := make([]treeEntry, 0, len(gitTree)) for result := range results { s.addProgress(1) tree = append(tree, result) } if workerErr != nil { return nil, workerErr } if len(tree) != len(gitTree) { panic("input and output sizes did not match") } sortTree(tree) return tree, nil }