1// Copyright 2024 The BoringSSL Authors 2// 3// Permission to use, copy, modify, and/or distribute this software for any 4// purpose with or without fee is hereby granted, provided that the above 5// copyright notice and this permission notice appear in all copies. 6// 7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15package main 16 17import ( 18 "bytes" 19 "cmp" 20 "crypto/sha256" 21 "fmt" 22 "os/exec" 23 "slices" 24 "strings" 25 "sync" 26) 27 28type treeEntryMode int 29 30const ( 31 treeEntryRegular treeEntryMode = iota 32 treeEntryExecutable 33 treeEntrySymlink 34) 35 36func (m treeEntryMode) String() string { 37 switch m { 38 case treeEntryRegular: 39 return "regular file" 40 case treeEntryExecutable: 41 return "executable file" 42 case treeEntrySymlink: 43 return "symbolic link" 44 } 45 panic(fmt.Sprintf("unknown mode %d", m)) 46} 47 48type treeEntry struct { 49 path string 50 mode treeEntryMode 51 sha256 []byte 52} 53 54func sortTree(tree []treeEntry) { 55 slices.SortFunc(tree, func(a, b treeEntry) int { return cmp.Compare(a.path, b.path) }) 56} 57 58func compareTrees(got, want []treeEntry) error { 59 // Check for duplicate files. 60 for i := 0; i < len(got)-1; i++ { 61 if got[i].path == got[i+1].path { 62 return fmt.Errorf("duplicate file %q in archive", got[i].path) 63 } 64 } 65 66 // Check for differences between the two trees. 67 for i := 0; i < len(got) && i < len(want); i++ { 68 if got[i].path == want[i].path { 69 if got[i].mode != want[i].mode { 70 return fmt.Errorf("file %q was a %s but should have been a %s", got[i].path, got[i].mode, want[i].mode) 71 } 72 if !bytes.Equal(got[i].sha256, want[i].sha256) { 73 return fmt.Errorf("hash of %q was %x but should have been %x", got[i].path, got[i].sha256, want[i].sha256) 74 } 75 } else if got[i].path < want[i].path { 76 return fmt.Errorf("unexpected file %q", got[i].path) 77 } else { 78 return fmt.Errorf("missing file %q", want[i].path) 79 } 80 } 81 if len(want) < len(got) { 82 return fmt.Errorf("unexpected file %q", got[len(want)].path) 83 } 84 if len(got) < len(want) { 85 return fmt.Errorf("missing file %q", want[len(got)].path) 86 } 87 return nil 88} 89 90type gitTreeEntry struct { 91 path string 92 mode treeEntryMode 93 objectName string 94} 95 96func gitListTree(treeish string) ([]gitTreeEntry, error) { 97 var stdout, stderr bytes.Buffer 98 cmd := exec.Command("git", "ls-tree", "-r", "-z", treeish) 99 cmd.Stdout = &stdout 100 cmd.Stderr = &stderr 101 if err := cmd.Run(); err != nil { 102 return nil, fmt.Errorf("error listing git tree %q: %w\n%s\n", treeish, err, stderr.String()) 103 } 104 lines := strings.Split(stdout.String(), "\x00") 105 ret := make([]gitTreeEntry, 0, len(lines)) 106 for _, line := range lines { 107 if len(line) == 0 { 108 continue 109 } 110 111 idx := strings.IndexByte(line, '\t') 112 if idx < 0 { 113 return nil, fmt.Errorf("could not parse ls-tree output %q", line) 114 } 115 116 info, path := line[:idx], line[idx+1:] 117 infos := strings.Split(info, " ") 118 if len(infos) != 3 { 119 return nil, fmt.Errorf("could not parse ls-tree output %q", line) 120 } 121 122 perms, objectType, objectName := infos[0], infos[1], infos[2] 123 if objectType != "blob" { 124 return nil, fmt.Errorf("unexpected object type in ls-tree output %q", line) 125 } 126 127 var mode treeEntryMode 128 switch perms { 129 case "100644": 130 mode = treeEntryRegular 131 case "100755": 132 mode = treeEntryExecutable 133 case "120000": 134 mode = treeEntrySymlink 135 default: 136 return nil, fmt.Errorf("unexpected file mode in ls-tree output %q", line) 137 } 138 139 ret = append(ret, gitTreeEntry{path: path, mode: mode, objectName: objectName}) 140 } 141 return ret, nil 142} 143 144func gitHashBlob(objectName string) ([]byte, error) { 145 h := sha256.New() 146 var stderr bytes.Buffer 147 cmd := exec.Command("git", "cat-file", "blob", objectName) 148 cmd.Stdout = h 149 cmd.Stderr = &stderr 150 if err := cmd.Run(); err != nil { 151 return nil, fmt.Errorf("error hashing git object %q: %w\n%s\n", objectName, err, stderr.String()) 152 } 153 return h.Sum(nil), nil 154} 155 156func gitHashTree(s *stepPrinter, treeish string) ([]treeEntry, error) { 157 gitTree, err := gitListTree(treeish) 158 if err != nil { 159 return nil, err 160 } 161 162 s.setTotal(len(gitTree)) 163 164 // Hashing objects one by one is slow, so parallelize. Ideally we could 165 // just use the object name, but git uses SHA-1, so checking a SHA-265 166 // hash seems prudent. 167 var workerErr error 168 var workerLock sync.Mutex 169 170 var wg sync.WaitGroup 171 jobs := make(chan gitTreeEntry, *numWorkers) 172 results := make(chan treeEntry, *numWorkers) 173 for i := 0; i < *numWorkers; i++ { 174 wg.Add(1) 175 go func() { 176 defer wg.Done() 177 for job := range jobs { 178 workerLock.Lock() 179 shouldStop := workerErr != nil 180 workerLock.Unlock() 181 if shouldStop { 182 break 183 } 184 185 sha256, err := gitHashBlob(job.objectName) 186 if err != nil { 187 workerLock.Lock() 188 if workerErr == nil { 189 workerErr = err 190 } 191 workerLock.Unlock() 192 break 193 } 194 195 results <- treeEntry{path: job.path, mode: job.mode, sha256: sha256} 196 } 197 }() 198 } 199 200 go func() { 201 for _, job := range gitTree { 202 jobs <- job 203 } 204 close(jobs) 205 wg.Wait() 206 close(results) 207 }() 208 209 tree := make([]treeEntry, 0, len(gitTree)) 210 for result := range results { 211 s.addProgress(1) 212 tree = append(tree, result) 213 } 214 215 if workerErr != nil { 216 return nil, workerErr 217 } 218 219 if len(tree) != len(gitTree) { 220 panic("input and output sizes did not match") 221 } 222 223 sortTree(tree) 224 return tree, nil 225} 226