• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package codehost
6
7import (
8	"bytes"
9	"context"
10	"crypto/sha256"
11	"encoding/base64"
12	"errors"
13	"fmt"
14	"io"
15	"io/fs"
16	"net/url"
17	"os"
18	"os/exec"
19	"path/filepath"
20	"runtime"
21	"slices"
22	"sort"
23	"strconv"
24	"strings"
25	"sync"
26	"time"
27
28	"cmd/go/internal/base"
29	"cmd/go/internal/lockedfile"
30	"cmd/go/internal/par"
31	"cmd/go/internal/web"
32
33	"golang.org/x/mod/semver"
34)
35
36// LocalGitRepo is like Repo but accepts both Git remote references
37// and paths to repositories on the local file system.
38func LocalGitRepo(ctx context.Context, remote string) (Repo, error) {
39	return newGitRepoCached(ctx, remote, true)
40}
41
// A notExistError wraps an underlying error, keeping that error's message
// unchanged while making the wrapper match fs.ErrNotExist under errors.Is.
// The wrapped error itself is not exposed for unwrapping.
type notExistError struct {
	err error
}

// Error returns the text of the wrapped error.
func (e notExistError) Error() string {
	return e.err.Error()
}

// Is reports whether target is fs.ErrNotExist.
func (e notExistError) Is(target error) bool {
	return target == fs.ErrNotExist
}
50
// gitWorkDirType is the type string passed to WorkDir when obtaining the
// working directory for a remote Git repository.
const gitWorkDirType = "git3"

// gitRepoCache caches the result of newGitRepo for each distinct gitCacheKey,
// so that newGitRepoCached constructs at most one Repo per key.
var gitRepoCache par.ErrCache[gitCacheKey, Repo]

// A gitCacheKey identifies an entry in gitRepoCache.
type gitCacheKey struct {
	// remote is the remote string as passed to newGitRepoCached.
	remote  string
	// localOK records whether local file system paths were permitted.
	localOK bool
}
59
60func newGitRepoCached(ctx context.Context, remote string, localOK bool) (Repo, error) {
61	return gitRepoCache.Do(gitCacheKey{remote, localOK}, func() (Repo, error) {
62		return newGitRepo(ctx, remote, localOK)
63	})
64}
65
66func newGitRepo(ctx context.Context, remote string, localOK bool) (Repo, error) {
67	r := &gitRepo{remote: remote}
68	if strings.Contains(remote, "://") {
69		// This is a remote path.
70		var err error
71		r.dir, r.mu.Path, err = WorkDir(ctx, gitWorkDirType, r.remote)
72		if err != nil {
73			return nil, err
74		}
75
76		unlock, err := r.mu.Lock()
77		if err != nil {
78			return nil, err
79		}
80		defer unlock()
81
82		if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
83			if _, err := Run(ctx, r.dir, "git", "init", "--bare"); err != nil {
84				os.RemoveAll(r.dir)
85				return nil, err
86			}
87			// We could just say git fetch https://whatever later,
88			// but this lets us say git fetch origin instead, which
89			// is a little nicer. More importantly, using a named remote
90			// avoids a problem with Git LFS. See golang.org/issue/25605.
91			if _, err := Run(ctx, r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil {
92				os.RemoveAll(r.dir)
93				return nil, err
94			}
95			if runtime.GOOS == "windows" {
96				// Git for Windows by default does not support paths longer than
97				// MAX_PATH (260 characters) because that may interfere with navigation
98				// in some Windows programs. However, cmd/go should be able to handle
99				// long paths just fine, and we expect people to use 'go clean' to
100				// manipulate the module cache, so it should be harmless to set here,
101				// and in some cases may be necessary in order to download modules with
102				// long branch names.
103				//
104				// See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path.
105				if _, err := Run(ctx, r.dir, "git", "config", "core.longpaths", "true"); err != nil {
106					os.RemoveAll(r.dir)
107					return nil, err
108				}
109			}
110		}
111		r.remoteURL = r.remote
112		r.remote = "origin"
113	} else {
114		// Local path.
115		// Disallow colon (not in ://) because sometimes
116		// that's rcp-style host:path syntax and sometimes it's not (c:\work).
117		// The go command has always insisted on URL syntax for ssh.
118		if strings.Contains(remote, ":") {
119			return nil, fmt.Errorf("git remote cannot use host:path syntax")
120		}
121		if !localOK {
122			return nil, fmt.Errorf("git remote must not be local directory")
123		}
124		r.local = true
125		info, err := os.Stat(remote)
126		if err != nil {
127			return nil, err
128		}
129		if !info.IsDir() {
130			return nil, fmt.Errorf("%s exists but is not a directory", remote)
131		}
132		r.dir = remote
133		r.mu.Path = r.dir + ".lock"
134	}
135	return r, nil
136}
137
// A gitRepo is the Repo implementation returned by newGitRepo.
// It runs git commands in dir, which is either a bare repository in the
// work directory obtained from WorkDir (remote case) or a user-supplied
// local directory (local case).
type gitRepo struct {
	// NOTE(review): ctx is not referenced by any code visible in this file;
	// confirm whether it is still needed.
	ctx context.Context

	// remote is the remote name or path given to git commands: "origin" for
	// a remote repository, or the directory path for a local one.
	// remoteURL is the original URL for a remote repository; newGitRepo
	// leaves it empty for a local one.
	remote, remoteURL string
	// local reports whether the repository is a local directory.
	local             bool
	// dir is the directory in which git commands are run.
	dir               string

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, or fetchAll.
	fetchLevel int

	// statCache caches the results of stat, keyed by rev (see Stat).
	statCache par.ErrCache[string, *RevInfo]

	// refsOnce guards the single ls-remote performed by loadRefs.
	refsOnce sync.Once
	// refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
	// to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
	refs    map[string]string
	// refsErr is the error, if any, from loading refs.
	refsErr error

	// localTagsOnce guards the single call to loadLocalTags made by stat.
	localTagsOnce sync.Once
	localTags     sync.Map // map[string]bool
}
160
// Values for gitRepo.fetchLevel, in increasing order of how much has been fetched.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // all remote heads and tags, unshallowed (see fetchRefsLocked)
)
167
168// loadLocalTags loads tag references from the local git cache
169// into the map r.localTags.
170func (r *gitRepo) loadLocalTags(ctx context.Context) {
171	// The git protocol sends all known refs and ls-remote filters them on the client side,
172	// so we might as well record both heads and tags in one shot.
173	// Most of the time we only care about tags but sometimes we care about heads too.
174	out, err := Run(ctx, r.dir, "git", "tag", "-l")
175	if err != nil {
176		return
177	}
178
179	for _, line := range strings.Split(string(out), "\n") {
180		if line != "" {
181			r.localTags.Store(line, true)
182		}
183	}
184}
185
186func (r *gitRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error {
187	if old == nil {
188		return fmt.Errorf("missing origin")
189	}
190	if old.VCS != "git" || old.URL != r.remoteURL {
191		return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL)
192	}
193	if old.Subdir != subdir {
194		return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir)
195	}
196
197	// Note: Can have Hash with no Ref and no TagSum and no RepoSum,
198	// meaning the Hash simply has to remain in the repo.
199	// In that case we assume it does in the absence of any real way to check.
200	// But if neither Hash nor TagSum is present, we have nothing to check,
201	// which we take to mean we didn't record enough information to be sure.
202	if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" {
203		return fmt.Errorf("non-specific origin")
204	}
205
206	r.loadRefs(ctx)
207	if r.refsErr != nil {
208		return r.refsErr
209	}
210
211	if old.Ref != "" {
212		hash, ok := r.refs[old.Ref]
213		if !ok {
214			return fmt.Errorf("ref %q deleted", old.Ref)
215		}
216		if hash != old.Hash {
217			return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash)
218		}
219	}
220	if old.TagSum != "" {
221		tags, err := r.Tags(ctx, old.TagPrefix)
222		if err != nil {
223			return err
224		}
225		if tags.Origin.TagSum != old.TagSum {
226			return fmt.Errorf("tags changed")
227		}
228	}
229	if old.RepoSum != "" {
230		if r.repoSum(r.refs) != old.RepoSum {
231			return fmt.Errorf("refs changed")
232		}
233	}
234	return nil
235}
236
237// loadRefs loads heads and tags references from the remote into the map r.refs.
238// The result is cached in memory.
239func (r *gitRepo) loadRefs(ctx context.Context) (map[string]string, error) {
240	r.refsOnce.Do(func() {
241		// The git protocol sends all known refs and ls-remote filters them on the client side,
242		// so we might as well record both heads and tags in one shot.
243		// Most of the time we only care about tags but sometimes we care about heads too.
244		release, err := base.AcquireNet()
245		if err != nil {
246			r.refsErr = err
247			return
248		}
249		out, gitErr := Run(ctx, r.dir, "git", "ls-remote", "-q", r.remote)
250		release()
251
252		if gitErr != nil {
253			if rerr, ok := gitErr.(*RunError); ok {
254				if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
255					rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
256				}
257			}
258
259			// If the remote URL doesn't exist at all, ideally we should treat the whole
260			// repository as nonexistent by wrapping the error in a notExistError.
261			// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
262			// ourselves and see what code it serves.
263			if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
264				if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
265					gitErr = notExistError{gitErr}
266				}
267			}
268
269			r.refsErr = gitErr
270			return
271		}
272
273		refs := make(map[string]string)
274		for _, line := range strings.Split(string(out), "\n") {
275			f := strings.Fields(line)
276			if len(f) != 2 {
277				continue
278			}
279			if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
280				refs[f[1]] = f[0]
281			}
282		}
283		for ref, hash := range refs {
284			if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag
285				refs[k] = hash
286				delete(refs, ref)
287			}
288		}
289		r.refs = refs
290	})
291	return r.refs, r.refsErr
292}
293
294func (r *gitRepo) Tags(ctx context.Context, prefix string) (*Tags, error) {
295	refs, err := r.loadRefs(ctx)
296	if err != nil {
297		return nil, err
298	}
299
300	tags := &Tags{
301		Origin: &Origin{
302			VCS:       "git",
303			URL:       r.remoteURL,
304			TagPrefix: prefix,
305		},
306		List: []Tag{},
307	}
308	for ref, hash := range refs {
309		if !strings.HasPrefix(ref, "refs/tags/") {
310			continue
311		}
312		tag := ref[len("refs/tags/"):]
313		if !strings.HasPrefix(tag, prefix) {
314			continue
315		}
316		tags.List = append(tags.List, Tag{tag, hash})
317	}
318	sort.Slice(tags.List, func(i, j int) bool {
319		return tags.List[i].Name < tags.List[j].Name
320	})
321
322	dir := prefix[:strings.LastIndex(prefix, "/")+1]
323	h := sha256.New()
324	for _, tag := range tags.List {
325		if isOriginTag(strings.TrimPrefix(tag.Name, dir)) {
326			fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash)
327		}
328	}
329	tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
330	return tags, nil
331}
332
333// repoSum returns a checksum of the entire repo state,
334// which can be checked (as Origin.RepoSum) to cache
335// the absence of a specific module version.
336// The caller must supply refs, the result of a successful r.loadRefs.
337func (r *gitRepo) repoSum(refs map[string]string) string {
338	var list []string
339	for ref := range refs {
340		list = append(list, ref)
341	}
342	sort.Strings(list)
343	h := sha256.New()
344	for _, ref := range list {
345		fmt.Fprintf(h, "%q %s\n", ref, refs[ref])
346	}
347	return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
348}
349
350// unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs,
351// for use when returning an UnknownRevisionError.
352func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo {
353	return &RevInfo{
354		Origin: &Origin{
355			VCS:     "git",
356			URL:     r.remoteURL,
357			RepoSum: r.repoSum(refs),
358		},
359	}
360}
361
362func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) {
363	refs, err := r.loadRefs(ctx)
364	if err != nil {
365		return nil, err
366	}
367	if refs["HEAD"] == "" {
368		return nil, ErrNoCommits
369	}
370	statInfo, err := r.Stat(ctx, refs["HEAD"])
371	if err != nil {
372		return nil, err
373	}
374
375	// Stat may return cached info, so make a copy to modify here.
376	info := new(RevInfo)
377	*info = *statInfo
378	info.Origin = new(Origin)
379	if statInfo.Origin != nil {
380		*info.Origin = *statInfo.Origin
381	}
382	info.Origin.Ref = "HEAD"
383	info.Origin.Hash = refs["HEAD"]
384
385	return info, nil
386}
387
388// findRef finds some ref name for the given hash,
389// for use when the server requires giving a ref instead of a hash.
390// There may be multiple ref names for a given hash,
391// in which case this returns some name - it doesn't matter which.
392func (r *gitRepo) findRef(ctx context.Context, hash string) (ref string, ok bool) {
393	refs, err := r.loadRefs(ctx)
394	if err != nil {
395		return "", false
396	}
397	for ref, h := range refs {
398		if h == hash {
399			return ref, true
400		}
401	}
402	return "", false
403}
404
// minHashDigits is the minimum number of hex digits required
// before a hex digit sequence is accepted as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.)
const minHashDigits = 7
412
// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// For a local repository it is a plain statLocal. Otherwise it tries, in
// order: a local lookup of rev as a hash, a local lookup of rev as a known
// local tag, resolution of rev against the remote refs followed by a local
// lookup, a shallow fetch of the resolved ref, and finally a full fetch of
// all heads and tags.
//
// On return with a non-nil info (after the refs have been resolved),
// the deferred function sets info.Origin.Hash and info.Origin.Ref.
func (r *gitRepo) stat(ctx context.Context, rev string) (info *RevInfo, err error) {
	if r.local {
		return r.statLocal(ctx, rev, rev)
	}

	// Fast path: maybe rev is a hash we already have locally.
	didStatLocal := false
	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		if info, err := r.statLocal(ctx, rev, rev); err == nil {
			return info, nil
		}
		didStatLocal = true
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(func() { r.loadLocalTags(ctx) })
	if _, ok := r.localTags.Load(rev); ok {
		return r.statLocal(ctx, rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	refs, err := r.loadRefs(ctx)
	if err != nil {
		return nil, err
	}
	// NOTE(review): a previous comment here said that the loadRefs error
	// (git failure, inaccessible private repo, ...) is checked later, in the
	// else block below, so that a rev already in the local cache can still
	// be found. The code as written returns the error immediately above.
	var ref, hash string
	if refs["refs/tags/"+rev] != "" {
		ref = "refs/tags/" + rev
		hash = refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	} else if refs["refs/heads/"+rev] != "" {
		ref = "refs/heads/" + rev
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	} else if rev == "HEAD" && refs["HEAD"] != "" {
		ref = "HEAD"
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					return nil, fmt.Errorf("ambiguous revision %s", rev)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	} else {
		// rev is neither a known ref name nor a plausible hash.
		return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev}
	}

	// Whatever path produces the result below, record the resolved hash
	// and (when it is a real ref name) the ref in the returned Origin.
	defer func() {
		if info != nil {
			info.Origin.Hash = info.Name
			// There's a ref = hash below; don't write that hash down as Origin.Ref.
			if ref != info.Origin.Hash {
				info.Origin.Ref = ref
			}
		}
	}()

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	// Perhaps r.localTags did not have the ref when we loaded local tags,
	// but we've since done fetches that pulled down the hash we need
	// (or already have the hash we need, just without its tag).
	// Either way, try a local stat before falling back to network I/O.
	if !didStatLocal {
		if info, err := r.statLocal(ctx, rev, hash); err == nil {
			tag, fromTag := strings.CutPrefix(ref, "refs/tags/")
			if fromTag && !slices.Contains(info.Tags, tag) {
				// The local repo includes the commit hash we want, but it is missing
				// the corresponding tag. Add that tag and try again.
				_, err := Run(ctx, r.dir, "git", "tag", tag, hash)
				if err != nil {
					return nil, err
				}
				r.localTags.Store(tag, true)
				return r.statLocal(ctx, rev, ref)
			}
			return info, err
		}
	}

	// If we know a specific commit we need and its ref, fetch it.
	// We do NOT fetch arbitrary hashes (when we don't know the ref)
	// because we want to avoid ever importing a commit that isn't
	// reachable from refs/tags/* or refs/heads/* or HEAD.
	// Both Gerrit and GitHub expose every CL/PR as a named ref,
	// and we don't want those commits masquerading as being real
	// pseudo-versions in the main repo.
	if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local {
		r.fetchLevel = fetchSome
		var refspec string
		if ref == "HEAD" {
			// Fetch the hash but give it a local name (refs/dummy),
			// because that triggers the fetch behavior of creating any
			// other known remote tags for the hash. We never use
			// refs/dummy (it's not refs/tags/dummy) and it will be
			// overwritten in the next command, and that's fine.
			ref = hash
			refspec = hash + ":refs/dummy"
		} else {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		}

		release, err := base.AcquireNet()
		if err != nil {
			return nil, err
		}
		// We explicitly set protocol.version=2 for this command to work around
		// an apparent Git bug introduced in Git 2.21 (commit 61c771),
		// which causes the handler for protocol version 1 to sometimes miss
		// tags that point to the requested commit (see https://go.dev/issue/56881).
		_, err = Run(ctx, r.dir, "git", "-c", "protocol.version=2", "fetch", "-f", "--depth=1", r.remote, refspec)
		release()

		if err == nil {
			return r.statLocal(ctx, rev, ref)
		}
		// Don't try to be smart about parsing the error.
		// It's too complex and varies too much by git version.
		// No matter what went wrong, fall back to a complete fetch.
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if err := r.fetchRefsLocked(ctx); err != nil {
		return nil, err
	}

	return r.statLocal(ctx, rev, rev)
}
576
577// fetchRefsLocked fetches all heads and tags from the origin, along with the
578// ancestors of those commits.
579//
580// We only fetch heads and tags, not arbitrary other commits: we don't want to
581// pull in off-branch commits (such as rejected GitHub pull requests) that the
582// server may be willing to provide. (See the comments within the stat method
583// for more detail.)
584//
585// fetchRefsLocked requires that r.mu remain locked for the duration of the call.
586func (r *gitRepo) fetchRefsLocked(ctx context.Context) error {
587	if r.fetchLevel < fetchAll {
588		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
589		// (2019-08-16), we must first expand the set of local refs, and only then
590		// unshallow the repository as a separate fetch operation. (See
591		// golang.org/issue/34266 and
592		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
593
594		release, err := base.AcquireNet()
595		if err != nil {
596			return err
597		}
598		defer release()
599
600		if _, err := Run(ctx, r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
601			return err
602		}
603
604		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
605			if _, err := Run(ctx, r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
606				return err
607			}
608		}
609
610		r.fetchLevel = fetchAll
611	}
612	return nil
613}
614
615// statLocal returns a new RevInfo describing rev in the local git repository.
616// It uses version as info.Version.
617func (r *gitRepo) statLocal(ctx context.Context, version, rev string) (*RevInfo, error) {
618	out, err := Run(ctx, r.dir, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--")
619	if err != nil {
620		// Return info with Origin.RepoSum if possible to allow caching of negative lookup.
621		var info *RevInfo
622		if refs, err := r.loadRefs(ctx); err == nil {
623			info = r.unknownRevisionInfo(refs)
624		}
625		return info, &UnknownRevisionError{Rev: rev}
626	}
627	f := strings.Fields(string(out))
628	if len(f) < 2 {
629		return nil, fmt.Errorf("unexpected response from git log: %q", out)
630	}
631	hash := f[0]
632	if strings.HasPrefix(hash, version) {
633		version = hash // extend to full hash
634	}
635	t, err := strconv.ParseInt(f[1], 10, 64)
636	if err != nil {
637		return nil, fmt.Errorf("invalid time from git log: %q", out)
638	}
639
640	info := &RevInfo{
641		Origin: &Origin{
642			VCS:  "git",
643			URL:  r.remoteURL,
644			Hash: hash,
645		},
646		Name:    hash,
647		Short:   ShortenSHA1(hash),
648		Time:    time.Unix(t, 0).UTC(),
649		Version: hash,
650	}
651	if !strings.HasPrefix(hash, rev) {
652		info.Origin.Ref = rev
653	}
654
655	// Add tags. Output looks like:
656	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
657	for i := 2; i < len(f); i++ {
658		if f[i] == "tag:" {
659			i++
660			if i < len(f) {
661				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
662			}
663		}
664	}
665	sort.Strings(info.Tags)
666
667	// Used hash as info.Version above.
668	// Use caller's suggested version if it appears in the tag list
669	// (filters out branch names, HEAD).
670	for _, tag := range info.Tags {
671		if version == tag {
672			info.Version = version
673		}
674	}
675
676	return info, nil
677}
678
679func (r *gitRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
680	if rev == "latest" {
681		return r.Latest(ctx)
682	}
683	return r.statCache.Do(rev, func() (*RevInfo, error) {
684		return r.stat(ctx, rev)
685	})
686}
687
688func (r *gitRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) {
689	// TODO: Could use git cat-file --batch.
690	info, err := r.Stat(ctx, rev) // download rev into local git repo
691	if err != nil {
692		return nil, err
693	}
694	out, err := Run(ctx, r.dir, "git", "cat-file", "blob", info.Name+":"+file)
695	if err != nil {
696		return nil, fs.ErrNotExist
697	}
698	return out, nil
699}
700
// RecentTag returns the highest tag (by semver comparison of the part after
// prefix) that begins with prefix, is accepted by allowed, and is merged into
// rev according to "git for-each-ref --merged". It returns an empty tag if
// no such tag is found.
//
// If the local repository cannot give a definitive answer and the remote has
// tags beginning with prefix+"v", the full history is fetched and the search
// is repeated once.
func (r *gitRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) {
	info, err := r.Stat(ctx, rev)
	if err != nil {
		return "", err
	}
	rev = info.Name // expand hash prefixes

	// describe sets tag and err using 'git for-each-ref' and reports whether the
	// result is definitive.
	describe := func() (definitive bool) {
		var out []byte
		out, err = Run(ctx, r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
		if err != nil {
			// A git failure is treated as definitive: the caller returns err.
			return true
		}

		// prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
		var highest string
		for _, line := range strings.Split(string(out), "\n") {
			line = strings.TrimSpace(line)
			// git does support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here
			// instead gives support for git v2.7.0.
			if !strings.HasPrefix(line, "refs/tags/") {
				continue
			}
			line = line[len("refs/tags/"):]

			if !strings.HasPrefix(line, prefix) {
				continue
			}
			if !allowed(line) {
				continue
			}

			semtag := line[len(prefix):]
			if semver.Compare(semtag, highest) > 0 {
				highest = semtag
			}
		}

		if highest != "" {
			tag = prefix + highest
		}

		// Definitive only if a tag was found and it is not all hex digits.
		return tag != "" && !AllHex(tag)
	}

	if describe() {
		return tag, err
	}

	// Git didn't find a version tag preceding the requested rev.
	// See whether any plausible tag exists.
	tags, err := r.Tags(ctx, prefix+"v")
	if err != nil {
		return "", err
	}
	if len(tags.List) == 0 {
		return "", nil
	}

	// There are plausible tags, but we don't know if rev is a descendant of any of them.
	// Fetch the history to find out.

	unlock, err := r.mu.Lock()
	if err != nil {
		return "", err
	}
	defer unlock()

	if err := r.fetchRefsLocked(ctx); err != nil {
		return "", err
	}

	// If we've reached this point, we have all of the commits that are reachable
	// from all heads and tags.
	//
	// The only refs we should be missing are those that are no longer reachable
	// (or never were reachable) from any branch or tag, including the master
	// branch, and we don't want to resolve them anyway (they're probably
	// unreachable for a reason).
	//
	// Try one last time in case some other goroutine fetched rev while we were
	// waiting on the lock.
	describe()
	return tag, err
}
788
789func (r *gitRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) {
790	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
791	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
792	// already doesn't work with Git 1.7.1, so at least it's not a regression.
793	//
794	// git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or
795	// 1 if not.
796	_, err := Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
797
798	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
799	// exit code 1.
800	// Unfortunately, if we've already fetched rev with a shallow history, git
801	// merge-base has been observed to report a false-negative, so don't stop yet
802	// even if the exit code is 1!
803	if err == nil {
804		return true, nil
805	}
806
807	// See whether the tag and rev even exist.
808	tags, err := r.Tags(ctx, tag)
809	if err != nil {
810		return false, err
811	}
812	if len(tags.List) == 0 {
813		return false, nil
814	}
815
816	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
817	// about, like rejected GitHub pull requests, so don't try to short-circuit
818	// that here.
819	if _, err = r.stat(ctx, rev); err != nil {
820		return false, err
821	}
822
823	// Now fetch history so that git can search for a path.
824	unlock, err := r.mu.Lock()
825	if err != nil {
826		return false, err
827	}
828	defer unlock()
829
830	if r.fetchLevel < fetchAll {
831		// Fetch the complete history for all refs and heads. It would be more
832		// efficient to only fetch the history from rev to tag, but that's much more
833		// complicated, and any kind of shallow fetch is fairly likely to trigger
834		// bugs in JGit servers and/or the go command anyway.
835		if err := r.fetchRefsLocked(ctx); err != nil {
836			return false, err
837		}
838	}
839
840	_, err = Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
841	if err == nil {
842		return true, nil
843	}
844	if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
845		return false, nil
846	}
847	return false, err
848}
849
850func (r *gitRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
851	// TODO: Use maxSize or drop it.
852	args := []string{}
853	if subdir != "" {
854		args = append(args, "--", subdir)
855	}
856	info, err := r.Stat(ctx, rev) // download rev into local git repo
857	if err != nil {
858		return nil, err
859	}
860
861	unlock, err := r.mu.Lock()
862	if err != nil {
863		return nil, err
864	}
865	defer unlock()
866
867	if err := ensureGitAttributes(r.dir); err != nil {
868		return nil, err
869	}
870
871	// Incredibly, git produces different archives depending on whether
872	// it is running on a Windows system or not, in an attempt to normalize
873	// text file line endings. Setting -c core.autocrlf=input means only
874	// translate files on the way into the repo, not on the way out (archive).
875	// The -c core.eol=lf should be unnecessary but set it anyway.
876	archive, err := Run(ctx, r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
877	if err != nil {
878		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
879			return nil, fs.ErrNotExist
880		}
881		return nil, err
882	}
883
884	return io.NopCloser(bytes.NewReader(archive)), nil
885}
886
// ensureGitAttributes makes sure export-subst and export-ignore features are
// disabled for this repo. This is intended to be run prior to running git
// archive so that zip files are generated that produce consistent ziphashes
// for a given revision, independent of variables such as git version and the
// size of the repo.
//
// It creates repoDir/info/attributes if needed and appends the disabling
// rule unless the file already ends with it, so repeated calls do not grow
// the file.
//
// See: https://github.com/golang/go/issues/27153
func ensureGitAttributes(repoDir string) (err error) {
	const attr = "\n* -export-subst -export-ignore\n"

	infoDir := filepath.Join(repoDir, "info")
	attrPath := filepath.Join(infoDir, "attributes")

	if err := os.MkdirAll(infoDir, 0755); err != nil {
		return err
	}

	f, err := os.OpenFile(attrPath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
	if err != nil {
		return err
	}
	defer func() {
		// Report a Close failure only if nothing else went wrong, so that
		// an earlier read or write error is not masked.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	b, err := io.ReadAll(f)
	if err != nil {
		return err
	}
	if !bytes.HasSuffix(b, []byte(attr)) {
		// The file was opened with O_APPEND, so this write lands at the end
		// regardless of the read position.
		_, err := f.WriteString(attr)
		return err
	}

	return nil
}
926