Source file src/cmd/go/internal/modfetch/repo.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package modfetch
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"io/fs"
    12  	"os"
    13  	"strconv"
    14  	"time"
    15  
    16  	"cmd/go/internal/cfg"
    17  	"cmd/go/internal/modfetch/codehost"
    18  	"cmd/go/internal/vcs"
    19  	web "cmd/go/internal/web"
    20  	"cmd/internal/par"
    21  
    22  	"golang.org/x/mod/module"
    23  )
    24  
// traceRepo enables tracing of all repo actions, for debugging.
// When it is true, Lookup and LookupLocal log their own calls with logCall
// and wrap every successfully looked-up Repo in a loggingRepo.
const traceRepo = false // trace all repo actions, for debugging
    26  
// A Repo represents a repository storing all versions of a single module.
// It must be safe for simultaneous use by multiple goroutines.
type Repo interface {
	// ModulePath returns the module path.
	ModulePath() string

	// CheckReuse checks whether the validation criteria in the origin
	// are still satisfied on the server corresponding to this module.
	// If so, the caller can reuse any cached Versions or RevInfo containing
	// this origin rather than redownloading those from the server.
	//
	// A nil error means the criteria are still satisfied.
	CheckReuse(ctx context.Context, old *codehost.Origin) error

	// Versions lists all known versions with the given prefix.
	// Pseudo-versions are not included.
	//
	// Versions should be returned sorted in semver order
	// (implementations can use semver.Sort).
	//
	// Versions returns a non-nil error only if there was a problem
	// fetching the list of versions: it may return an empty list
	// along with a nil error if the list of matching versions
	// is known to be empty.
	//
	// If the underlying repository does not exist,
	// Versions returns an error matching errors.Is(_, fs.ErrNotExist).
	Versions(ctx context.Context, prefix string) (*Versions, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(ctx context.Context, rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying source code repository.
	// It is only used when there are no tagged versions.
	Latest(ctx context.Context) (*RevInfo, error)

	// GoMod returns the go.mod file for the given version.
	GoMod(ctx context.Context, version string) (data []byte, err error)

	// Zip writes a zip file for the given version to dst.
	Zip(ctx context.Context, dst io.Writer, version string) error
}
    70  
// A Versions describes the available versions in a module repository.
// It is the result type of Repo.Versions.
type Versions struct {
	// Origin is left out of the JSON encoding when it is nil.
	Origin *codehost.Origin `json:",omitempty"` // origin information for reuse

	List []string // semver versions
}
    77  
// A RevInfo describes a single revision in a module repository.
// It is the result type of Repo.Stat and Repo.Latest.
type RevInfo struct {
	Version string    // suggested version string for this revision
	Time    time.Time // commit time

	// These fields are used for Stat of arbitrary rev,
	// but they are not recorded when talking about module versions.
	// The `json:"-"` tags keep them out of the JSON encoding.
	Name  string `json:"-"` // complete ID in underlying repository
	Short string `json:"-"` // shortened ID, for use in pseudo-version

	// Origin is left out of the JSON encoding when it is nil.
	Origin *codehost.Origin `json:",omitempty"` // provenance for reuse
}
    90  
    91  // Re: module paths, import paths, repository roots, and lookups
    92  //
    93  // A module is a collection of Go packages stored in a file tree
    94  // with a go.mod file at the root of the tree.
    95  // The go.mod defines the module path, which is the import path
    96  // corresponding to the root of the file tree.
    97  // The import path of a directory within that file tree is the module path
    98  // joined with the name of the subdirectory relative to the root.
    99  //
   100  // For example, the module with path rsc.io/qr corresponds to the
   101  // file tree in the repository https://github.com/rsc/qr.
   102  // That file tree has a go.mod that says "module rsc.io/qr".
   103  // The package in the root directory has import path "rsc.io/qr".
   104  // The package in the gf256 subdirectory has import path "rsc.io/qr/gf256".
   105  // In this example, "rsc.io/qr" is both a module path and an import path.
   106  // But "rsc.io/qr/gf256" is only an import path, not a module path:
   107  // it names an importable package, but not a module.
   108  //
   109  // As a special case to incorporate code written before modules were
   110  // introduced, if a path p resolves using the pre-module "go get" lookup
   111  // to the root of a source code repository without a go.mod file,
   112  // that repository is treated as if it had a go.mod in its root directory
   113  // declaring module path p.
   114  //
   115  // The presentation so far ignores the fact that a source code repository
   116  // has many different versions of a file tree, and those versions may
   117  // differ in whether a particular go.mod exists and what it contains.
   118  // In fact there is a well-defined mapping only from a module path, version
   119  // pair - often written path@version - to a particular file tree.
   120  // For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of
   121  // repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod.
   122  // Because the "go get" import paths rsc.io/qr and github.com/rsc/qr
   123  // both redirect to the Git repository https://github.com/rsc/qr,
   124  // github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0
   125  // but a different module (a different name). In contrast, since v0.2.0
   126  // of that repository has an explicit go.mod that declares path rsc.io/qr,
   127  // github.com/rsc/qr@v0.2.0 is an invalid module path, version pair.
   128  // Before modules, import comments would have had the same effect.
   129  //
   130  // The set of import paths associated with a given module path is
   131  // clearly not fixed: at the least, new directories with new import paths
   132  // can always be added. But another potential operation is to split a
   133  // subtree out of a module into its own module. If done carefully,
   134  // this operation can be done while preserving compatibility for clients.
   135  // For example, suppose that we want to split rsc.io/qr/gf256 into its
   136  // own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256.
   137  // Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory)
   138  // and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod
   139  // cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires
   140  // rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be
   141  // using an older rsc.io/qr module that includes the gf256 package, but if
   142  // it adds a requirement on either the newer rsc.io/qr or the newer
   143  // rsc.io/qr/gf256 module, it will automatically add the requirement
   144  // on the complementary half, ensuring both that rsc.io/qr/gf256 is
   145  // available for importing by the build and also that it is only defined
   146  // by a single module. The gf256 package could move back into the
   147  // original by another simultaneous release of rsc.io/qr v0.4.0 including
   148  // the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code
   149  // in its root directory, along with a new requirement cycle.
   150  // The ability to shift module boundaries in this way is expected to be
   151  // important in large-scale program refactorings, similar to the ones
   152  // described in https://talks.golang.org/2016/refactor.article.
   153  //
   154  // The possibility of shifting module boundaries reemphasizes
   155  // that you must know both the module path and its version
   156  // to determine the set of packages provided directly by that module.
   157  //
   158  // On top of all this, it is possible for a single code repository
   159  // to contain multiple modules, either in branches or subdirectories,
   160  // as a limited kind of monorepo. For example rsc.io/qr/v2,
   161  // the v2.x.x continuation of rsc.io/qr, is expected to be found
   162  // in v2-tagged commits in https://github.com/rsc/qr, either
   163  // in the root or in a v2 subdirectory, disambiguated by go.mod.
   164  // Again the precise file tree corresponding to a module
   165  // depends on which version we are considering.
   166  //
   167  // It is also possible for the underlying repository to change over time,
   168  // without changing the module path. If I copy the github repo over
   169  // to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1,
   170  // then clients of all versions should start fetching from bitbucket
   171  // instead of github. That is, in contrast to the exact file tree,
   172  // the location of the source code repository associated with a module path
   173  // does not depend on the module version. (This is by design, as the whole
   174  // point of these redirects is to allow package authors to establish a stable
   175  // name that can be updated as code moves from one service to another.)
   176  //
   177  // All of this is important background for the lookup APIs defined in this
   178  // file.
   179  //
   180  // The Lookup function takes a module path and returns a Repo representing
   181  // that module path. Lookup can do only a little with the path alone.
// It can check that the path is well-formed (see module.CheckPath)
   183  // and it can check that the path can be resolved to a target repository.
   184  // To avoid version control access except when absolutely necessary,
   185  // Lookup does not attempt to connect to the repository itself.
   186  
// The Lookup cache is used to cache the work done by Lookup.
// It is important that the global functions of this package that access it do not
// do so after they return.
var lookupCache = new(par.Cache[lookupCacheKey, Repo])
   191  
// A lookupCacheKey is the key of lookupCache: the proxy and module path
// arguments of a Lookup call.
type lookupCacheKey struct {
	proxy, path string
}
   195  
   196  // Lookup returns the module with the given module path,
   197  // fetched through the given proxy.
   198  //
   199  // The distinguished proxy "direct" indicates that the path should be fetched
   200  // from its origin, and "noproxy" indicates that the patch should be fetched
   201  // directly only if GONOPROXY matches the given path.
   202  //
   203  // For the distinguished proxy "off", Lookup always returns a Repo that returns
   204  // a non-nil error for every method call.
   205  //
   206  // A successful return does not guarantee that the module
   207  // has any defined versions.
   208  func Lookup(ctx context.Context, proxy, path string) Repo {
   209  	if traceRepo {
   210  		defer logCall("Lookup(%q, %q)", proxy, path)()
   211  	}
   212  
   213  	return lookupCache.Do(lookupCacheKey{proxy, path}, func() Repo {
   214  		return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) {
   215  			r, err := lookup(ctx, proxy, path)
   216  			if err == nil && traceRepo {
   217  				r = newLoggingRepo(r)
   218  			}
   219  			return r, err
   220  		})
   221  	})
   222  }
   223  
// lookupLocalCache caches the work done by LookupLocal.
// It is keyed by module path alone.
var lookupLocalCache = new(par.Cache[string, Repo]) // path, Repo
   225  
   226  // LookupLocal returns a Repo that accesses local VCS information.
   227  //
   228  // codeRoot is the module path of the root module in the repository.
   229  // path is the module path of the module being looked up.
   230  // dir is the file system path of the repository containing the module.
   231  func LookupLocal(ctx context.Context, codeRoot string, path string, dir string) Repo {
   232  	if traceRepo {
   233  		defer logCall("LookupLocal(%q)", path)()
   234  	}
   235  
   236  	return lookupLocalCache.Do(path, func() Repo {
   237  		return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) {
   238  			repoDir, vcsCmd, err := vcs.FromDir(dir, "", true)
   239  			if err != nil {
   240  				return nil, err
   241  			}
   242  			code, err := lookupCodeRepo(ctx, &vcs.RepoRoot{Repo: repoDir, Root: repoDir, VCS: vcsCmd}, true)
   243  			if err != nil {
   244  				return nil, err
   245  			}
   246  			r, err := newCodeRepo(code, codeRoot, "", path)
   247  			if err == nil && traceRepo {
   248  				r = newLoggingRepo(r)
   249  			}
   250  			return r, err
   251  		})
   252  	})
   253  }
   254  
   255  // lookup returns the module with the given module path.
   256  func lookup(ctx context.Context, proxy, path string) (r Repo, err error) {
   257  	if cfg.BuildMod == "vendor" {
   258  		return nil, errLookupDisabled
   259  	}
   260  
   261  	switch path {
   262  	case "go", "toolchain":
   263  		return &toolchainRepo{path, Lookup(ctx, proxy, "golang.org/toolchain")}, nil
   264  	}
   265  
   266  	if module.MatchPrefixPatterns(cfg.GONOPROXY, path) {
   267  		switch proxy {
   268  		case "noproxy", "direct":
   269  			return lookupDirect(ctx, path)
   270  		default:
   271  			return nil, errNoproxy
   272  		}
   273  	}
   274  
   275  	switch proxy {
   276  	case "off":
   277  		return errRepo{path, errProxyOff}, nil
   278  	case "direct":
   279  		return lookupDirect(ctx, path)
   280  	case "noproxy":
   281  		return nil, errUseProxy
   282  	default:
   283  		return newProxyRepo(proxy, path)
   284  	}
   285  }
   286  
   287  type lookupDisabledError struct{}
   288  
   289  func (lookupDisabledError) Error() string {
   290  	if cfg.BuildModReason == "" {
   291  		return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod)
   292  	}
   293  	return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason)
   294  }
   295  
   296  var errLookupDisabled error = lookupDisabledError{}
   297  
// Errors produced by lookup. All are built with notExistErrorf and so
// match errors.Is(_, fs.ErrNotExist).
//
// errProxyOff is the error carried by the errRepo returned for proxy "off".
// errNoproxy is returned when the path matches GONOPROXY but the proxy is
// neither "noproxy" nor "direct".
// errUseProxy is returned when the proxy is "noproxy" but the path does not
// match GONOPROXY.
var (
	errProxyOff       = notExistErrorf("module lookup disabled by GOPROXY=off")
	errNoproxy  error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY")
	errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY")
)
   303  
   304  func lookupDirect(ctx context.Context, path string) (Repo, error) {
   305  	security := web.SecureOnly
   306  
   307  	if module.MatchPrefixPatterns(cfg.GOINSECURE, path) {
   308  		security = web.Insecure
   309  	}
   310  	rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security)
   311  	if err != nil {
   312  		// We don't know where to find code for a module with this path.
   313  		return nil, notExistError{err: err}
   314  	}
   315  
   316  	if rr.VCS.Name == "mod" {
   317  		// Fetch module from proxy with base URL rr.Repo.
   318  		return newProxyRepo(rr.Repo, path)
   319  	}
   320  
   321  	code, err := lookupCodeRepo(ctx, rr, false)
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  	return newCodeRepo(code, rr.Root, rr.SubDir, path)
   326  }
   327  
   328  func lookupCodeRepo(ctx context.Context, rr *vcs.RepoRoot, local bool) (codehost.Repo, error) {
   329  	code, err := codehost.NewRepo(ctx, rr.VCS.Cmd, rr.Repo, local)
   330  	if err != nil {
   331  		if _, ok := err.(*codehost.VCSError); ok {
   332  			return nil, err
   333  		}
   334  		return nil, fmt.Errorf("lookup %s: %v", rr.Root, err)
   335  	}
   336  	return code, nil
   337  }
   338  
   339  // A loggingRepo is a wrapper around an underlying Repo
   340  // that prints a log message at the start and end of each call.
   341  // It can be inserted when debugging.
   342  type loggingRepo struct {
   343  	r Repo
   344  }
   345  
   346  func newLoggingRepo(r Repo) *loggingRepo {
   347  	return &loggingRepo{r}
   348  }
   349  
   350  // logCall prints a log message using format and args and then
   351  // also returns a function that will print the same message again,
   352  // along with the elapsed time.
   353  // Typical usage is:
   354  //
   355  //	defer logCall("hello %s", arg)()
   356  //
   357  // Note the final ().
   358  func logCall(format string, args ...any) func() {
   359  	start := time.Now()
   360  	fmt.Fprintf(os.Stderr, "+++ %s\n", fmt.Sprintf(format, args...))
   361  	return func() {
   362  		fmt.Fprintf(os.Stderr, "%.3fs %s\n", time.Since(start).Seconds(), fmt.Sprintf(format, args...))
   363  	}
   364  }
   365  
// ModulePath returns the module path of the underlying Repo.
// Unlike the other loggingRepo methods, it does not log.
func (l *loggingRepo) ModulePath() string {
	return l.r.ModulePath()
}
   369  
   370  func (l *loggingRepo) CheckReuse(ctx context.Context, old *codehost.Origin) (err error) {
   371  	defer func() {
   372  		logCall("CheckReuse[%s]: %v", l.r.ModulePath(), err)
   373  	}()
   374  	return l.r.CheckReuse(ctx, old)
   375  }
   376  
   377  func (l *loggingRepo) Versions(ctx context.Context, prefix string) (*Versions, error) {
   378  	defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)()
   379  	return l.r.Versions(ctx, prefix)
   380  }
   381  
   382  func (l *loggingRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
   383  	defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)()
   384  	return l.r.Stat(ctx, rev)
   385  }
   386  
   387  func (l *loggingRepo) Latest(ctx context.Context) (*RevInfo, error) {
   388  	defer logCall("Repo[%s]: Latest()", l.r.ModulePath())()
   389  	return l.r.Latest(ctx)
   390  }
   391  
   392  func (l *loggingRepo) GoMod(ctx context.Context, version string) ([]byte, error) {
   393  	defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)()
   394  	return l.r.GoMod(ctx, version)
   395  }
   396  
   397  func (l *loggingRepo) Zip(ctx context.Context, dst io.Writer, version string) error {
   398  	dstName := "_"
   399  	if dst, ok := dst.(interface{ Name() string }); ok {
   400  		dstName = strconv.Quote(dst.Name())
   401  	}
   402  	defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)()
   403  	return l.r.Zip(ctx, dst, version)
   404  }
   405  
   406  // errRepo is a Repo that returns the same error for all operations.
   407  //
   408  // It is useful in conjunction with caching, since cache hits will not attempt
   409  // the prohibited operations.
   410  type errRepo struct {
   411  	modulePath string
   412  	err        error
   413  }
   414  
   415  func (r errRepo) ModulePath() string { return r.modulePath }
   416  
   417  func (r errRepo) CheckReuse(ctx context.Context, old *codehost.Origin) error     { return r.err }
   418  func (r errRepo) Versions(ctx context.Context, prefix string) (*Versions, error) { return nil, r.err }
   419  func (r errRepo) Stat(ctx context.Context, rev string) (*RevInfo, error)         { return nil, r.err }
   420  func (r errRepo) Latest(ctx context.Context) (*RevInfo, error)                   { return nil, r.err }
   421  func (r errRepo) GoMod(ctx context.Context, version string) ([]byte, error)      { return nil, r.err }
   422  func (r errRepo) Zip(ctx context.Context, dst io.Writer, version string) error   { return r.err }
   423  
   424  // A notExistError is like fs.ErrNotExist, but with a custom message
   425  type notExistError struct {
   426  	err error
   427  }
   428  
   429  func notExistErrorf(format string, args ...any) error {
   430  	return notExistError{fmt.Errorf(format, args...)}
   431  }
   432  
   433  func (e notExistError) Error() string {
   434  	return e.err.Error()
   435  }
   436  
   437  func (notExistError) Is(target error) bool {
   438  	return target == fs.ErrNotExist
   439  }
   440  
   441  func (e notExistError) Unwrap() error {
   442  	return e.err
   443  }
   444  

View as plain text