Source file src/cmd/go/internal/modfetch/repo.go
1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package modfetch 6 7 import ( 8 "context" 9 "fmt" 10 "io" 11 "io/fs" 12 "os" 13 "strconv" 14 "time" 15 16 "cmd/go/internal/cfg" 17 "cmd/go/internal/modfetch/codehost" 18 "cmd/go/internal/vcs" 19 web "cmd/go/internal/web" 20 "cmd/internal/par" 21 22 "golang.org/x/mod/module" 23 ) 24 25 const traceRepo = false // trace all repo actions, for debugging 26 27 // A Repo represents a repository storing all versions of a single module. 28 // It must be safe for simultaneous use by multiple goroutines. 29 type Repo interface { 30 // ModulePath returns the module path. 31 ModulePath() string 32 33 // CheckReuse checks whether the validation criteria in the origin 34 // are still satisfied on the server corresponding to this module. 35 // If so, the caller can reuse any cached Versions or RevInfo containing 36 // this origin rather than redownloading those from the server. 37 CheckReuse(ctx context.Context, old *codehost.Origin) error 38 39 // Versions lists all known versions with the given prefix. 40 // Pseudo-versions are not included. 41 // 42 // Versions should be returned sorted in semver order 43 // (implementations can use semver.Sort). 44 // 45 // Versions returns a non-nil error only if there was a problem 46 // fetching the list of versions: it may return an empty list 47 // along with a nil error if the list of matching versions 48 // is known to be empty. 49 // 50 // If the underlying repository does not exist, 51 // Versions returns an error matching errors.Is(_, os.NotExist). 52 Versions(ctx context.Context, prefix string) (*Versions, error) 53 54 // Stat returns information about the revision rev. 55 // A revision can be any identifier known to the underlying service: 56 // commit hash, branch, tag, and so on. 57 Stat(ctx context.Context, rev string) (*RevInfo, error) 58 59 // Latest returns the latest revision on the default branch, 60 // whatever that means in the underlying source code repository. 61 // It is only used when there are no tagged versions. 62 Latest(ctx context.Context) (*RevInfo, error) 63 64 // GoMod returns the go.mod file for the given version. 65 GoMod(ctx context.Context, version string) (data []byte, err error) 66 67 // Zip writes a zip file for the given version to dst. 68 Zip(ctx context.Context, dst io.Writer, version string) error 69 } 70 71 // A Versions describes the available versions in a module repository. 72 type Versions struct { 73 Origin *codehost.Origin `json:",omitempty"` // origin information for reuse 74 75 List []string // semver versions 76 } 77 78 // A RevInfo describes a single revision in a module repository. 79 type RevInfo struct { 80 Version string // suggested version string for this revision 81 Time time.Time // commit time 82 83 // These fields are used for Stat of arbitrary rev, 84 // but they are not recorded when talking about module versions. 85 Name string `json:"-"` // complete ID in underlying repository 86 Short string `json:"-"` // shortened ID, for use in pseudo-version 87 88 Origin *codehost.Origin `json:",omitempty"` // provenance for reuse 89 } 90 91 // Re: module paths, import paths, repository roots, and lookups 92 // 93 // A module is a collection of Go packages stored in a file tree 94 // with a go.mod file at the root of the tree. 95 // The go.mod defines the module path, which is the import path 96 // corresponding to the root of the file tree. 97 // The import path of a directory within that file tree is the module path 98 // joined with the name of the subdirectory relative to the root. 99 // 100 // For example, the module with path rsc.io/qr corresponds to the 101 // file tree in the repository https://github.com/rsc/qr. 102 // That file tree has a go.mod that says "module rsc.io/qr". 103 // The package in the root directory has import path "rsc.io/qr". 104 // The package in the gf256 subdirectory has import path "rsc.io/qr/gf256". 105 // In this example, "rsc.io/qr" is both a module path and an import path. 106 // But "rsc.io/qr/gf256" is only an import path, not a module path: 107 // it names an importable package, but not a module. 108 // 109 // As a special case to incorporate code written before modules were 110 // introduced, if a path p resolves using the pre-module "go get" lookup 111 // to the root of a source code repository without a go.mod file, 112 // that repository is treated as if it had a go.mod in its root directory 113 // declaring module path p. 114 // 115 // The presentation so far ignores the fact that a source code repository 116 // has many different versions of a file tree, and those versions may 117 // differ in whether a particular go.mod exists and what it contains. 118 // In fact there is a well-defined mapping only from a module path, version 119 // pair - often written path@version - to a particular file tree. 120 // For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of 121 // repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod. 122 // Because the "go get" import paths rsc.io/qr and github.com/rsc/qr 123 // both redirect to the Git repository https://github.com/rsc/qr, 124 // github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0 125 // but a different module (a different name). In contrast, since v0.2.0 126 // of that repository has an explicit go.mod that declares path rsc.io/qr, 127 // github.com/rsc/qr@v0.2.0 is an invalid module path, version pair. 128 // Before modules, import comments would have had the same effect. 129 // 130 // The set of import paths associated with a given module path is 131 // clearly not fixed: at the least, new directories with new import paths 132 // can always be added. But another potential operation is to split a 133 // subtree out of a module into its own module. If done carefully, 134 // this operation can be done while preserving compatibility for clients. 135 // For example, suppose that we want to split rsc.io/qr/gf256 into its 136 // own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256. 137 // Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory) 138 // and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod 139 // cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires 140 // rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be 141 // using an older rsc.io/qr module that includes the gf256 package, but if 142 // it adds a requirement on either the newer rsc.io/qr or the newer 143 // rsc.io/qr/gf256 module, it will automatically add the requirement 144 // on the complementary half, ensuring both that rsc.io/qr/gf256 is 145 // available for importing by the build and also that it is only defined 146 // by a single module. The gf256 package could move back into the 147 // original by another simultaneous release of rsc.io/qr v0.4.0 including 148 // the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code 149 // in its root directory, along with a new requirement cycle. 150 // The ability to shift module boundaries in this way is expected to be 151 // important in large-scale program refactorings, similar to the ones 152 // described in https://talks.golang.org/2016/refactor.article. 153 // 154 // The possibility of shifting module boundaries reemphasizes 155 // that you must know both the module path and its version 156 // to determine the set of packages provided directly by that module. 157 // 158 // On top of all this, it is possible for a single code repository 159 // to contain multiple modules, either in branches or subdirectories, 160 // as a limited kind of monorepo. For example rsc.io/qr/v2, 161 // the v2.x.x continuation of rsc.io/qr, is expected to be found 162 // in v2-tagged commits in https://github.com/rsc/qr, either 163 // in the root or in a v2 subdirectory, disambiguated by go.mod. 164 // Again the precise file tree corresponding to a module 165 // depends on which version we are considering. 166 // 167 // It is also possible for the underlying repository to change over time, 168 // without changing the module path. If I copy the github repo over 169 // to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1, 170 // then clients of all versions should start fetching from bitbucket 171 // instead of github. That is, in contrast to the exact file tree, 172 // the location of the source code repository associated with a module path 173 // does not depend on the module version. (This is by design, as the whole 174 // point of these redirects is to allow package authors to establish a stable 175 // name that can be updated as code moves from one service to another.) 176 // 177 // All of this is important background for the lookup APIs defined in this 178 // file. 179 // 180 // The Lookup function takes a module path and returns a Repo representing 181 // that module path. Lookup can do only a little with the path alone. 182 // It can check that the path is well-formed (see semver.CheckPath) 183 // and it can check that the path can be resolved to a target repository. 184 // To avoid version control access except when absolutely necessary, 185 // Lookup does not attempt to connect to the repository itself. 186 187 // The Lookup cache is used cache the work done by Lookup. 188 // It is important that the global functions of this package that access it do not 189 // do so after they return. 190 var lookupCache = new(par.Cache[lookupCacheKey, Repo]) 191 192 type lookupCacheKey struct { 193 proxy, path string 194 } 195 196 // Lookup returns the module with the given module path, 197 // fetched through the given proxy. 198 // 199 // The distinguished proxy "direct" indicates that the path should be fetched 200 // from its origin, and "noproxy" indicates that the patch should be fetched 201 // directly only if GONOPROXY matches the given path. 202 // 203 // For the distinguished proxy "off", Lookup always returns a Repo that returns 204 // a non-nil error for every method call. 205 // 206 // A successful return does not guarantee that the module 207 // has any defined versions. 208 func Lookup(ctx context.Context, proxy, path string) Repo { 209 if traceRepo { 210 defer logCall("Lookup(%q, %q)", proxy, path)() 211 } 212 213 return lookupCache.Do(lookupCacheKey{proxy, path}, func() Repo { 214 return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) { 215 r, err := lookup(ctx, proxy, path) 216 if err == nil && traceRepo { 217 r = newLoggingRepo(r) 218 } 219 return r, err 220 }) 221 }) 222 } 223 224 var lookupLocalCache = new(par.Cache[string, Repo]) // path, Repo 225 226 // LookupLocal returns a Repo that accesses local VCS information. 227 // 228 // codeRoot is the module path of the root module in the repository. 229 // path is the module path of the module being looked up. 230 // dir is the file system path of the repository containing the module. 231 func LookupLocal(ctx context.Context, codeRoot string, path string, dir string) Repo { 232 if traceRepo { 233 defer logCall("LookupLocal(%q)", path)() 234 } 235 236 return lookupLocalCache.Do(path, func() Repo { 237 return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) { 238 repoDir, vcsCmd, err := vcs.FromDir(dir, "", true) 239 if err != nil { 240 return nil, err 241 } 242 code, err := lookupCodeRepo(ctx, &vcs.RepoRoot{Repo: repoDir, Root: repoDir, VCS: vcsCmd}, true) 243 if err != nil { 244 return nil, err 245 } 246 r, err := newCodeRepo(code, codeRoot, "", path) 247 if err == nil && traceRepo { 248 r = newLoggingRepo(r) 249 } 250 return r, err 251 }) 252 }) 253 } 254 255 // lookup returns the module with the given module path. 256 func lookup(ctx context.Context, proxy, path string) (r Repo, err error) { 257 if cfg.BuildMod == "vendor" { 258 return nil, errLookupDisabled 259 } 260 261 switch path { 262 case "go", "toolchain": 263 return &toolchainRepo{path, Lookup(ctx, proxy, "golang.org/toolchain")}, nil 264 } 265 266 if module.MatchPrefixPatterns(cfg.GONOPROXY, path) { 267 switch proxy { 268 case "noproxy", "direct": 269 return lookupDirect(ctx, path) 270 default: 271 return nil, errNoproxy 272 } 273 } 274 275 switch proxy { 276 case "off": 277 return errRepo{path, errProxyOff}, nil 278 case "direct": 279 return lookupDirect(ctx, path) 280 case "noproxy": 281 return nil, errUseProxy 282 default: 283 return newProxyRepo(proxy, path) 284 } 285 } 286 287 type lookupDisabledError struct{} 288 289 func (lookupDisabledError) Error() string { 290 if cfg.BuildModReason == "" { 291 return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod) 292 } 293 return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason) 294 } 295 296 var errLookupDisabled error = lookupDisabledError{} 297 298 var ( 299 errProxyOff = notExistErrorf("module lookup disabled by GOPROXY=off") 300 errNoproxy error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY") 301 errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY") 302 ) 303 304 func lookupDirect(ctx context.Context, path string) (Repo, error) { 305 security := web.SecureOnly 306 307 if module.MatchPrefixPatterns(cfg.GOINSECURE, path) { 308 security = web.Insecure 309 } 310 rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security) 311 if err != nil { 312 // We don't know where to find code for a module with this path. 313 return nil, notExistError{err: err} 314 } 315 316 if rr.VCS.Name == "mod" { 317 // Fetch module from proxy with base URL rr.Repo. 318 return newProxyRepo(rr.Repo, path) 319 } 320 321 code, err := lookupCodeRepo(ctx, rr, false) 322 if err != nil { 323 return nil, err 324 } 325 return newCodeRepo(code, rr.Root, rr.SubDir, path) 326 } 327 328 func lookupCodeRepo(ctx context.Context, rr *vcs.RepoRoot, local bool) (codehost.Repo, error) { 329 code, err := codehost.NewRepo(ctx, rr.VCS.Cmd, rr.Repo, local) 330 if err != nil { 331 if _, ok := err.(*codehost.VCSError); ok { 332 return nil, err 333 } 334 return nil, fmt.Errorf("lookup %s: %v", rr.Root, err) 335 } 336 return code, nil 337 } 338 339 // A loggingRepo is a wrapper around an underlying Repo 340 // that prints a log message at the start and end of each call. 341 // It can be inserted when debugging. 342 type loggingRepo struct { 343 r Repo 344 } 345 346 func newLoggingRepo(r Repo) *loggingRepo { 347 return &loggingRepo{r} 348 } 349 350 // logCall prints a log message using format and args and then 351 // also returns a function that will print the same message again, 352 // along with the elapsed time. 353 // Typical usage is: 354 // 355 // defer logCall("hello %s", arg)() 356 // 357 // Note the final (). 358 func logCall(format string, args ...any) func() { 359 start := time.Now() 360 fmt.Fprintf(os.Stderr, "+++ %s\n", fmt.Sprintf(format, args...)) 361 return func() { 362 fmt.Fprintf(os.Stderr, "%.3fs %s\n", time.Since(start).Seconds(), fmt.Sprintf(format, args...)) 363 } 364 } 365 366 func (l *loggingRepo) ModulePath() string { 367 return l.r.ModulePath() 368 } 369 370 func (l *loggingRepo) CheckReuse(ctx context.Context, old *codehost.Origin) (err error) { 371 defer func() { 372 logCall("CheckReuse[%s]: %v", l.r.ModulePath(), err) 373 }() 374 return l.r.CheckReuse(ctx, old) 375 } 376 377 func (l *loggingRepo) Versions(ctx context.Context, prefix string) (*Versions, error) { 378 defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)() 379 return l.r.Versions(ctx, prefix) 380 } 381 382 func (l *loggingRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) { 383 defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)() 384 return l.r.Stat(ctx, rev) 385 } 386 387 func (l *loggingRepo) Latest(ctx context.Context) (*RevInfo, error) { 388 defer logCall("Repo[%s]: Latest()", l.r.ModulePath())() 389 return l.r.Latest(ctx) 390 } 391 392 func (l *loggingRepo) GoMod(ctx context.Context, version string) ([]byte, error) { 393 defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)() 394 return l.r.GoMod(ctx, version) 395 } 396 397 func (l *loggingRepo) Zip(ctx context.Context, dst io.Writer, version string) error { 398 dstName := "_" 399 if dst, ok := dst.(interface{ Name() string }); ok { 400 dstName = strconv.Quote(dst.Name()) 401 } 402 defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)() 403 return l.r.Zip(ctx, dst, version) 404 } 405 406 // errRepo is a Repo that returns the same error for all operations. 407 // 408 // It is useful in conjunction with caching, since cache hits will not attempt 409 // the prohibited operations. 410 type errRepo struct { 411 modulePath string 412 err error 413 } 414 415 func (r errRepo) ModulePath() string { return r.modulePath } 416 417 func (r errRepo) CheckReuse(ctx context.Context, old *codehost.Origin) error { return r.err } 418 func (r errRepo) Versions(ctx context.Context, prefix string) (*Versions, error) { return nil, r.err } 419 func (r errRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) { return nil, r.err } 420 func (r errRepo) Latest(ctx context.Context) (*RevInfo, error) { return nil, r.err } 421 func (r errRepo) GoMod(ctx context.Context, version string) ([]byte, error) { return nil, r.err } 422 func (r errRepo) Zip(ctx context.Context, dst io.Writer, version string) error { return r.err } 423 424 // A notExistError is like fs.ErrNotExist, but with a custom message 425 type notExistError struct { 426 err error 427 } 428 429 func notExistErrorf(format string, args ...any) error { 430 return notExistError{fmt.Errorf(format, args...)} 431 } 432 433 func (e notExistError) Error() string { 434 return e.err.Error() 435 } 436 437 func (notExistError) Is(target error) bool { 438 return target == fs.ErrNotExist 439 } 440 441 func (e notExistError) Unwrap() error { 442 return e.err 443 } 444