Skip to content

git url: support CSV form (also support verifying a tag with a commit hash) #5903

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions client/llb/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,15 @@ func Git(url, ref string, opts ...GitOption) State {
addCap(&gi.Constraints, pb.CapSourceGitMountSSHSock)
}

commitHash := gi.CommitHash
if commitHash == "" && remote != nil && remote.Fragment != nil {
commitHash = remote.Fragment.CommitHash
}
if commitHash != "" {
attrs[pb.AttrCommitHash] = commitHash
addCap(&gi.Constraints, pb.CapSourceGitCommitHash)
}

addCap(&gi.Constraints, pb.CapSourceGit)

source := NewSource("git://"+id, attrs, gi.Constraints)
Expand All @@ -345,6 +354,7 @@ type GitInfo struct {
addAuthCap bool
KnownSSHHosts string
MountSSHSock string
CommitHash string
}

func KeepGitDir() GitOption {
Expand Down Expand Up @@ -373,6 +383,12 @@ func MountSSHSock(sshID string) GitOption {
})
}

// CommitHash returns a GitOption that records v in GitInfo.CommitHash.
//
// When Git builds the source, a non-empty value set through this option takes
// precedence over a commit hash carried in the remote URL fragment and is
// passed along as the pb.AttrCommitHash attribute.
func CommitHash(v string) GitOption {
	setCommitHash := func(info *GitInfo) {
		info.CommitHash = v
	}
	return gitOptionFunc(setCommitHash)
}

// AuthOption can be used with either HTTP or Git sources.
type AuthOption interface {
GitOption
Expand Down
146 changes: 81 additions & 65 deletions frontend/dockerfile/dockerfile2llb/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,11 @@ func dispatchCopy(d *dispatchState, cfg copyConfig) error {
if len(cfg.params.SourcePaths) != 1 {
return errors.New("checksum can't be specified for multiple sources")
}
if !isHTTPSource(cfg.params.SourcePaths[0]) {
ok, err := isHTTPSource(cfg.params.SourcePaths[0])
if err != nil {
return err
}
if !ok {
return errors.New("checksum can't be specified for non-HTTP(S) sources")
}
}
Expand Down Expand Up @@ -1509,7 +1513,7 @@ func dispatchCopy(d *dispatchState, cfg copyConfig) error {
if gitRef.SubDir != "" {
commit += ":" + gitRef.SubDir
}
gitOptions := []llb.GitOption{llb.WithCustomName(pgName)}
gitOptions := []llb.GitOption{llb.WithCustomName(pgName), llb.CommitHash(gitRef.CommitHash)}
if cfg.keepGitDir {
gitOptions = append(gitOptions, llb.KeepGitDir())
}
Expand All @@ -1523,77 +1527,83 @@ func dispatchCopy(d *dispatchState, cfg copyConfig) error {
} else {
a = a.Copy(st, "/", dest, opts...)
}
} else if isHTTPSource(src) {
if !cfg.isAddCommand {
return errors.New("source can't be a URL for COPY")
} else {
isHTTPSourceV, err := isHTTPSource(src)
if err != nil {
return err
}
if isHTTPSourceV {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Use git show --ignore-all-space to review)

if !cfg.isAddCommand {
return errors.New("source can't be a URL for COPY")
}

// Resources from remote URLs are not decompressed.
// https://docs.docker.com/engine/reference/builder/#add
//
// Note: mixing up remote archives and local archives in a single ADD instruction
// would result in undefined behavior: https://github.com/moby/buildkit/pull/387#discussion_r189494717
u, err := url.Parse(src)
f := "__unnamed__"
if err == nil {
if base := path.Base(u.Path); base != "." && base != "/" {
f = base
// Resources from remote URLs are not decompressed.
// https://docs.docker.com/engine/reference/builder/#add
//
// Note: mixing up remote archives and local archives in a single ADD instruction
// would result in undefined behavior: https://github.com/moby/buildkit/pull/387#discussion_r189494717
u, err := url.Parse(src)
f := "__unnamed__"
if err == nil {
if base := path.Base(u.Path); base != "." && base != "/" {
f = base
}
}
}

st := llb.HTTP(src, llb.Filename(f), llb.WithCustomName(pgName), llb.Checksum(cfg.checksum), dfCmd(cfg.params))
st := llb.HTTP(src, llb.Filename(f), llb.WithCustomName(pgName), llb.Checksum(cfg.checksum), dfCmd(cfg.params))

opts := append([]llb.CopyOption{&llb.CopyInfo{
Mode: chopt,
CreateDestPath: true,
}}, copyOpt...)
opts := append([]llb.CopyOption{&llb.CopyInfo{
Mode: chopt,
CreateDestPath: true,
}}, copyOpt...)

if a == nil {
a = llb.Copy(st, f, dest, opts...)
} else {
a = a.Copy(st, f, dest, opts...)
}
} else {
validateCopySourcePath(src, &cfg)
var patterns []string
if cfg.parents {
// detect optional pivot point
parent, pattern, ok := strings.Cut(src, "/./")
if !ok {
pattern = src
src = "/"
if a == nil {
a = llb.Copy(st, f, dest, opts...)
} else {
src = parent
a = a.Copy(st, f, dest, opts...)
}
} else {
validateCopySourcePath(src, &cfg)
var patterns []string
if cfg.parents {
// detect optional pivot point
parent, pattern, ok := strings.Cut(src, "/./")
if !ok {
pattern = src
src = "/"
} else {
src = parent
}

pattern, err = system.NormalizePath("/", pattern, d.platform.OS, false)
if err != nil {
return errors.Wrap(err, "removing drive letter")
}

pattern, err = system.NormalizePath("/", pattern, d.platform.OS, false)
patterns = []string{strings.TrimPrefix(pattern, "/")}
}

src, err = system.NormalizePath("/", src, d.platform.OS, false)
if err != nil {
return errors.Wrap(err, "removing drive letter")
}

patterns = []string{strings.TrimPrefix(pattern, "/")}
}

src, err = system.NormalizePath("/", src, d.platform.OS, false)
if err != nil {
return errors.Wrap(err, "removing drive letter")
}

opts := append([]llb.CopyOption{&llb.CopyInfo{
Mode: chopt,
FollowSymlinks: true,
CopyDirContentsOnly: true,
IncludePatterns: patterns,
AttemptUnpack: cfg.isAddCommand,
CreateDestPath: true,
AllowWildcard: true,
AllowEmptyWildcard: true,
}}, copyOpt...)

if a == nil {
a = llb.Copy(cfg.source, src, dest, opts...)
} else {
a = a.Copy(cfg.source, src, dest, opts...)
opts := append([]llb.CopyOption{&llb.CopyInfo{
Mode: chopt,
FollowSymlinks: true,
CopyDirContentsOnly: true,
IncludePatterns: patterns,
AttemptUnpack: cfg.isAddCommand,
CreateDestPath: true,
AllowWildcard: true,
AllowEmptyWildcard: true,
}}, copyOpt...)

if a == nil {
a = llb.Copy(cfg.source, src, dest, opts...)
} else {
a = a.Copy(cfg.source, src, dest, opts...)
}
}
}
}
Expand Down Expand Up @@ -2255,15 +2265,21 @@ func commonImageNames() []string {
return out
}

func isHTTPSource(src string) bool {
func isHTTPSource(src string) (bool, error) {
if !strings.HasPrefix(src, "http://") && !strings.HasPrefix(src, "https://") {
return false
return false, nil
}
// https://github.com/ORG/REPO.git is a git source, not an http source
if gitRef, gitErr := gitutil.ParseGitRef(src); gitRef != nil && gitErr == nil {
return false
gitRef, gitErr := gitutil.ParseGitRef(src)
var eiuf *gitutil.ErrInvalidURLFragemnt
if errors.As(gitErr, &eiuf) {
// this is a git source, and it has an invalid URL fragment
return false, gitErr
}
if gitRef != nil && gitErr == nil {
return false, nil
}
return true
return true, nil
}

func isEnabledForStage(stage string, value string) bool {
Expand Down
2 changes: 1 addition & 1 deletion frontend/dockerui/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func DetectGitContext(ref string, keepGit bool) (*llb.State, bool) {
if g.SubDir != "" {
commit += ":" + g.SubDir
}
gitOpts := []llb.GitOption{WithInternalName("load git source " + ref)}
gitOpts := []llb.GitOption{WithInternalName("load git source " + ref), llb.CommitHash(g.CommitHash)}
if keepGit {
gitOpts = append(gitOpts, llb.KeepGitDir())
}
Expand Down
1 change: 1 addition & 0 deletions frontend/gateway/grpcclient/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ func defaultLLBCaps() []*apicaps.PBCap {
{ID: string(opspb.CapSourceGit), Enabled: true},
{ID: string(opspb.CapSourceGitKeepDir), Enabled: true},
{ID: string(opspb.CapSourceGitFullURL), Enabled: true},
{ID: string(opspb.CapSourceGitCommitHash), Enabled: true},
{ID: string(opspb.CapSourceHTTP), Enabled: true},
{ID: string(opspb.CapSourceHTTPChecksum), Enabled: true},
{ID: string(opspb.CapSourceHTTPPerm), Enabled: true},
Expand Down
1 change: 1 addition & 0 deletions solver/pb/attr.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const AttrAuthHeaderSecret = "git.authheadersecret"
const AttrAuthTokenSecret = "git.authtokensecret"
const AttrKnownSSHHosts = "git.knownsshhosts"
const AttrMountSSHSock = "git.mountsshsock"
const AttrCommitHash = "git.commithash"
Copy link
Member Author

@AkihiroSuda AkihiroSuda Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Bikeshedding: this could also be named AttrGitChecksum to follow AttrHTTPChecksum)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tonistiigi Which one do you prefer?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AttrGitChecksum

const AttrLocalSessionID = "local.session"
const AttrLocalUniqueID = "local.unique"
const AttrIncludePatterns = "local.includepattern"
Expand Down
7 changes: 7 additions & 0 deletions solver/pb/caps.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const (
CapSourceGitKnownSSHHosts apicaps.CapID = "source.git.knownsshhosts"
CapSourceGitMountSSHSock apicaps.CapID = "source.git.mountsshsock"
CapSourceGitSubdir apicaps.CapID = "source.git.subdir"
CapSourceGitCommitHash apicaps.CapID = "source.git.commithash"

CapSourceHTTP apicaps.CapID = "source.http"
CapSourceHTTPAuth apicaps.CapID = "source.http.auth"
Expand Down Expand Up @@ -222,6 +223,12 @@ func init() {
Status: apicaps.CapStatusExperimental,
})

Caps.Init(apicaps.Cap{
ID: CapSourceGitCommitHash,
Enabled: true,
Status: apicaps.CapStatusExperimental,
})

Caps.Init(apicaps.Cap{
ID: CapSourceHTTP,
Enabled: true,
Expand Down
2 changes: 2 additions & 0 deletions source/git/identifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
type GitIdentifier struct {
Remote string
Ref string
CommitHash string
Subdir string
KeepGitDir bool
AuthTokenSecret string
Expand All @@ -33,6 +34,7 @@ func NewGitIdentifier(remoteURL string) (*GitIdentifier, error) {
repo := GitIdentifier{Remote: u.Remote}
if u.Fragment != nil {
repo.Ref = u.Fragment.Ref
repo.CommitHash = u.Fragment.CommitHash
repo.Subdir = u.Fragment.Subdir
}
if sd := path.Clean(repo.Subdir); sd == "/" || sd == "." {
Expand Down
35 changes: 31 additions & 4 deletions source/git/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ func (gs *gitSource) Identifier(scheme, ref string, attrs map[string]string, pla
id.KnownSSHHosts = v
case pb.AttrMountSSHSock:
id.MountSSHSock = v
case pb.AttrCommitHash:
id.CommitHash = v
}
}

Expand Down Expand Up @@ -207,6 +209,9 @@ func (gs *gitSourceHandler) shaToCacheKey(sha, ref string) string {
if gs.src.Subdir != "" {
key += ":" + gs.src.Subdir
}
if gs.src.CommitHash != "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? Isn't sha already same as CommitHash?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, CommitHash can be a partial hash

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does that matter if we already have a complete one?

Same as https://github.com/moby/buildkit/pull/5903/files/076850aeb7720b56683710d8e30b05fbb9352b97#diff-bf1789b0b19c8d6533a10da5e27f0b31da2cdcd37510a787498c3d19a985d470. E.g., if you build from alpine:latest or alpine:latest@sha256, then if the digest matches they will generate the same cache key.

key += ":commit-hash=" + gs.src.CommitHash
}
return key
}

Expand Down Expand Up @@ -349,10 +354,18 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index
gs.locker.Lock(remote)
defer gs.locker.Unlock(remote)

if ref := gs.src.Ref; ref != "" && gitutil.IsCommitSHA(ref) {
cacheKey := gs.shaToCacheKey(ref, "")
var refCommitFullHash string
if gitutil.IsCommitSHA(gs.src.CommitHash) {
refCommitFullHash = gs.src.CommitHash
}
if refCommitFullHash == "" && gitutil.IsCommitSHA(gs.src.Ref) {
refCommitFullHash = gs.src.Ref
}
if refCommitFullHash != "" {
cacheKey := gs.shaToCacheKey(refCommitFullHash, "")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ref should be set here if one is present even if we know the full hash.

If the first build is from ref=master,commithash= and the second build (replay pinned from provenance) is from ref=master,commithash=abcdef, then they should generate the same cache keys. Ideally we would cover this with a test case.

gs.cacheKey = cacheKey
return cacheKey, ref, nil, true, nil
// gs.src.CommitHash is verified after checking out the commit
return cacheKey, refCommitFullHash, nil, true, nil
}

gs.getAuthToken(ctx, g)
Expand Down Expand Up @@ -415,7 +428,9 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index
if !gitutil.IsCommitSHA(sha) {
return "", "", nil, false, errors.Errorf("invalid commit sha %q", sha)
}

if gs.src.CommitHash != "" && !strings.HasPrefix(sha, gs.src.CommitHash) {
return "", "", nil, false, errors.Errorf("expected commit hash to match %s, got %s", gs.src.CommitHash, sha)
}
cacheKey := gs.shaToCacheKey(sha, usedRef)
gs.cacheKey = cacheKey
return cacheKey, sha, nil, true, nil
Expand Down Expand Up @@ -536,6 +551,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out
subdir = "."
}

checkedoutRef := "HEAD"
if gs.src.KeepGitDir && subdir == "." {
checkoutDirGit := filepath.Join(checkoutDir, ".git")
if err := os.MkdirAll(checkoutDir, 0711); err != nil {
Expand Down Expand Up @@ -605,6 +621,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out
if err != nil {
return nil, errors.Wrapf(err, "failed to checkout remote %s", urlutil.RedactCredentials(gs.src.Remote))
}
checkedoutRef = ref // HEAD may not exist
if subdir != "." {
d, err := os.Open(filepath.Join(cd, subdir))
if err != nil {
Expand Down Expand Up @@ -635,6 +652,16 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out
}

git = git.New(gitutil.WithWorkTree(checkoutDir), gitutil.WithGitDir(gitDir))
if gs.src.CommitHash != "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this happening after we have already completed the checkout instead of before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for verification, as we may potentially have an unexpected code path (in the future)

actualHashBuf, err := git.Run(ctx, "rev-parse", checkedoutRef)
if err != nil {
return nil, errors.Wrapf(err, "failed to rev-parse %s for %s", checkedoutRef, urlutil.RedactCredentials(gs.src.Remote))
}
actualHash := strings.TrimSpace(string(actualHashBuf))
if !strings.HasPrefix(actualHash, gs.src.CommitHash) {
return nil, errors.Errorf("expected commit hash to match %s, got %s", gs.src.CommitHash, actualHash)
}
}
_, err = git.Run(ctx, "submodule", "update", "--init", "--recursive", "--depth=1")
if err != nil {
return nil, errors.Wrapf(err, "failed to update submodules for %s", urlutil.RedactCredentials(gs.src.Remote))
Expand Down
Loading