Skip to content

Commit 0270a4e

Browse files
committed
Add a nonstandard shallow clone for GitHub
1 parent 2c4e9f0 commit 0270a4e

File tree

3 files changed

+125
-39
lines changed

3 files changed

+125
-39
lines changed

src/cargo/core/source/source_id.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_
44
use crate::sources::{GitSource, PathSource, RegistrySource};
55
use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl};
66
use log::trace;
7-
use serde::de;
8-
use serde::ser;
7+
use serde::{de, ser, Serialize};
98
use std::cmp::{self, Ordering};
109
use std::collections::HashSet;
1110
use std::fmt::{self, Formatter};
@@ -58,7 +57,7 @@ enum SourceKind {
5857
}
5958

6059
/// Information to find a specific commit in a Git repository.
61-
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
60+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
6261
pub enum GitReference {
6362
/// From a tag.
6463
Tag(String),

src/cargo/sources/git/utils.rs

Lines changed: 122 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ use cargo_util::{paths, ProcessBuilder};
99
use curl::easy::List;
1010
use git2::{self, ErrorClass, ObjectType, Oid};
1111
use log::{debug, info};
12-
use serde::ser;
13-
use serde::Serialize;
12+
use serde::{ser, Deserialize, Serialize};
1413
use std::borrow::Cow;
1514
use std::env;
1615
use std::fmt;
@@ -79,7 +78,7 @@ impl GitRemote {
7978
}
8079

8180
pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<git2::Oid> {
82-
reference.resolve(&self.db_at(path)?.repo)
81+
reference.resolve(&self.db_at(path)?.repo, true)
8382
}
8483

8584
pub fn checkout(
@@ -104,7 +103,7 @@ impl GitRemote {
104103
}
105104
}
106105
None => {
107-
if let Ok(rev) = reference.resolve(&db.repo) {
106+
if let Ok(rev) = reference.resolve(&db.repo, true) {
108107
return Ok((db, rev));
109108
}
110109
}
@@ -123,7 +122,7 @@ impl GitRemote {
123122
.context(format!("failed to clone into: {}", into.display()))?;
124123
let rev = match locked_rev {
125124
Some(rev) => rev,
126-
None => reference.resolve(&repo)?,
125+
None => reference.resolve(&repo, true)?,
127126
};
128127

129128
Ok((
@@ -180,12 +179,48 @@ impl GitDatabase {
180179
}
181180

182181
pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
183-
r.resolve(&self.repo)
182+
r.resolve(&self.repo, true)
184183
}
185184
}
186185

186+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
187+
struct ShalowDataBlob<'a> {
188+
tree: &'a str,
189+
etag: &'a str,
190+
}
191+
192+
#[test]
193+
fn check_with_git_hub() {
194+
panic!(r#"nonstandard shallow clone may be worse than a full check out.
195+
This test is here to make sure we do not merge until we have official signoff from GitHub"#)
196+
}
197+
187198
impl GitReference {
188-
pub fn resolve(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
199+
pub fn resolve(&self, repo: &git2::Repository, tree: bool) -> CargoResult<git2::Oid> {
200+
// Check if Cargo has done a nonstandard shallow clone
201+
if let Some(reference) = repo
202+
.find_reference(
203+
&(format!(
204+
"refs/cargo-{}",
205+
serde_json::to_string(self).expect("why cant we make json of this")
206+
)),
207+
)
208+
.ok()
209+
.and_then(|re| {
210+
let blob = re.peel_to_blob().ok()?;
211+
let shalow_data: ShalowDataBlob<'_> =
212+
serde_json::from_slice(blob.content()).ok()?;
213+
let id = if tree {
214+
shalow_data.tree
215+
} else {
216+
shalow_data.etag
217+
};
218+
Some(id.parse::<Oid>().ok()?)
219+
})
220+
{
221+
return Ok(reference);
222+
}
223+
189224
let id = match self {
190225
// Note that we resolve the named tag here in sync with where it's
191226
// fetched into via `fetch` below.
@@ -707,6 +742,12 @@ fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> Ca
707742
opts.progress(|_, cur, max| {
708743
drop(pb.tick(cur, max, ""));
709744
});
745+
if obj.as_tree().is_some() {
746+
debug!("doing reset for Cargo nonstandard shallow clone");
747+
repo.checkout_tree(obj, Some(&mut opts))?;
748+
debug!("reset done");
749+
return Ok(());
750+
}
710751
debug!("doing reset");
711752
repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
712753
debug!("reset done");
@@ -819,32 +860,44 @@ pub fn fetch(
819860
// The `+` symbol on the refspec means to allow a forced (fast-forward)
820861
// update which is needed if there is ever a force push that requires a
821862
// fast-forward.
822-
match reference {
823-
// For branches and tags we can fetch simply one reference and copy it
824-
// locally, no need to fetch other branches/tags.
825-
GitReference::Branch(b) => {
826-
refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
827-
}
828-
GitReference::Tag(t) => {
829-
refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
863+
if let Some(oid_to_fetch) = oid_to_fetch {
864+
// GitHub told us exactly the minimum we need to fetch, so we can go ahead and do a Cargo nonstandard shallow clone.
865+
refspecs.push(format!("+{0}", oid_to_fetch));
866+
} else {
867+
// In some cases Cargo has done a nonstandard shallow clone of this repo before, but cannot do so now.
868+
// This mostly happens when GitHub is rate limiting us. If so, remove the recorded info about the shallow clone.
869+
if let Ok(mut refe) = repo.find_reference(&format!(
870+
"refs/cargo-{}",
871+
serde_json::to_string(reference).expect("why cant we make json of this")
872+
)) {
873+
let _ = refe.delete();
830874
}
831875

832-
GitReference::DefaultBranch => {
833-
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
834-
}
876+
match reference {
877+
// For branches and tags we can fetch simply one reference and copy it
878+
// locally, no need to fetch other branches/tags.
879+
GitReference::Branch(b) => {
880+
refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
881+
}
882+
GitReference::Tag(t) => {
883+
refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
884+
}
835885

836-
GitReference::Rev(rev) => {
837-
if rev.starts_with("refs/") {
838-
refspecs.push(format!("+{0}:{0}", rev));
839-
} else if let Some(oid_to_fetch) = oid_to_fetch {
840-
refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
841-
} else {
842-
// We don't know what the rev will point to. To handle this
843-
// situation we fetch all branches and tags, and then we pray
844-
// it's somewhere in there.
845-
refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
886+
GitReference::DefaultBranch => {
846887
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
847-
tags = true;
888+
}
889+
890+
GitReference::Rev(rev) => {
891+
if rev.starts_with("refs/") {
892+
refspecs.push(format!("+{0}:{0}", rev));
893+
} else {
894+
// We don't know what the rev will point to. To handle this
895+
// situation we fetch all branches and tags, and then we pray
896+
// it's somewhere in there.
897+
refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
898+
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
899+
tags = true;
900+
}
848901
}
849902
}
850903
}
@@ -1046,6 +1099,22 @@ enum FastPathRev {
10461099
Indeterminate,
10471100
}
10481101

1102+
#[derive(Debug, Deserialize)]
1103+
struct GithubFastPathJsonResponse {
1104+
sha: String,
1105+
commit: GithubCommitJsonResponse,
1106+
}
1107+
1108+
#[derive(Debug, Deserialize)]
1109+
struct GithubCommitJsonResponse {
1110+
tree: GithubTreeJsonResponse,
1111+
}
1112+
1113+
#[derive(Debug, Deserialize)]
1114+
struct GithubTreeJsonResponse {
1115+
sha: String,
1116+
}
1117+
10491118
/// Updating the index is done pretty regularly so we want it to be as fast as
10501119
/// possible. For registries hosted on GitHub (like the crates.io index) there's
10511120
/// a fast path available to use [1] to tell us that there's no updates to be
@@ -1067,11 +1136,8 @@ fn github_fast_path(
10671136
config: &Config,
10681137
) -> CargoResult<FastPathRev> {
10691138
let url = Url::parse(url)?;
1070-
if !is_github(&url) {
1071-
return Ok(FastPathRev::Indeterminate);
1072-
}
10731139

1074-
let local_object = reference.resolve(repo).ok();
1140+
let local_object = reference.resolve(repo, false).ok();
10751141

10761142
let github_branch_name = match reference {
10771143
GitReference::Branch(branch) => branch,
@@ -1111,6 +1177,10 @@ fn github_fast_path(
11111177
}
11121178
};
11131179

1180+
if !is_github(&url) {
1181+
return Ok(FastPathRev::Indeterminate);
1182+
}
1183+
11141184
// This expects GitHub urls in the form `github.com/user/repo` and nothing
11151185
// else
11161186
let mut pieces = url
@@ -1141,7 +1211,7 @@ fn github_fast_path(
11411211
handle.useragent("cargo")?;
11421212
handle.http_headers({
11431213
let mut headers = List::new();
1144-
headers.append("Accept: application/vnd.github.3.sha")?;
1214+
headers.append("Accept: application/vnd.github+json")?;
11451215
if let Some(local_object) = local_object {
11461216
headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
11471217
}
@@ -1161,7 +1231,24 @@ fn github_fast_path(
11611231
if response_code == 304 {
11621232
Ok(FastPathRev::UpToDate)
11631233
} else if response_code == 200 {
1164-
let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
1234+
let data: GithubFastPathJsonResponse = serde_json::from_slice(&response_body)?;
1235+
// We can do a Cargo nonstandard shallow clone, so record the relevant information.
1236+
let bytes = serde_json::to_string(&ShalowDataBlob {
1237+
tree: &data.commit.tree.sha,
1238+
etag: &data.sha,
1239+
})
1240+
.expect("why cant we make json of this");
1241+
let shallow_blob = repo.blob(bytes.as_bytes())?;
1242+
repo.reference(
1243+
&format!(
1244+
"refs/cargo-{}",
1245+
serde_json::to_string(reference).expect("why cant we make json of this")
1246+
),
1247+
shallow_blob,
1248+
true,
1249+
"",
1250+
)?;
1251+
let oid_to_fetch = str::from_utf8(data.commit.tree.sha.as_bytes())?.parse::<Oid>()?;
11651252
Ok(FastPathRev::NeedsFetch(oid_to_fetch))
11661253
} else {
11671254
// Usually response_code == 404 if the repository does not exist, and

src/cargo/sources/registry/remote.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
102102
}
103103
}
104104
let repo = self.repo()?;
105-
let oid = self.index_git_ref.resolve(repo)?;
105+
let oid = self.index_git_ref.resolve(repo, true)?;
106106
let obj = repo.find_object(oid, None)?;
107107
let tree = obj.peel_to_tree()?;
108108

0 commit comments

Comments
 (0)