Skip to content

Commit 1d1ede9

Browse files
committed
feat(package): implement vcs status cache
It may not be worthy for the majority. Single file `git status` is generally fast. On some slow file systems or lower-end machines, skipping file system access might be helpful. However, slow file system access usually happen on Window. And we'll only have large amount of `git status` when #14981 merges and the repo contains lots of symlinks. Given symlink handling story is crazy in real world Windows, I doubt anybody will hit the performance issue without this.
1 parent 1b82184 commit 1d1ede9

File tree

2 files changed

+90
-21
lines changed

2 files changed

+90
-21
lines changed

src/cargo/ops/cargo_package/vcs.rs

Lines changed: 77 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Helpers to gather the VCS information for `cargo package`.
22
3+
use std::collections::HashMap;
34
use std::path::Path;
45
use std::path::PathBuf;
56

@@ -38,19 +39,71 @@ pub struct GitVcsInfo {
3839
pub struct VcsInfoBuilder<'a, 'gctx> {
3940
ws: &'a Workspace<'gctx>,
4041
opts: &'a PackageOpts<'gctx>,
42+
/// Map each git workdir path to the workdir's git status cache.
43+
caches: HashMap<PathBuf, RepoStatusCache>,
4144
}
4245

4346
impl<'a, 'gctx> VcsInfoBuilder<'a, 'gctx> {
4447
pub fn new(
4548
ws: &'a Workspace<'gctx>,
4649
opts: &'a PackageOpts<'gctx>,
4750
) -> VcsInfoBuilder<'a, 'gctx> {
48-
VcsInfoBuilder { ws, opts }
51+
VcsInfoBuilder {
52+
ws,
53+
opts,
54+
caches: Default::default(),
55+
}
4956
}
5057

5158
/// Builds an [`VcsInfo`] for the given `pkg` and its associated `src_files`.
5259
pub fn build(&mut self, pkg: &Package, src_files: &[PathBuf]) -> CargoResult<Option<VcsInfo>> {
53-
check_repo_state(pkg, src_files, self.ws.gctx(), self.opts)
60+
check_repo_state(pkg, src_files, self.ws.gctx(), self.opts, &mut self.caches)
61+
}
62+
}
63+
64+
/// Cache of git status inquries for a Git workdir.
65+
struct RepoStatusCache {
66+
repo: git2::Repository,
67+
/// Status of each file path relative to the git workdir path.
68+
cache: HashMap<PathBuf, git2::Status>,
69+
}
70+
71+
impl RepoStatusCache {
72+
fn new(repo: git2::Repository) -> RepoStatusCache {
73+
RepoStatusCache {
74+
repo,
75+
cache: Default::default(),
76+
}
77+
}
78+
79+
/// Like [`git2::Repository::status_file`] but with cache.
80+
fn status_file(&mut self, path: &Path) -> CargoResult<git2::Status> {
81+
use std::collections::hash_map::Entry;
82+
match self.cache.entry(path.into()) {
83+
Entry::Occupied(entry) => {
84+
tracing::trace!(
85+
target: "cargo_package_vcs_cache",
86+
"git status cache hit for `{}` at workdir `{}`",
87+
path.display(),
88+
self.repo.workdir().unwrap().display()
89+
);
90+
Ok(*entry.get())
91+
}
92+
Entry::Vacant(entry) => {
93+
tracing::trace!(
94+
target: "cargo_package_vcs_cache",
95+
"git status cache miss for `{}` at workdir `{}`",
96+
path.display(),
97+
self.repo.workdir().unwrap().display()
98+
);
99+
let status = self.repo.status_file(path)?;
100+
Ok(*entry.insert(status))
101+
}
102+
}
103+
}
104+
105+
fn workdir(&self) -> &Path {
106+
self.repo.workdir().unwrap()
54107
}
55108
}
56109

@@ -67,6 +120,7 @@ pub fn check_repo_state(
67120
src_files: &[PathBuf],
68121
gctx: &GlobalContext,
69122
opts: &PackageOpts<'_>,
123+
caches: &mut HashMap<PathBuf, RepoStatusCache>,
70124
) -> CargoResult<Option<VcsInfo>> {
71125
let Ok(repo) = git2::Repository::discover(p.root()) else {
72126
gctx.shell().verbose(|shell| {
@@ -86,14 +140,20 @@ pub fn check_repo_state(
86140
};
87141

88142
debug!("found a git repo at `{}`", workdir.display());
143+
144+
let cache = caches
145+
.entry(workdir.to_path_buf())
146+
.or_insert_with(|| RepoStatusCache::new(repo));
147+
89148
let path = p.manifest_path();
90-
let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
91-
let Ok(status) = repo.status_file(&path) else {
149+
let path =
150+
paths::strip_prefix_canonical(path, cache.workdir()).unwrap_or_else(|_| path.to_path_buf());
151+
let Ok(status) = cache.status_file(&path) else {
92152
gctx.shell().verbose(|shell| {
93153
shell.warn(format!(
94154
"no (git) Cargo.toml found at `{}` in workdir `{}`",
95155
path.display(),
96-
workdir.display()
156+
cache.workdir().display()
97157
))
98158
})?;
99159
// No checked-in `Cargo.toml` found. This package may be irrelevant.
@@ -106,7 +166,7 @@ pub fn check_repo_state(
106166
shell.warn(format!(
107167
"found (git) Cargo.toml ignored at `{}` in workdir `{}`",
108168
path.display(),
109-
workdir.display()
169+
cache.workdir().display()
110170
))
111171
})?;
112172
// An ignored `Cargo.toml` found. This package may be irrelevant.
@@ -117,14 +177,14 @@ pub fn check_repo_state(
117177
debug!(
118178
"found (git) Cargo.toml at `{}` in workdir `{}`",
119179
path.display(),
120-
workdir.display(),
180+
cache.workdir().display(),
121181
);
122182
let path_in_vcs = path
123183
.parent()
124184
.and_then(|p| p.to_str())
125185
.unwrap_or("")
126186
.replace("\\", "/");
127-
let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
187+
let Some(git) = git(p, gctx, src_files, cache, &opts)? else {
128188
// If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
129189
// then don't generate the corresponding file in order to maintain consistency with past behavior.
130190
return Ok(None);
@@ -138,7 +198,7 @@ fn git(
138198
pkg: &Package,
139199
gctx: &GlobalContext,
140200
src_files: &[PathBuf],
141-
repo: &git2::Repository,
201+
cache: &mut RepoStatusCache,
142202
opts: &PackageOpts<'_>,
143203
) -> CargoResult<Option<GitVcsInfo>> {
144204
// This is a collection of any dirty or untracked files. This covers:
@@ -147,10 +207,10 @@ fn git(
147207
// - ignored (in case the user has an `include` directive that
148208
// conflicts with .gitignore).
149209
let mut dirty_files = Vec::new();
150-
collect_statuses(repo, &mut dirty_files)?;
210+
collect_statuses(&cache.repo, &mut dirty_files)?;
151211
// Include each submodule so that the error message can provide
152212
// specifically *which* files in a submodule are modified.
153-
status_submodules(repo, &mut dirty_files)?;
213+
status_submodules(&cache.repo, &mut dirty_files)?;
154214

155215
// Find the intersection of dirty in git, and the src_files that would
156216
// be packaged. This is a lazy n^2 check, but seems fine with
@@ -159,7 +219,7 @@ fn git(
159219
let mut dirty_src_files: Vec<_> = src_files
160220
.iter()
161221
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
162-
.chain(dirty_metadata_paths(pkg, repo)?.iter())
222+
.chain(dirty_metadata_paths(pkg, cache)?.iter())
163223
.map(|path| {
164224
pathdiff::diff_paths(path, cwd)
165225
.as_ref()
@@ -172,10 +232,10 @@ fn git(
172232
if !dirty || opts.allow_dirty {
173233
// Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
174234
// Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
175-
if repo.is_empty()? {
235+
if cache.repo.is_empty()? {
176236
return Ok(None);
177237
}
178-
let rev_obj = repo.revparse_single("HEAD")?;
238+
let rev_obj = cache.repo.revparse_single("HEAD")?;
179239
Ok(Some(GitVcsInfo {
180240
sha1: rev_obj.id().to_string(),
181241
dirty,
@@ -198,9 +258,8 @@ fn git(
198258
/// This is required because those paths may link to a file outside the
199259
/// current package root, but still under the git workdir, affecting the
200260
/// final packaged `.crate` file.
201-
fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<Vec<PathBuf>> {
261+
fn dirty_metadata_paths(pkg: &Package, repo: &mut RepoStatusCache) -> CargoResult<Vec<PathBuf>> {
202262
let mut dirty_files = Vec::new();
203-
let workdir = repo.workdir().unwrap();
204263
let root = pkg.root();
205264
let meta = pkg.manifest().metadata();
206265
for path in [&meta.license_file, &meta.readme] {
@@ -212,12 +271,12 @@ fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<V
212271
// Inside package root. Don't bother checking git status.
213272
continue;
214273
}
215-
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) {
274+
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), repo.workdir()) {
216275
// Outside package root but under git workdir,
217276
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
218277
dirty_files.push(if abs_path.is_symlink() {
219278
// For symlinks, shows paths to symlink sources
220-
workdir.join(rel_path)
279+
repo.workdir().join(rel_path)
221280
} else {
222281
abs_path
223282
});

tests/testsuite/package.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,7 @@ fn vcs_status_check_for_each_workspace_member() {
11851185
"#,
11861186
)
11871187
.file("hobbit", "...")
1188+
.file("README.md", "")
11881189
.file(
11891190
"isengard/Cargo.toml",
11901191
r#"
@@ -1194,6 +1195,7 @@ fn vcs_status_check_for_each_workspace_member() {
11941195
homepage = "saruman"
11951196
description = "saruman"
11961197
license = "MIT"
1198+
readme = "../README.md"
11971199
"#,
11981200
)
11991201
.file("isengard/src/lib.rs", "")
@@ -1206,6 +1208,7 @@ fn vcs_status_check_for_each_workspace_member() {
12061208
homepage = "sauron"
12071209
description = "sauron"
12081210
license = "MIT"
1211+
readme = "../README.md"
12091212
"#,
12101213
)
12111214
.file("mordor/src/lib.rs", "")
@@ -1228,10 +1231,15 @@ fn vcs_status_check_for_each_workspace_member() {
12281231

12291232
// Ensure dirty files be reported only for one affected package.
12301233
p.cargo("package --workspace --no-verify")
1234+
.env("CARGO_LOG", "cargo_package_vcs_cache=trace")
12311235
.with_status(101)
12321236
.with_stderr_data(str![[r#"
1237+
[..] TRACE cargo_package_vcs_cache: git status cache miss for `isengard/Cargo.toml` at workdir `[ROOT]/foo/`
1238+
[..] TRACE cargo_package_vcs_cache: git status cache miss for `README.md` at workdir `[ROOT]/foo/`
12331239
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
1234-
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1240+
[PACKAGED] 6 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1241+
[..] TRACE cargo_package_vcs_cache: git status cache miss for `mordor/Cargo.toml` at workdir `[ROOT]/foo/`
1242+
[..] TRACE cargo_package_vcs_cache: git status cache hit for `README.md` at workdir `[ROOT]/foo/`
12351243
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:
12361244
12371245
mordor/src/lib.rs
@@ -1246,9 +1254,9 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
12461254
p.cargo("package --workspace --no-verify --allow-dirty")
12471255
.with_stderr_data(str![[r#"
12481256
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
1249-
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1250-
[PACKAGING] mordor v0.0.0 ([ROOT]/foo/mordor)
12511257
[PACKAGED] 6 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1258+
[PACKAGING] mordor v0.0.0 ([ROOT]/foo/mordor)
1259+
[PACKAGED] 7 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
12521260
12531261
"#]])
12541262
.run();
@@ -1263,6 +1271,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
12631271
"Cargo.toml.orig",
12641272
"src/lib.rs",
12651273
"Cargo.lock",
1274+
"README.md",
12661275
],
12671276
[(
12681277
".cargo_vcs_info.json",
@@ -1290,6 +1299,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
12901299
"src/lib.rs",
12911300
"src/main.rs",
12921301
"Cargo.lock",
1302+
"README.md",
12931303
],
12941304
[(
12951305
".cargo_vcs_info.json",

0 commit comments

Comments
 (0)