tree -> index diff for status #1363


Merged · 9 commits · May 14, 2024
4 changes: 2 additions & 2 deletions .github/workflows/msrv.yml
@@ -25,6 +25,6 @@ jobs:
     - uses: actions/checkout@v4
     - uses: extractions/setup-just@v2
     - run: |
-        rustup toolchain install $rust_version --profile minimal --no-self-update
-        rustup default $rust_version
+        rustup toolchain install ${{ env.rust_version }} --profile minimal --no-self-update
+        rustup default ${{ env.rust_version }}
     - run: just ci-check-msrv
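
For context: inside a `run:` script, `$rust_version` is expanded by the shell, which only works if the variable has actually been exported into the job's environment; `${{ env.rust_version }}` is substituted by the workflow runner before the script executes, so the pinned MSRV toolchain is installed reliably.
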
76 changes: 67 additions & 9 deletions gitoxide-core/src/repository/odb.rs
@@ -1,4 +1,5 @@
 use std::io;
+use std::sync::atomic::Ordering;

 use anyhow::bail;

@@ -50,6 +51,8 @@ pub mod statistics {
     pub struct Options {
         pub format: OutputFormat,
         pub thread_limit: Option<usize>,
+        /// A debug-flag that triggers looking up the headers of all objects again, but without indices preloaded
+        pub extra_header_lookup: bool,
     }
 }

@@ -59,7 +62,11 @@ pub fn statistics(
     mut progress: impl gix::Progress,
     out: impl io::Write,
     mut err: impl io::Write,
-    statistics::Options { format, thread_limit }: statistics::Options,
+    statistics::Options {
+        format,
+        thread_limit,
+        extra_header_lookup,
+    }: statistics::Options,
 ) -> anyhow::Result<()> {
     use bytesize::ByteSize;
     use gix::odb::{find, HeaderExt};
@@ -76,6 +83,10 @@ pub fn statistics(
     #[cfg_attr(feature = "serde", derive(serde::Serialize))]
     #[derive(Default)]
     struct Statistics {
+        /// All objects that were used to produce these statistics.
+        /// Only `Some` if we are doing an extra round of header queries on a repository without loaded indices.
+        #[cfg_attr(feature = "serde", serde(skip_serializing))]
+        ids: Option<Vec<gix::ObjectId>>,
         total_objects: usize,
         loose_objects: usize,
         packed_objects: usize,
@@ -135,14 +146,17 @@ pub fn statistics(
     }

     impl gix::parallel::Reduce for Reduce {
-        type Input = Result<Vec<gix::odb::find::Header>, anyhow::Error>;
+        type Input = Result<Vec<(gix::ObjectId, gix::odb::find::Header)>, anyhow::Error>;
         type FeedProduce = ();
         type Output = Statistics;
         type Error = anyhow::Error;

         fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
-            for item in items? {
+            for (id, item) in items? {
                 self.stats.consume(item);
+                if let Some(ids) = self.stats.ids.as_mut() {
+                    ids.push(id);
+                }
             }
             Ok(())
         }
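
For readers unfamiliar with gix's map-reduce helper: producer threads yield chunks of results, and the reducer's `feed` runs once per chunk. The change above threads each object's id alongside its header so the reducer can optionally retain all ids for a later verification pass. A minimal standalone sketch of that aggregation step, with hypothetical types rather than the gix API:

```rust
/// Aggregated results; `ids` is `Some` only when a second
/// verification pass over all seen objects was requested.
#[derive(Default)]
struct Stats {
    total_objects: usize,
    ids: Option<Vec<u64>>,
}

impl Stats {
    /// The equivalent of `feed`: consume one chunk of (id, header-size) pairs.
    fn feed(&mut self, chunk: Vec<(u64, usize)>) {
        for (id, _header_size) in chunk {
            self.total_objects += 1;
            if let Some(ids) = self.ids.as_mut() {
                ids.push(id); // the memory cost is only paid when the extra pass is enabled
            }
        }
    }
}

fn main() {
    let mut stats = Stats {
        ids: Some(Vec::new()), // mirrors `extra_header_lookup.then(Vec::new)`
        ..Default::default()
    };
    stats.feed(vec![(1, 30), (2, 42)]);
    assert_eq!(stats.total_objects, 2);
    assert_eq!(stats.ids.as_ref().map(Vec::len), Some(2));
}
```
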
@@ -154,9 +168,9 @@ pub fn statistics(
     }

     let cancelled = || anyhow::anyhow!("Cancelled by user");
-    let object_ids = repo.objects.store_ref().iter()?.filter_map(Result::ok);
+    let object_ids = repo.objects.iter()?.filter_map(Result::ok);
     let chunk_size = 1_000;
-    let stats = if gix::parallel::num_threads(thread_limit) > 1 {
+    let mut stats = if gix::parallel::num_threads(thread_limit) > 1 {
         gix::parallel::in_parallel(
             gix::interrupt::Iter::new(
                 gix::features::iter::Chunks {
@@ -166,19 +180,30 @@ pub fn statistics(
                 cancelled,
             ),
             thread_limit,
-            move |_| (repo.objects.clone().into_inner(), counter),
+            {
+                let objects = repo.objects.clone();
+                move |_| (objects.clone().into_inner(), counter)
+            },
             |ids, (handle, counter)| {
                 let ids = ids?;
-                counter.fetch_add(ids.len(), std::sync::atomic::Ordering::Relaxed);
+                counter.fetch_add(ids.len(), Ordering::Relaxed);
                 let out = ids
                     .into_iter()
-                    .map(|id| handle.header(id))
+                    .map(|id| handle.header(id).map(|hdr| (id, hdr)))
                     .collect::<Result<Vec<_>, _>>()?;
                 Ok(out)
             },
-            Reduce::default(),
+            Reduce {
+                stats: Statistics {
+                    ids: extra_header_lookup.then(Vec::new),
+                    ..Default::default()
+                },
+            },
         )?
     } else {
+        if extra_header_lookup {
+            bail!("extra-header-lookup is only meaningful in threaded mode");
+        }
         let mut stats = Statistics::default();

         for (count, id) in object_ids.enumerate() {
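
Worth noting in the per-thread-state change above: the original `move |_| (repo.objects.clone().into_inner(), counter)` would move out of `repo` into the closure, so `repo` could no longer be used as a whole afterwards (and it is needed again below, to re-open the repository). Cloning the field into a local first means only that clone is moved. A minimal sketch of the same borrow-checker issue, with hypothetical types:

```rust
struct Repo {
    objects: String,
}

fn main() {
    let repo = Repo {
        objects: "odb".to_string(),
    };

    // A `move` closure would take the captured path by value, leaving `repo`
    // (partially) moved and unusable as a whole afterwards:
    //     let per_thread = move |_i: usize| repo.objects.clone();

    // Cloning into a local first moves only the clone into the closure:
    let objects = repo.objects.clone();
    let per_thread = move |_i: usize| objects.clone();

    let _state = per_thread(0);
    println!("{}", repo.objects); // `repo` is still fully usable here
}
```
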
@@ -193,6 +218,39 @@ pub fn statistics(

     progress.show_throughput(start);

+    if let Some(mut ids) = stats.ids.take() {
+        // Critical to re-open the repo to assure we don't have any ODB state and start fresh.
+        let start = std::time::Instant::now();
+        let repo = gix::open_opts(repo.git_dir(), repo.open_options().to_owned())?;
+        progress.set_name("re-counting".into());
+        progress.init(Some(ids.len()), gix::progress::count("objects"));
+        let counter = progress.counter();
+        counter.store(0, Ordering::Relaxed);
+        let errors = gix::parallel::in_parallel_with_slice(
+            &mut ids,
+            thread_limit,
+            {
+                let objects = repo.objects.clone();
+                move |_| (objects.clone().into_inner(), counter, false)
+            },
+            |id, (odb, counter, has_error), _threads_left, _stop_everything| -> anyhow::Result<()> {
+                counter.fetch_add(1, Ordering::Relaxed);
+                if let Err(_err) = odb.header(id) {
+                    *has_error = true;
+                    gix::trace::error!(err = ?_err, "Object that is known to be present wasn't found");
+                }
+                Ok(())
+            },
+            || Some(std::time::Duration::from_millis(100)),
+            |(_, _, has_error)| has_error,
+        )?;
+
+        progress.show_throughput(start);
+        if errors.contains(&true) {
+            bail!("At least one object couldn't be looked up even though it must exist");
+        }
+    }
+
     #[cfg(feature = "serde")]
     {
         serde_json::to_writer_pretty(out, &stats)?;
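
The verification pass above uses gix's slice-parallelism helper. The general pattern it follows is to split the slice across workers, give each worker its own mutable state (here an ODB handle, a shared counter, and an error flag), then inspect the per-worker state afterwards. A rough sketch of that pattern using std threads and a hypothetical `check_all` helper, not gix's actual API:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Look up every id in parallel, giving each worker its own error flag,
/// and report whether any worker saw a failure.
fn check_all(ids: &[u64], lookup: impl Fn(u64) -> Result<(), ()> + Sync) -> bool {
    // Shared progress counter; in real code this would drive a progress bar.
    let counter = AtomicUsize::new(0);
    let workers = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(1);
    let chunk = ids.len().div_ceil(workers).max(1);
    std::thread::scope(|s| {
        let handles: Vec<_> = ids
            .chunks(chunk)
            .map(|part| {
                let counter = &counter;
                let lookup = &lookup;
                s.spawn(move || {
                    // Per-worker state, like the `(odb, counter, false)` tuple above.
                    let mut has_error = false;
                    for &id in part {
                        counter.fetch_add(1, Ordering::Relaxed);
                        if lookup(id).is_err() {
                            has_error = true;
                        }
                    }
                    has_error
                })
            })
            .collect();
        // Equivalent of checking `errors.contains(&true)` after the run.
        handles.into_iter().any(|h| h.join().unwrap())
    })
}
```
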
2 changes: 1 addition & 1 deletion gix-config/tests/mem.rs
@@ -4,7 +4,7 @@ use std::alloc;
 use std::time::Instant;

 #[global_allocator]
-static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::max_value());
+static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::MAX);

 #[test]
 fn usage() {
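
A small modernization: `usize::max_value()` is soft-deprecated in favor of the associated constant `usize::MAX` (available since Rust 1.43); the behavior is identical.
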
22 changes: 16 additions & 6 deletions gix-date/src/time/write.rs
@@ -5,6 +5,10 @@ use crate::{time::Sign, Time};
 /// Serialization with standard `git` format
 impl Time {
     /// Serialize this instance into memory, similar to what [`write_to()`][Self::write_to()] would do with arbitrary `Write` implementations.
+    ///
+    /// # Panics
+    ///
+    /// If the underlying call fails as this instance can't be represented, typically due to an invalid offset.
     pub fn to_bstring(&self) -> BString {
         let mut buf = Vec::with_capacity(64);
         self.write_to(&mut buf).expect("write to memory cannot fail");
@@ -13,6 +17,18 @@ impl Time {

     /// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags.
     pub fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
+        const SECONDS_PER_HOUR: u32 = 60 * 60;
+        let offset = self.offset.unsigned_abs();
+        let hours = offset / SECONDS_PER_HOUR;
+        let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;
+
+        if hours > 99 {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "Cannot represent offsets larger than +-9900",
+            ));
+        }
+
         let mut itoa = itoa::Buffer::new();
         out.write_all(itoa.format(self.seconds).as_bytes())?;
         out.write_all(b" ")?;
@@ -23,12 +39,6 @@ impl Time {

         const ZERO: &[u8; 1] = b"0";

-        const SECONDS_PER_HOUR: u32 = 60 * 60;
-        let offset = self.offset.unsigned_abs();
-        let hours = offset / SECONDS_PER_HOUR;
-        assert!(hours < 25, "offset is more than a day: {hours}");
-        let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;
-
         if hours < 10 {
             out.write_all(ZERO)?;
         }
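
The net effect: the offset computation moves to the top of `write_to()`, and the old `hours < 25` assertion (a panic) becomes a recoverable `io::Error` with a cap of 99 hours, the most two digits can carry. To make the hours/minutes split concrete: 9000 seconds is 2 hours (7200s) plus 1800 remaining seconds, i.e. 30 minutes, serialized as `+0230`. A standalone sketch of the same computation, using a hypothetical helper rather than the gix-date API:

```rust
/// Format a UTC offset in seconds as git's `+HHMM`/`-HHMM`, e.g. 9000 -> "+0230".
fn format_offset(offset: i32) -> Result<String, String> {
    const SECONDS_PER_HOUR: u32 = 60 * 60;
    let abs = offset.unsigned_abs();
    let hours = abs / SECONDS_PER_HOUR;
    let minutes = (abs % SECONDS_PER_HOUR) / 60;
    if hours > 99 {
        // Two digits per field is all the format can carry.
        return Err("Cannot represent offsets larger than +-9900".into());
    }
    let sign = if offset < 0 { '-' } else { '+' };
    Ok(format!("{sign}{hours:02}{minutes:02}"))
}

fn main() {
    assert_eq!(format_offset(9000).unwrap(), "+0230");
    assert_eq!(format_offset(-36000).unwrap(), "-1000");
    assert!(format_offset((100 * 60 * 60) + 30 * 60).is_err());
}
```
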
162 changes: 109 additions & 53 deletions gix-date/tests/time/mod.rs
@@ -1,5 +1,4 @@
-use bstr::ByteSlice;
-use gix_date::{time::Sign, SecondsSinceUnixEpoch, Time};
+use gix_date::Time;

 mod baseline;
 mod format;
@@ -32,57 +31,114 @@ fn is_set() {
         .is_set());
 }

-#[test]
-fn write_to() -> Result<(), Box<dyn std::error::Error>> {
-    for (time, expected) in [
-        (
-            Time {
-                seconds: SecondsSinceUnixEpoch::MAX,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "9223372036854775807 -0000",
-        ),
-        (
-            Time {
-                seconds: SecondsSinceUnixEpoch::MIN,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "-9223372036854775808 -0000",
-        ),
-        (
-            Time {
-                seconds: 500,
-                offset: 9000,
-                sign: Sign::Plus,
-            },
-            "500 +0230",
-        ),
-        (
-            Time {
-                seconds: 189009009,
-                offset: -36000,
-                sign: Sign::Minus,
-            },
-            "189009009 -1000",
-        ),
-        (
-            Time {
-                seconds: 0,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "0 -0000",
-        ),
-    ] {
-        let mut output = Vec::new();
-        time.write_to(&mut output)?;
-        assert_eq!(output.as_bstr(), expected);
-        assert_eq!(time.size(), output.len());
-
-        let actual = gix_date::parse(&output.as_bstr().to_string(), None).expect("round-trippable");
-        assert_eq!(time, actual);
-    }
-    Ok(())
-}
+mod write_to {
+    use bstr::ByteSlice;
+    use gix_date::time::Sign;
+    use gix_date::{SecondsSinceUnixEpoch, Time};
+
+    #[test]
+    fn invalid() {
+        let time = Time {
+            seconds: 0,
+            offset: (100 * 60 * 60) + 30 * 60,
+            sign: Sign::Plus,
+        };
+        let err = time.write_to(&mut Vec::new()).unwrap_err();
+        assert_eq!(err.to_string(), "Cannot represent offsets larger than +-9900");
+    }
+
+    #[test]
+    fn valid_roundtrips() -> Result<(), Box<dyn std::error::Error>> {
+        for (time, expected) in [
+            (
+                Time {
+                    seconds: SecondsSinceUnixEpoch::MAX,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "9223372036854775807 -0000",
+            ),
+            (
+                Time {
+                    seconds: SecondsSinceUnixEpoch::MIN,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "-9223372036854775808 -0000",
+            ),
+            (
+                Time {
+                    seconds: 500,
+                    offset: 9000,
+                    sign: Sign::Plus,
+                },
+                "500 +0230",
+            ),
+            (
+                Time {
+                    seconds: 189009009,
+                    offset: -36000,
+                    sign: Sign::Minus,
+                },
+                "189009009 -1000",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "0 -0000",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: -24 * 60 * 60,
+                    sign: Sign::Minus,
+                },
+                "0 -2400",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: 24 * 60 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +2400",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (25 * 60 * 60) + 30 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +2530",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (-25 * 60 * 60) - 30 * 60,
+                    sign: Sign::Minus,
+                },
+                "0 -2530",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (99 * 60 * 60) + 59 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +9959",
+            ),
+        ] {
+            let mut output = Vec::new();
+            time.write_to(&mut output)?;
+            assert_eq!(output.as_bstr(), expected);
+            assert_eq!(time.size(), output.len());
+
+            let actual = gix_date::parse(&output.as_bstr().to_string(), None).expect("round-trippable");
+            assert_eq!(time, actual);
+        }
+        Ok(())
+    }
+}
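
Together, `invalid` pins the new error message for offsets past ±99:59, while `valid_roundtrips` keeps the original cases and adds offsets beyond a day (±2400, ±2530, +9959) that would have tripped the old `hours < 25` assertion, confirming they now serialize and round-trip through `gix_date::parse`.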