1
1
use std:: io;
2
+ use std:: sync:: atomic:: Ordering ;
2
3
3
4
use anyhow:: bail;
4
5
@@ -50,6 +51,8 @@ pub mod statistics {
50
51
pub struct Options {
    /// The output format to use when emitting the gathered statistics.
    pub format: OutputFormat,
    /// An optional limit on the number of threads; handed to `gix::parallel::num_threads()`
    /// to decide between the threaded and the single-threaded code path.
    pub thread_limit: Option<usize>,
    /// A debug-flag that triggers looking up the headers of all objects again, but without indices preloaded
    pub extra_header_lookup: bool,
}
54
57
}
55
58
@@ -59,7 +62,11 @@ pub fn statistics(
59
62
mut progress : impl gix:: Progress ,
60
63
out : impl io:: Write ,
61
64
mut err : impl io:: Write ,
62
- statistics:: Options { format, thread_limit } : statistics:: Options ,
65
+ statistics:: Options {
66
+ format,
67
+ thread_limit,
68
+ extra_header_lookup,
69
+ } : statistics:: Options ,
63
70
) -> anyhow:: Result < ( ) > {
64
71
use bytesize:: ByteSize ;
65
72
use gix:: odb:: { find, HeaderExt } ;
@@ -76,6 +83,10 @@ pub fn statistics(
76
83
#[ cfg_attr( feature = "serde" , derive( serde:: Serialize ) ) ]
77
84
#[ derive( Default ) ]
78
85
struct Statistics {
86
+ /// All objects that were used to produce these statistics.
87
+ /// Only `Some` if we are doing an extra round of header queries on a repository without loaded indices.
88
+ #[ cfg_attr( feature = "serde" , serde( skip_serializing) ) ]
89
+ ids : Option < Vec < gix:: ObjectId > > ,
79
90
total_objects : usize ,
80
91
loose_objects : usize ,
81
92
packed_objects : usize ,
@@ -135,14 +146,17 @@ pub fn statistics(
135
146
}
136
147
137
148
impl gix:: parallel:: Reduce for Reduce {
138
- type Input = Result < Vec < gix:: odb:: find:: Header > , anyhow:: Error > ;
149
+ type Input = Result < Vec < ( gix:: ObjectId , gix :: odb:: find:: Header ) > , anyhow:: Error > ;
139
150
type FeedProduce = ( ) ;
140
151
type Output = Statistics ;
141
152
type Error = anyhow:: Error ;
142
153
143
154
fn feed ( & mut self , items : Self :: Input ) -> Result < Self :: FeedProduce , Self :: Error > {
144
- for item in items? {
155
+ for ( id , item) in items? {
145
156
self . stats . consume ( item) ;
157
+ if let Some ( ids) = self . stats . ids . as_mut ( ) {
158
+ ids. push ( id) ;
159
+ }
146
160
}
147
161
Ok ( ( ) )
148
162
}
@@ -154,9 +168,9 @@ pub fn statistics(
154
168
}
155
169
156
170
let cancelled = || anyhow:: anyhow!( "Cancelled by user" ) ;
157
- let object_ids = repo. objects . store_ref ( ) . iter ( ) ?. filter_map ( Result :: ok) ;
171
+ let object_ids = repo. objects . iter ( ) ?. filter_map ( Result :: ok) ;
158
172
let chunk_size = 1_000 ;
159
- let stats = if gix:: parallel:: num_threads ( thread_limit) > 1 {
173
+ let mut stats = if gix:: parallel:: num_threads ( thread_limit) > 1 {
160
174
gix:: parallel:: in_parallel (
161
175
gix:: interrupt:: Iter :: new (
162
176
gix:: features:: iter:: Chunks {
@@ -166,19 +180,30 @@ pub fn statistics(
166
180
cancelled,
167
181
) ,
168
182
thread_limit,
169
- move |_| ( repo. objects . clone ( ) . into_inner ( ) , counter) ,
183
+ {
184
+ let objects = repo. objects . clone ( ) ;
185
+ move |_| ( objects. clone ( ) . into_inner ( ) , counter)
186
+ } ,
170
187
|ids, ( handle, counter) | {
171
188
let ids = ids?;
172
- counter. fetch_add ( ids. len ( ) , std :: sync :: atomic :: Ordering :: Relaxed ) ;
189
+ counter. fetch_add ( ids. len ( ) , Ordering :: Relaxed ) ;
173
190
let out = ids
174
191
. into_iter ( )
175
- . map ( |id| handle. header ( id) )
192
+ . map ( |id| handle. header ( id) . map ( |hdr| ( id , hdr ) ) )
176
193
. collect :: < Result < Vec < _ > , _ > > ( ) ?;
177
194
Ok ( out)
178
195
} ,
179
- Reduce :: default ( ) ,
196
+ Reduce {
197
+ stats : Statistics {
198
+ ids : extra_header_lookup. then ( Vec :: new) ,
199
+ ..Default :: default ( )
200
+ } ,
201
+ } ,
180
202
) ?
181
203
} else {
204
+ if extra_header_lookup {
205
+ bail ! ( "extra-header-lookup is only meaningful in threaded mode" ) ;
206
+ }
182
207
let mut stats = Statistics :: default ( ) ;
183
208
184
209
for ( count, id) in object_ids. enumerate ( ) {
@@ -193,6 +218,40 @@ pub fn statistics(
193
218
194
219
progress. show_throughput ( start) ;
195
220
221
+ if let Some ( mut ids) = stats. ids . take ( ) {
222
+ // Critical to re-open the repo to assure we don't have any ODB state and start fresh.
223
+ let start = std:: time:: Instant :: now ( ) ;
224
+ let repo = gix:: open_opts ( repo. git_dir ( ) , repo. open_options ( ) . to_owned ( ) ) ?;
225
+ progress. set_name ( "re-counting" . into ( ) ) ;
226
+ progress. init ( Some ( ids. len ( ) ) , gix:: progress:: count ( "objects" ) ) ;
227
+ let counter = progress. counter ( ) ;
228
+ counter. store ( 0 , Ordering :: Relaxed ) ;
229
+ dbg ! ( "starting" ) ;
230
+ let errors = gix:: parallel:: in_parallel_with_slice (
231
+ & mut ids,
232
+ thread_limit,
233
+ {
234
+ let objects = repo. objects . clone ( ) ;
235
+ move |_| ( objects. clone ( ) . into_inner ( ) , counter, false )
236
+ } ,
237
+ |id, ( odb, counter, has_error) , _threads_left, _stop_everything| -> anyhow:: Result < ( ) > {
238
+ counter. fetch_add ( 1 , Ordering :: Relaxed ) ;
239
+ if let Err ( _err) = odb. header ( id) {
240
+ * has_error = true ;
241
+ gix:: trace:: error!( err = ?_err, "Object that is known to be present wasn't found" ) ;
242
+ }
243
+ Ok ( ( ) )
244
+ } ,
245
+ || Some ( std:: time:: Duration :: from_millis ( 100 ) ) ,
246
+ |( _, _, has_error) | has_error,
247
+ ) ?;
248
+
249
+ progress. show_throughput ( start) ;
250
+ if errors. contains ( & true ) {
251
+ bail ! ( "At least one object couldn't be looked up even though it must exist" ) ;
252
+ }
253
+ }
254
+
196
255
#[ cfg( feature = "serde" ) ]
197
256
{
198
257
serde_json:: to_writer_pretty ( out, & stats) ?;
0 commit comments