1
1
#include <stdio.h>
2
2
#include <string.h>
3
3
#include <getopt.h>
4
- #include <openssl/sha.h>
5
4
#include <assert.h>
6
5
#include <inttypes.h>
6
+ #include <xxh3.h>
7
7
#include "mpi.h"
8
8
#include "dtcmp.h"
9
9
#include "mfu.h"
10
10
#include "list.h"
11
11
12
12
/* number of uint64_t values in our key
13
- * 1 for group ID + (SHA256_DIGEST_LENGTH / 8) */
14
- #define DDUP_KEY_SIZE 5
15
-
13
+ * 1 for group ID + (XXH3_DIGEST_LENGTH / 8) */
14
+ #define DDUP_KEY_SIZE 2
15
+ /* XXH3 64-bit output is an unsigned 64-bit integer, i.e. 8 bytes of digest */
16
+ #define XXH3_DIGEST_LENGTH 8
16
17
/* amount of data to read in order to compute hash */
17
- #define DDUP_CHUNK_SIZE 1048576
18
+ #define DDUP_CHUNK_SIZE 4096
18
19
19
20
/* Print a usage message */
20
21
static void print_usage (void )
@@ -35,11 +36,11 @@ static void print_usage(void)
35
36
/* create MPI datatypes for key and key and satellite data */
36
37
static void mpi_type_init (MPI_Datatype * key , MPI_Datatype * keysat )
37
38
{
38
- assert (SHA256_DIGEST_LENGTH == (DDUP_KEY_SIZE - 1 ) * 8 );
39
+ assert (XXH3_DIGEST_LENGTH == (DDUP_KEY_SIZE - 1 ) * 8 );
39
40
40
41
/*
41
42
* Build MPI datatype for key.
42
- * 1 for group ID + (SHA256_DIGEST_LENGTH / 8)
43
+ * 1 for group ID + (XXH3_DIGEST_LENGTH / 8)
43
44
*/
44
45
MPI_Type_contiguous (DDUP_KEY_SIZE , MPI_UINT64_T , key );
45
46
MPI_Type_commit (key );
@@ -141,14 +142,14 @@ static int read_data(const char* fname, char* chunk_buf, uint64_t chunk_id,
141
142
}
142
143
143
144
struct file_item {
144
- SHA256_CTX ctx ;
145
+ XXH3_state_t state ;
145
146
};
146
147
147
- /* print SHA256 value to stdout */
148
- static void dump_sha256_digest (char * digest_string , unsigned char digest [])
148
+ /* format XXH3 digest as a NUL-terminated hex string in digest_string (needs XXH3_DIGEST_LENGTH * 2 + 1 bytes) */
149
+ static void dump_xxh3_digest (char * digest_string , unsigned char digest [])
149
150
{
150
151
int i ;
151
- for (i = 0 ; i < SHA256_DIGEST_LENGTH ; i ++ ) {
152
+ for (i = 0 ; i < XXH3_DIGEST_LENGTH ; i ++ ) {
152
153
sprintf (& digest_string [i * 2 ], "%02x" , (unsigned int )digest [i ]);
153
154
}
154
155
}
@@ -161,7 +162,7 @@ int main(int argc, char** argv)
161
162
162
163
uint64_t chunk_size = DDUP_CHUNK_SIZE ;
163
164
164
- SHA256_CTX * ctx_ptr ;
165
+ XXH3_state_t * state_ptr ;
165
166
166
167
MPI_Init (NULL , NULL );
167
168
mfu_init ();
@@ -310,8 +311,8 @@ int main(int argc, char** argv)
310
311
/* get local number of items in flist */
311
312
uint64_t checking_files = mfu_flist_size (flist );
312
313
313
- /* allocate memory to hold SHA256 context values */
314
- struct file_item * file_items = (struct file_item * ) MFU_MALLOC (checking_files * sizeof (* file_items ));
314
+ /* allocate memory to hold XXH3 context values */
315
+ struct file_item * file_items = (struct file_item * ) XXH_alignedMalloc (checking_files * sizeof (* file_items ), 128 );
315
316
316
317
/* Allocate two lists of length size, where each
317
318
* element has (DDUP_KEY_SIZE + 1) uint64_t values
@@ -346,8 +347,9 @@ int main(int argc, char** argv)
346
347
/* record our index in flist */
347
348
ptr [DDUP_KEY_SIZE ] = i ;
348
349
349
- /* initialize the SHA256 hash state for this file */
350
- SHA256_Init (& file_items [i ].ctx );
350
+ /* initialize the XXH3 hash state for this file */
351
+ XXH3_INITSTATE (& file_items [i ].state );
352
+ XXH3_64bits_reset (& file_items [i ].state );
351
353
352
354
/* increment our file count */
353
355
new_checking_files ++ ;
@@ -376,7 +378,7 @@ int main(int argc, char** argv)
376
378
/* update the chunk id we'll read from all files */
377
379
chunk_id ++ ;
378
380
379
- /* iterate over our list and compute SHA256 value for each */
381
+ /* iterate over our list and compute XXH3 value for each */
380
382
ptr = list ;
381
383
for (i = 0 ; i < checking_files ; i ++ ) {
382
384
/* get the flist index for this item */
@@ -399,18 +401,14 @@ int main(int argc, char** argv)
399
401
"process" , fname );
400
402
}
401
403
402
- /* update the SHA256 context for this file */
403
- ctx_ptr = & file_items [idx ].ctx ;
404
- SHA256_Update ( ctx_ptr , chunk_buf , data_size );
404
+ /* update the XXH3 context for this file */
405
+ state_ptr = & file_items [idx ].state ;
406
+ XXH3_64bits_update ( state_ptr , chunk_buf , data_size );
405
407
406
408
/*
407
- * Use SHA256 value as key.
408
- * This is actually an hack, but SHA256_Final can't
409
- * be called multiple times with out changing ctx
409
+ * Use XXH3 digest as key.
410
410
*/
411
- SHA256_CTX ctx_tmp ;
412
- memcpy (& ctx_tmp , ctx_ptr , sizeof (ctx_tmp ));
413
- SHA256_Final ((unsigned char * )(ptr + 1 ), & ctx_tmp );
411
+ ptr [1 ] = (uint64_t ) XXH3_64bits_digest (state_ptr ); /* store running digest in the key slot after the group ID; digest does not alter the state */
414
412
415
413
/* move on to next file in the list */
416
414
ptr += DDUP_KEY_SIZE + 1 ;
@@ -441,8 +439,8 @@ int main(int argc, char** argv)
441
439
/* look up file size */
442
440
file_size = mfu_flist_file_get_size (flist , idx );
443
441
444
- /* get a pointer to the SHA256 context for this file */
445
- ctx_ptr = & file_items [idx ].ctx ;
442
+ /* get a pointer to the XXH3 context for this file */
443
+ state_ptr = & file_items [idx ].state ;
446
444
447
445
if (group_ranks [i ] == 1 ) {
448
446
/*
@@ -457,11 +455,11 @@ int main(int argc, char** argv)
457
455
* duplicate with other files that also have
458
456
* matching group_id[i]
459
457
*/
460
- unsigned char digest [ SHA256_DIGEST_LENGTH ] ;
461
- SHA256_Final ( digest , ctx_ptr ) ;
462
-
463
- char digest_string [SHA256_DIGEST_LENGTH * 2 + 1 ];
464
- dump_sha256_digest (digest_string , digest );
458
+ XXH64_hash_t digest = XXH3_64bits_digest ( state_ptr ) ;
459
+ XXH64_canonical_t digest_canon ;
460
+ XXH64_canonicalFromHash ( & digest_canon , digest );
461
+ char digest_string [XXH3_DIGEST_LENGTH * 2 + 1 ]; /* two hex chars per digest byte plus terminating NUL */
462
+ dump_xxh3_digest (digest_string , digest_canon . digest );
465
463
printf ("%s %s\n" , fname , digest_string );
466
464
} else {
467
465
/* Have multiple files with the same checksum,
@@ -519,7 +517,7 @@ int main(int argc, char** argv)
519
517
mfu_free (& group_id );
520
518
mfu_free (& new_list );
521
519
mfu_free (& list );
522
- mfu_free (& file_items );
520
+ XXH_alignedFree (file_items ); /* takes the allocation itself, unlike mfu_free which takes the pointer's address */
523
521
mfu_free (& chunk_buf );
524
522
mfu_flist_free (& flist );
525
523
0 commit comments