@@ -9,9 +9,9 @@ use core::arch::x86::*;
9
9
#[ cfg( target_arch = "x86_64" ) ]
10
10
use core:: arch:: x86_64:: * ;
11
11
12
- use crate :: consts:: { K64 , K64X4 } ;
12
+ use crate :: consts:: K64 ;
13
13
14
- cpufeatures:: new!( avx2_cpuid, "avx" , " avx2" , "sse2" , "sse3 ") ;
14
+ cpufeatures:: new!( avx2_cpuid, "avx2" ) ;
15
15
16
16
pub fn compress ( state : & mut [ u64 ; 8 ] , blocks : & [ [ u8 ; 128 ] ] ) {
17
17
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
@@ -25,7 +25,7 @@ pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
25
25
}
26
26
}
27
27
28
- #[ target_feature( enable = "avx, avx2,sse2,sse3 " ) ]
28
+ #[ target_feature( enable = "avx2" ) ]
29
29
unsafe fn sha512_compress_x86_64_avx2 ( state : & mut [ u64 ; 8 ] , blocks : & [ [ u8 ; 128 ] ] ) {
30
30
let mut start_block = 0 ;
31
31
@@ -110,10 +110,9 @@ unsafe fn load_data_avx2(
110
110
x[ $i] = _mm256_insertf128_si256( x[ $i] , _mm_loadu_si128( data. add( $i + 1 ) as * const _) , 0 ) ;
111
111
112
112
x[ $i] = _mm256_shuffle_epi8( x[ $i] , MASK ) ;
113
- let y = _mm256_add_epi64(
114
- x[ $i] ,
115
- _mm256_loadu_si256( & K64X4 [ 4 * $i] as * const u64 as * const _) ,
116
- ) ;
113
+
114
+ let t = _mm_loadu_si128( K64 . as_ptr( ) . add( $i * 2 ) as * const u64 as * const _) ;
115
+ let y = _mm256_add_epi64( x[ $i] , _mm256_set_m128i( t, t) ) ;
117
116
118
117
_mm_store_si128(
119
118
& mut ms[ 2 * $i] as * mut u64 as * mut _,
@@ -135,7 +134,8 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &
135
134
136
135
for _ in 0 ..4 {
137
136
for j in 0 ..8 {
138
- let y = sha512_update_x_avx ( x, & K64 [ k64_idx] as * const u64 as * const _ ) ;
137
+ let k64 = _mm_loadu_si128 ( & K64 [ k64_idx] as * const u64 as * const _ ) ;
138
+ let y = sha512_update_x_avx ( x, k64) ;
139
139
140
140
sha_round ( current_state, ms[ 2 * j] ) ;
141
141
sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
@@ -153,11 +153,12 @@ unsafe fn rounds_0_63_avx2(
153
153
ms : & mut MsgSchedule ,
154
154
t2 : & mut RoundStates ,
155
155
) {
156
- let mut k64x4_idx: usize = 2 * SHA512_BLOCK_WORDS_NUM ;
156
+ let mut k64x4_idx: usize = SHA512_BLOCK_WORDS_NUM ;
157
157
158
158
for i in 1 ..5 {
159
159
for j in 0 ..8 {
160
- let y = sha512_update_x_avx2 ( x, & K64X4 [ k64x4_idx] as * const u64 as * const _ ) ;
160
+ let t = _mm_loadu_si128 ( K64 . as_ptr ( ) . add ( k64x4_idx) as * const u64 as * const _ ) ;
161
+ let y = sha512_update_x_avx2 ( x, _mm256_set_m128i ( t, t) ) ;
161
162
162
163
sha_round ( current_state, ms[ 2 * j] ) ;
163
164
sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
@@ -171,7 +172,7 @@ unsafe fn rounds_0_63_avx2(
171
172
_mm256_extracti128_si256 ( y, 1 ) ,
172
173
) ;
173
174
174
- k64x4_idx += 4 ;
175
+ k64x4_idx += 2 ;
175
176
}
176
177
}
177
178
}
@@ -249,14 +250,13 @@ unsafe fn accumulate_state(dst: &mut State, src: &State) {
249
250
250
251
macro_rules! fn_sha512_update_x {
251
252
( $name: ident, $ty: ident, {
252
- LOAD = $LOAD: ident,
253
253
ADD64 = $ADD64: ident,
254
254
ALIGNR8 = $ALIGNR8: ident,
255
255
SRL64 = $SRL64: ident,
256
256
SLL64 = $SLL64: ident,
257
257
XOR = $XOR: ident,
258
258
} ) => {
259
- unsafe fn $name( x: & mut [ $ty; 8 ] , k64_p : * const $ty) -> $ty {
259
+ unsafe fn $name( x: & mut [ $ty; 8 ] , k64 : $ty) -> $ty {
260
260
// q[2:1]
261
261
let mut t0 = $ALIGNR8( x[ 1 ] , x[ 0 ] , 8 ) ;
262
262
// q[10:9]
@@ -320,13 +320,12 @@ macro_rules! fn_sha512_update_x {
320
320
x[ 6 ] = x[ 7 ] ;
321
321
x[ 7 ] = temp;
322
322
323
- $ADD64( x[ 7 ] , $LOAD ( k64_p ) )
323
+ $ADD64( x[ 7 ] , k64 )
324
324
}
325
325
} ;
326
326
}
327
327
328
328
fn_sha512_update_x ! ( sha512_update_x_avx, __m128i, {
329
- LOAD = _mm_loadu_si128,
330
329
ADD64 = _mm_add_epi64,
331
330
ALIGNR8 = _mm_alignr_epi8,
332
331
SRL64 = _mm_srli_epi64,
@@ -335,7 +334,6 @@ fn_sha512_update_x!(sha512_update_x_avx, __m128i, {
335
334
} ) ;
336
335
337
336
fn_sha512_update_x ! ( sha512_update_x_avx2, __m256i, {
338
- LOAD = _mm256_loadu_si256,
339
337
ADD64 = _mm256_add_epi64,
340
338
ALIGNR8 = _mm256_alignr_epi8,
341
339
SRL64 = _mm256_srli_epi64,
0 commit comments