Skip to content

Commit bdeaae3

Browse files
committed
Fix disjoint t[] and f[] when using SIMD implementations
1 parent 600e2a8 commit bdeaae3

File tree

4 files changed

+37
-37
lines changed

4 files changed

+37
-37
lines changed

blake2.cpp

+22 -22
Original file line number | Diff line number | Diff line change
@@ -455,12 +455,12 @@ void BLAKE2s::Restart(const BLAKE2s_ParameterBlock& block, const word32 counter[
455455
}
456456

457457
State& state = *m_state.data();
458-
state.t[0] = state.t[1] = 0, state.f[0] = state.f[1] = 0, state.length = 0;
458+
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
459459

460460
if (counter != NULLPTR)
461461
{
462-
state.t[0] = counter[0];
463-
state.t[1] = counter[1];
462+
state.tf[0] = counter[0];
463+
state.tf[1] = counter[1];
464464
}
465465

466466
const word32* iv = BLAKE2S_IV;
@@ -486,12 +486,12 @@ void BLAKE2b::Restart(const BLAKE2b_ParameterBlock& block, const word64 counter[
486486
}
487487

488488
State& state = *m_state.data();
489-
state.t[0] = state.t[1] = 0, state.f[0] = state.f[1] = 0, state.length = 0;
489+
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
490490

491491
if (counter != NULLPTR)
492492
{
493-
state.t[0] = counter[0];
494-
state.t[1] = counter[1];
493+
state.tf[0] = counter[0];
494+
state.tf[1] = counter[1];
495495
}
496496

497497
const word64* iv = BLAKE2B_IV;
@@ -584,11 +584,11 @@ void BLAKE2s::TruncatedFinal(byte *hash, size_t size)
584584

585585
// Set last block unconditionally
586586
State& state = *m_state.data();
587-
state.f[0] = ~static_cast<word32>(0);
587+
state.tf[2] = ~static_cast<word32>(0);
588588

589589
// Set last node if tree mode
590590
if (m_treeMode)
591-
state.f[1] = ~static_cast<word32>(0);
591+
state.tf[3] = ~static_cast<word32>(0);
592592

593593
// Increment counter for tail bytes only
594594
IncrementCounter(state.length);
@@ -609,11 +609,11 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size)
609609

610610
// Set last block unconditionally
611611
State& state = *m_state.data();
612-
state.f[0] = ~static_cast<word64>(0);
612+
state.tf[2] = ~static_cast<word64>(0);
613613

614614
// Set last node if tree mode
615615
if (m_treeMode)
616-
state.f[1] = ~static_cast<word64>(0);
616+
state.tf[3] = ~static_cast<word64>(0);
617617

618618
// Increment counter for tail bytes only
619619
IncrementCounter(state.length);
@@ -630,15 +630,15 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size)
630630
void BLAKE2s::IncrementCounter(size_t count)
631631
{
632632
State& state = *m_state.data();
633-
state.t[0] += static_cast<word32>(count);
634-
state.t[1] += !!(state.t[0] < count);
633+
state.tf[0] += static_cast<word32>(count);
634+
state.tf[1] += !!(state.tf[0] < count);
635635
}
636636

637637
void BLAKE2b::IncrementCounter(size_t count)
638638
{
639639
State& state = *m_state.data();
640-
state.t[0] += static_cast<word64>(count);
641-
state.t[1] += !!(state.t[0] < count);
640+
state.tf[0] += static_cast<word64>(count);
641+
state.tf[1] += !!(state.tf[0] < count);
642642
}
643643

644644
void BLAKE2s::Compress(const byte *input)
@@ -702,10 +702,10 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state)
702702
v[ 9] = iv[1];
703703
v[10] = iv[2];
704704
v[11] = iv[3];
705-
v[12] = state.t[0] ^ iv[4];
706-
v[13] = state.t[1] ^ iv[5];
707-
v[14] = state.f[0] ^ iv[6];
708-
v[15] = state.f[1] ^ iv[7];
705+
v[12] = state.tf[0] ^ iv[4];
706+
v[13] = state.tf[1] ^ iv[5];
707+
v[14] = state.tf[2] ^ iv[6];
708+
v[15] = state.tf[3] ^ iv[7];
709709

710710
BLAKE2B_ROUND<0>(m, v);
711711
BLAKE2B_ROUND<1>(m, v);
@@ -739,10 +739,10 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state)
739739
v[ 9] = iv[1];
740740
v[10] = iv[2];
741741
v[11] = iv[3];
742-
v[12] = state.t[0] ^ iv[4];
743-
v[13] = state.t[1] ^ iv[5];
744-
v[14] = state.f[0] ^ iv[6];
745-
v[15] = state.f[1] ^ iv[7];
742+
v[12] = state.tf[0] ^ iv[4];
743+
v[13] = state.tf[1] ^ iv[5];
744+
v[14] = state.tf[2] ^ iv[6];
745+
v[15] = state.tf[3] ^ iv[7];
746746

747747
BLAKE2S_ROUND<0>(m, v);
748748
BLAKE2S_ROUND<1>(m, v);

blake2.h

+6 -6
Original file line number | Diff line number | Diff line change
@@ -134,12 +134,12 @@ struct CRYPTOPP_NO_VTABLE BLAKE2s_State
134134
{
135135
// Set all members except scratch buffer[]
136136
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
137-
t[0]=t[1]=f[0]=f[1] = 0;
137+
tf[0]=tf[1]=tf[2]=tf[3] = 0;
138138
length = 0;
139139
}
140140

141-
// SSE2, SSE4 and NEON depend upon t[] and f[] being side-by-side
142-
word32 h[8], t[2], f[2];
141+
// SSE4, Power7 and NEON depend upon t[] and f[] being side-by-side
142+
word32 h[8], tf[4]; // t[2], f[2];
143143
byte buffer[BLAKE2s_Info::BLOCKSIZE];
144144
size_t length;
145145
};
@@ -152,12 +152,12 @@ struct CRYPTOPP_NO_VTABLE BLAKE2b_State
152152
{
153153
// Set all members except scratch buffer[]
154154
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
155-
t[0]=t[1]=f[0]=f[1] = 0;
155+
tf[0]=tf[1]=tf[2]=tf[3] = 0;
156156
length = 0;
157157
}
158158

159-
// SSE2, SSE4 and NEON depend upon t[] and f[] being side-by-side
160-
word64 h[8], t[2], f[2];
159+
// SSE4, Power8 and NEON depend upon t[] and f[] being side-by-side
160+
word64 h[8], tf[4]; // t[2], f[2];
161161
byte buffer[BLAKE2b_Info::BLOCKSIZE];
162162
size_t length;
163163
};

blake2b-simd.cpp

+6 -6
Original file line number | Diff line number | Diff line change
@@ -457,8 +457,8 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state)
457457
row2h = LOADU( &state.h[6] );
458458
row3l = LOADU( &BLAKE2B_IV[0] );
459459
row3h = LOADU( &BLAKE2B_IV[2] );
460-
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.t[0] ) );
461-
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.f[0] ) );
460+
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.tf[0] ) );
461+
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.tf[2] ) );
462462

463463
BLAKE2B_ROUND( 0 );
464464
BLAKE2B_ROUND( 1 );
@@ -717,8 +717,8 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state)
717717

718718
row3l = vld1q_u64(&BLAKE2B_IV[0]);
719719
row3h = vld1q_u64(&BLAKE2B_IV[2]);
720-
row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.t[0]));
721-
row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.f[0]));
720+
row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.tf[0]));
721+
row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.tf[2]));
722722

723723
BLAKE2B_ROUND(0);
724724
BLAKE2B_ROUND(1);
@@ -1194,8 +1194,8 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state)
11941194

11951195
row3l = VectorLoad64(&BLAKE2B_IV[0]);
11961196
row3h = VectorLoad64(&BLAKE2B_IV[2]);
1197-
row4l = vec_xor(VectorLoad64(&BLAKE2B_IV[4]), VectorLoad64(&state.t[0]));
1198-
row4h = vec_xor(VectorLoad64(&BLAKE2B_IV[6]), VectorLoad64(&state.f[0]));
1197+
row4l = vec_xor(VectorLoad64(&BLAKE2B_IV[4]), VectorLoad64(&state.tf[0]));
1198+
row4h = vec_xor(VectorLoad64(&BLAKE2B_IV[6]), VectorLoad64(&state.tf[2]));
11991199

12001200
BLAKE2B_ROUND(0);
12011201
BLAKE2B_ROUND(1);

blake2s-simd.cpp

+3 -3
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state)
335335
row1 = ff0 = LOADU( &state.h[0] );
336336
row2 = ff1 = LOADU( &state.h[4] );
337337
row3 = LOADU( &BLAKE2S_IV[0] );
338-
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.t[0] ) );
338+
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.tf[0] ) );
339339

340340
BLAKE2S_ROUND( 0 );
341341
BLAKE2S_ROUND( 1 );
@@ -653,7 +653,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
653653
const uint32x4_t f0 = row1 = vld1q_u32(&state.h[0]);
654654
const uint32x4_t f1 = row2 = vld1q_u32(&state.h[4]);
655655
row3 = vld1q_u32(&BLAKE2S_IV[0]);
656-
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.t[0]));
656+
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.tf[0]));
657657

658658
BLAKE2S_ROUND(0);
659659
BLAKE2S_ROUND(1);
@@ -1000,7 +1000,7 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
10001000
row1 = ff0 = VectorLoad32LE( &state.h[0] );
10011001
row2 = ff1 = VectorLoad32LE( &state.h[4] );
10021002
row3 = VectorLoad32( &BLAKE2S_IV[0] );
1003-
row4 = vec_xor( VectorLoad32( &BLAKE2S_IV[4] ), VectorLoad32( &state.t[0] ) );
1003+
row4 = vec_xor( VectorLoad32( &BLAKE2S_IV[4] ), VectorLoad32( &state.tf[0] ) );
10041004

10051005
BLAKE2S_ROUND( 0 );
10061006
BLAKE2S_ROUND( 1 );

0 commit comments

Comments
 (0)