Crypto++ 8.6
Free C++ class library of cryptographic schemes
lsh256_sse.cpp
// lsh256_sse.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
// makes using zeroupper a little tricky.

#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "cpu.h"
#include "misc.h"

#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_SSSE3_AVAILABLE)
# include <emmintrin.h>
# include <tmmintrin.h>
#endif

#if defined(CRYPTOPP_XOP_AVAILABLE)
# include <ammintrin.h>
#endif

// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
#if (CRYPTOPP_GCC_VERSION >= 40500)
# include <x86intrin.h>
#endif

ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;

const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

const unsigned int LSH_TYPE_256_256 = 0x0000020;
const unsigned int LSH_TYPE_256_224 = 0x000001C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;

NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh256.cpp
extern const word32 LSH256_IV224[CV_WORD_LEN];
extern const word32 LSH256_IV256[CV_WORD_LEN];
extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END // LSH
NAMESPACE_END // Crypto++

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word32 lsh_uint;
typedef word32 lsh_err;
typedef word32 lsh_type;

using CryptoPP::LSH::LSH256_IV224;
using CryptoPP::LSH::LSH256_IV256;
using CryptoPP::LSH::LSH256_StepConstants;

struct LSH256_SSSE3_Context
{
    LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u32* cv_l; // start of our state block
    lsh_u32* cv_r;
    lsh_u32* sub_msgs;
    lsh_u8* last_block;
    lsh_u32& remain_databitlen;
    lsh_type alg_type;
};

struct LSH256_SSSE3_Internal
{
    LSH256_SSSE3_Internal(word32* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u32* submsg_e_l; /* even left sub-message */
    lsh_u32* submsg_e_r; /* even right sub-message */
    lsh_u32* submsg_o_l; /* odd left sub-message */
    lsh_u32* submsg_o_r; /* odd right sub-message */
};
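
// Both structs are views into the word32 state array handed in from the
// LSH256_Base wrappers below: state[0..7] is cv_l, state[8..15] is cv_r,
// state[16..47] holds the four 8-word sub-messages, state[48..79] is the
// 128-byte last_block buffer, and state[80]/state[81] carry AlgorithmType
// and RemainingBits (see the index constants above).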

// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };

/* LSH AlgType Macro */

inline bool LSH_IS_LSH512(lsh_uint val) {
    return (val & 0xf0000) == 0;
}

inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
    return val >> 24;
}

inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
    return val & 0xffff;
}

inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
    return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
}
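
// Example: LSH_TYPE_256_256 = 0x0000020 decodes to 0x20 = 32 hash bytes and
// zero "small hash bits", i.e. a 256-bit digest. LSH_TYPE_256_224 = 0x000001C
// decodes to 0x1C = 28 bytes, i.e. a 224-bit digest.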

inline lsh_u32 loadLE32(lsh_u32 v) {
    return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}

lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
    return rotlFixed(x, r);
}

// Original code relied upon unaligned lsh_u32 buffer
inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);
    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
}

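// Message expansion. In both routines below the first shuffle,
// _MM_SHUFFLE(3,2,1,0), is the identity: each new sub-message is the other
// parity's current sub-message added word-wise to a permuted copy of the
// sub-message being replaced.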
inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));

    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));

    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
}

inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));

    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));

    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi32(
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(3,2,1,0)),
        _mm_shuffle_epi32(
            _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
}

inline void load_sc(const lsh_u32** p_const_v, size_t i)
{
    CRYPTOPP_ASSERT(p_const_v != NULLPTR);

    *p_const_v = &LSH256_StepConstants[i];
}

inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;

    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
}

inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
}

inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
{
    _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
}

template <unsigned int R>
inline void rotate_blk(lsh_u32 cv[8])
{
#if defined(CRYPTOPP_XOP_AVAILABLE)
    _mm_storeu_si128(M128_CAST(cv),
        _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
    _mm_storeu_si128(M128_CAST(cv+4),
        _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
#else
    _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
        _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
        _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
    _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
        _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
        _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
#endif
}

inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
{
    _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(const_v))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
}

inline void rotate_msg_gamma(lsh_u32 cv_r[8])
{
    // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
    _mm_storeu_si128(M128_CAST(cv_r+0),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
            _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
            _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
}
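
// The gamma rotations are all multiples of 8 bits, so each 128-bit lane can
// be rotated with a single byte shuffle instead of shift/or pairs. A scalar
// sketch of the same operation (illustrative only, using g_gamma256 from the
// comment above):
//
//   for (size_t i = 0; i < 8; i++)
//       cv_r[i] = rotlFixed(cv_r[i], g_gamma256[i]);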

inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));

    __m128i temp = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
    _mm_storeu_si128(M128_CAST(cv_l+0),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
    _mm_storeu_si128(M128_CAST(cv_l+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
    _mm_storeu_si128(M128_CAST(cv_r+0), temp);
}
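
// word_perm shuffles the words within each 128-bit lane and then rotates the
// four lanes themselves: cv_l[0..3] takes cv_l[4..7], cv_l[4..7] takes
// cv_r[4..7], cv_r[4..7] takes cv_r[0..3], and cv_r[0..3] takes the old
// cv_l[0..3].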

/* -------------------------------------------------------- *
* step function
* -------------------------------------------------------- */

template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}
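
// One mix call above: add the right half into the left, rotate by Alpha,
// inject the step constants, add the left half back into the right, rotate
// by Beta, add again, then apply the gamma byte-rotations to the right half.
// Even steps use Alpha/Beta = 29/1 and odd steps use 5/17 (ROT_EVEN_* and
// ROT_ODD_* above).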

/* -------------------------------------------------------- *
* compression function
* -------------------------------------------------------- */

inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    LSH256_SSSE3_Internal s_state(ctx->cv_l);
    LSH256_SSSE3_Internal* i_state = &s_state;

    const lsh_u32* const_v = NULL;
    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}
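
// compress above processes one 128-byte block: the first two steps consume
// the freshly loaded message words, each of the remaining NUM_STEPS/2 - 1
// loop iterations expands the sub-messages before its even/odd step pair, and
// a final message expansion plus MsgAdd folds the last sub-message into the
// chaining variable. Step constants are consumed 8 words per step from
// LSH256_StepConstants.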

/* -------------------------------------------------------- */

inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
{
    _mm_storeu_si128(M128_CAST(cv_l+ 0),
        _mm_load_si128(CONST_M128_CAST(iv+ 0)));
    _mm_storeu_si128(M128_CAST(cv_l+ 4),
        _mm_load_si128(CONST_M128_CAST(iv+ 4)));
    _mm_storeu_si128(M128_CAST(cv_r+ 0),
        _mm_load_si128(CONST_M128_CAST(iv+ 8)));
    _mm_storeu_si128(M128_CAST(cv_r+ 4),
        _mm_load_si128(CONST_M128_CAST(iv+12)));
}

inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
}

inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
{
    lsh_u32* sub_msgs = ctx->sub_msgs;

    _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+16), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+20), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+24), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+28), _mm_setzero_si128());
}

inline void init224(LSH256_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
}

inline void init256(LSH256_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
}

/* -------------------------------------------------------- */

inline void fin(LSH256_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
    _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
}

/* -------------------------------------------------------- */

inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    // Multiplying by sizeof(lsh_u8) looks odd...
    memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    if (hash_val_bit_len){
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}

/* -------------------------------------------------------- */

lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u32* const_v = NULL;
    ctx->remain_databitlen = 0;

    switch (alg_type)
    {
    case LSH_TYPE_256_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_256_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        //Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}

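// Absorb input: top up and compress a partially filled last_block first, then
// compress full 128-byte blocks straight from the caller's buffer, and buffer
// whatever remains. The bit-level branches of the reference code are compiled
// out because Crypto++ only feeds whole bytes (pos2 and remain_msg_bit are
// constant zero).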
lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
    {
        memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    if (remain_msg_byte > 0){
        size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
        memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
    {
        // This call to compress caused some trouble.
        // The data pointer can become unaligned in the
        // previous block.
        compress(ctx, data);
        data += LSH256_MSG_BLK_BYTE_LEN;
        databytelen -= LSH256_MSG_BLK_BYTE_LEN;
    }

    if (databytelen > 0){
        memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }

    return LSH_SUCCESS;
}

lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // We are byte oriented. tail bits will always be 0.
    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    if (remain_msg_bit){
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}

ANONYMOUS_NAMESPACE_END // Anonymous

NAMESPACE_BEGIN(CryptoPP)

extern
void LSH256_Base_Restart_SSSE3(word32* state)
{
    state[RemainingBits] = 0;
    LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_ssse3_init(&ctx);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
}

extern
void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
{
    LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
}

extern
void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
{
    LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_ssse3_final(&ctx, hash);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
}

NAMESPACE_END

#endif // CRYPTOPP_SSSE3_AVAILABLE
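
For context, a minimal usage sketch of the public interface that eventually reaches this code path. It assumes the LSH256 class declared in lsh.h and the standard HashTransformation Update/Final calls; whether the SSSE3 routines above actually run is decided at runtime by the LSH256_Base dispatch in lsh256.cpp.

#include "lsh.h"
#include "cryptlib.h"

#include <cstdio>

int main()
{
    // Any byte string works; this is just a smoke test.
    const CryptoPP::byte msg[] = "Do or do not. There is no try.";

    CryptoPP::LSH256 hash;
    CryptoPP::byte digest[32];  // LSH-256-256 produces a 32-byte digest

    hash.Update(msg, sizeof(msg) - 1);  // exclude the trailing NUL
    hash.Final(digest);

    for (unsigned int i = 0; i < sizeof(digest); i++)
        std::printf("%02x", digest[i]);
    std::printf("\n");
    return 0;
}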