/* hash.c (4.17 KB) -- committed by Wentao Zhang */
#include "api.h"
/* Byte type: used for message bytes and for the round-constant table. */
typedef unsigned char u8;
/* 64-bit unsigned type: used to widen a round constant before it is placed
 * in a vector lane (see ROUND512). */
typedef unsigned long long u64;

/* Number of ROUND512 iterations run per permutation in crypto_hash. */
#define PRH_ROUNDS 140
/* Number of message bytes absorbed into the state per permutation. */
#define RATE 8
/* Identity macro; not referenced by the code visible in this file.
 * NOTE(review): presumably a byte-order hook kept from a portable variant --
 * confirm before removing. */
#define U64BIG(x) (x)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include  <string.h>
#include <stdlib.h>
#include <emmintrin.h>//sse2 header file(include sse header file)  
#include <pmmintrin.h> //SSE3(include emmintrin.h)  
#include <tmmintrin.h>//SSSE3(include pmmintrin.h)  
#include <smmintrin.h>//SSE4.1(include tmmintrin.h)  
#include <nmmintrin.h>//SSE4.2(include smmintrin.h)  
#include <immintrin.h>
#include <xmmintrin.h>
#include <wmmintrin.h>
/* Identity macros; neither is referenced by the code visible in this file.
 * U64BIG repeats, token for token, the definition made earlier in the file.
 * NOTE(review): presumably byte-order hooks -- confirm before removing. */
#define U64BIG(x) (x)
#define U32BIG(x) (x)
/*
 * Per-round constants. ROUND512(i) XORs constant8[i] into the low 64-bit
 * lane of state[0]. The table holds 255 entries; crypto_hash only ever
 * indexes 0 .. PRH_ROUNDS-1 (the first 140).
 * NOTE(review): the values look like successive states of an 8-bit LFSR
 * starting at 0x01 -- confirm against the algorithm specification.
 */
static const u8 constant8[255] = { 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47,
		0x8e, 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25, 0x4b, 0x97, 0x2e,
		0x5c, 0xb8, 0x70, 0xe0, 0xc0, 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64,
		0xc9, 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37, 0x6e, 0xdc, 0xb9,
		0x72, 0xe4, 0xc8, 0x90, 0x20, 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56,
		0xad, 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac, 0x59, 0xb2, 0x65,
		0xcb, 0x96, 0x2c, 0x58, 0xb0, 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d,
		0xfb, 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a, 0xf5, 0xeb, 0xd7,
		0xae, 0x5d, 0xba, 0x74, 0xe8, 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43,
		0x86, 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7, 0x8f, 0x1e, 0x3c,
		0x79, 0xf3, 0xe7, 0xce, 0x9c, 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62,
		0xc5, 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4, 0x48, 0x91, 0x22,
		0x45, 0x8a, 0x14, 0x29, 0x52, 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53,
		0xa7, 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76, 0xec, 0xd9, 0xb3,
		0x67, 0xcf, 0x9e, 0x3d, 0x7b, 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa,
		0xf4, 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd, 0x9a, 0x35, 0x6a,
		0xd4, 0xa8, 0x51, 0xa3, 0x46, 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07,
		0x0e, 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab, 0x57, 0xaf, 0x5f,
		0xbe, 0x7c, 0xf9, 0xf2, 0xe5, 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84,
		0x09, 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe, 0xfc, 0xf8, 0xf0,
		0xe1, 0xc2, 0x85, 0x0b, 0x17, 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6,
		0x8d, 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80 };
/*
 * forward_sbox_SSE(a, b, c, d, f, g, h)
 *
 * Nonlinear layer over four __m128i rows, built only from XOR/AND/OR, so
 * every bit position is processed independently of the others.
 *
 *   inputs : a, b, c, d
 *   outputs: a (overwritten in place), f, g, h
 *
 * With t1 = a ^ all1, t3 = c ^ (b & t1), t6 = d ^ t1, t8 = b ^ d:
 *   h = d ^ t3
 *   g = (b | c) ^ t6
 *   a = t8 ^ (t3 & t6)
 *   f = t3 ^ (g & t8)
 *
 * This macro is not hygienic. It expects the caller's scope to provide
 *   - all1 (crypto_hash sets it to all-ones, which makes t1 the bitwise
 *     complement of a), and
 *   - the scratch variables tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11.
 * Arguments are expanded several times, so pass plain lvalues without side
 * effects. b, c and d are still read after h and g have been written, so
 * the output arguments f, g, h must not name the same object as b, c or d.
 */
#define forward_sbox_SSE(a, b, c, d,  f, g, h) \
{                   \
tmm1    =_mm_xor_si128(  a , all1 );	\
tmm2 =_mm_and_si128(  b, tmm1    );	\
tmm3 =_mm_xor_si128(  c  , tmm2 );	\
h=_mm_xor_si128(  d  , tmm3 );	\
tmm5 =_mm_or_si128 (  b, c);	\
tmm6=_mm_xor_si128(  d, tmm1 );	\
g=_mm_xor_si128(  tmm5, tmm6 );	\
tmm8=_mm_xor_si128(  b, d );	\
tmm9=_mm_and_si128(  tmm3, tmm6 );	\
a=_mm_xor_si128(  tmm8, tmm9 );	\
tmm11=_mm_and_si128(  g, tmm8 );	\
f=_mm_xor_si128(  tmm3, tmm11 );	\
}

/*
 * ROUND512(i): one round applied to the caller's `state` (four __m128i rows).
 *
 *  1. XOR constant8[i] into the low 64-bit lane of state[0]; the high lane
 *     is XORed with 0 and therefore unchanged.
 *  2. Run forward_sbox_SSE on the four rows: state[0] is updated in place,
 *     the other three results land in out1, out2, out3.
 *  3. Store out1/out2/out3 into state[1]/state[2]/state[3], each rotated
 *     left as a single 128-bit value by 1, 16 and 25 bits respectively.
 *     The rotation is built from a per-lane 64-bit left shift by r OR-ed
 *     with the lane-swapped copy (_MM_SHUFFLE(1, 0, 3, 2) exchanges the two
 *     64-bit halves) shifted right by 64 - r.
 *
 * Not hygienic: `state`, out1, out2, out3 and everything forward_sbox_SSE
 * needs must be visible at the expansion site. i indexes constant8, so it
 * must stay below 255.
 */
#define ROUND512(i) {\
state[0] = _mm_xor_si128(state[0], _mm_set_epi64x(0,(u64)constant8[i]));	\
forward_sbox_SSE(state[0], state[1], state[2], state[3], out1, out2, out3);\
state[1] = _mm_or_si128(_mm_slli_epi64(out1, 1), _mm_srli_epi64(_mm_shuffle_epi32(out1, _MM_SHUFFLE(1, 0, 3, 2)), 63));\
state[2] = _mm_or_si128(_mm_slli_epi64(out2, 16), _mm_srli_epi64(_mm_shuffle_epi32(out2, _MM_SHUFFLE(1, 0, 3, 2)), 48));\
state[3] = _mm_or_si128(_mm_slli_epi64(out3, 25), _mm_srli_epi64(_mm_shuffle_epi32(out3, _MM_SHUFFLE(1, 0, 3, 2)), 39));\
}
/*
 * Run PRH_ROUNDS iterations of ROUND512 on the four-row state, in place.
 *
 * The local names declared here (all1, tmm*, out*) are not used directly:
 * the ROUND512 / forward_sbox_SSE macros pick them up from the enclosing
 * scope. The counter is a plain unsigned int so the loop terminates for any
 * PRH_ROUNDS value, not just those that fit in a byte.
 */
static void prh512_permute(__m128i state[4])
{
	const __m128i all1 = _mm_set1_epi32(-1);	/* every bit set */
	__m128i tmm1, tmm2, tmm3, tmm5, tmm6, tmm8, tmm9, tmm11;
	__m128i out1, out2, out3;
	unsigned int i;

	for (i = 0; i < PRH_ROUNDS; i++) {
		ROUND512(i);
	}
}

/*
 * Hash `inlen` bytes at `in` and write CRYPTO_BYTES bytes to `out`.
 *
 * out   - destination buffer; must have room for CRYPTO_BYTES bytes.
 * in    - message bytes; may be NULL when inlen is 0.
 * inlen - message length in bytes.
 *
 * Always returns 0.
 *
 * Structure:
 *   absorb  - each full RATE-byte block is XORed into the low 64 bits of
 *             state[0], followed by one permutation.
 *   pad     - the remaining inlen (< RATE) bytes are copied into a zeroed
 *             8-byte buffer, a single 0x01 byte is appended, and the buffer
 *             is absorbed like a full block.
 *   squeeze - the first CRYPTO_BYTES / 2 bytes of the state are output, the
 *             state is permuted once more, and the first CRYPTO_BYTES / 2
 *             bytes are output again.
 *
 * NOTE(review): CRYPTO_BYTES is not defined in this file (presumably it
 * comes from api.h). CRYPTO_BYTES / 2 must not exceed sizeof(state), i.e.
 * 64 bytes, or the squeeze copies read past the state -- confirm.
 */
int crypto_hash(unsigned char *out, const unsigned char *in,
	unsigned long long inlen) {
	__m128i state[4] = { 0 };
	/* _mm_loadl_epi64 reads exactly 8 bytes, hence the fixed size. */
	u8 lastBlock[8] = { 0 };

	/* Absorb all complete blocks. */
	while (inlen >= RATE) {
		state[0] = _mm_xor_si128(state[0],
			_mm_loadl_epi64((const __m128i *)in));
		prh512_permute(state);
		in += RATE;
		inlen -= RATE;
	}

	/*
	 * Pad and absorb the final partial block. The copy is skipped when
	 * nothing is left so that a NULL `in` with inlen == 0 never reaches
	 * memcpy. inlen < RATE here, so the 0x01 marker stays inside lastBlock.
	 */
	if (inlen > 0) {
		memcpy(lastBlock, in, (size_t)inlen);
	}
	lastBlock[inlen] = 0x01;
	state[0] = _mm_xor_si128(state[0],
		_mm_loadl_epi64((const __m128i *)lastBlock));
	prh512_permute(state);

	/* Squeeze: two halves of the digest with one permutation in between. */
	memcpy(out, state, CRYPTO_BYTES / 2);
	prh512_permute(state);
	memcpy(out + CRYPTO_BYTES / 2, state, CRYPTO_BYTES / 2);

	return 0;
}