/* Imported original ECRYPT submissions after first automatic cleanup. */
/*
*
* Optimized code for Pentium-IV / Microsoft Visual C++ 6.0
*
* Developed on a Pentium-IV 3.2GHz machine running Windows XP SP2.
*
*/
#include "ecrypt-sync.h"
#undef u32
typedef unsigned int u32;
#undef u8
typedef unsigned char u8;
u32 tsc(ECRYPT_ctx* ctx);

/*
 * Rotate the 32-bit word x left by n bits (n must be in 1..31).
 * Plain shifts are used instead of the MSVC-only _lrotl intrinsic, which
 * operates on unsigned long and is therefore unavailable (or a 64-bit
 * rotate) on other toolchains.  Assumes u32 is exactly 32 bits wide.
 */
static u32 tsc_rotl32(u32 x, unsigned int n)
{
    return (x << n) | (x >> (32 - n));
}

/*
 * Rotate the 32-bit word x right by n bits (n must be in 1..31).
 * Portable replacement for the MSVC-only _lrotr intrinsic.
 */
static u32 tsc_rotr32(u32 x, unsigned int n)
{
    return (x >> n) | (x << (32 - n));
}

/*
 * Advance the cipher state in ctx by one step and return one 32-bit
 * output word.
 *
 * The state lives in ctx->l[0..3] (masked to 8 bits on every write here)
 * and ctx->h[0..3] (full 32-bit words).  The carries passed from the
 * l-side sums into the h-side sums treat each (h, l) pair as the high and
 * low part of one wider word.
 *
 * Steps:
 *   1. derive the selector masks (pL0, pH0) and (pL1, pH1) from the state;
 *   2. update l[] and h[] with the bitsliced s-box expressions, first
 *      selected by pL1/pH1 and then by pL0/pH0;
 *   3. permute the h[] words according to the low bits of l[] and combine
 *      them with rotations and additions.
 *
 * Statement order matters: each group of s/u values is computed from the
 * state before the matching ctx->l[] / ctx->h[] entries are overwritten.
 */
u32 tsc(ECRYPT_ctx* ctx)
{
    u32 pH0, pL0, pH1, pL1;
    u32 s0, s1, s2, s3, u0, u1, u2, u3;
    u32 tmpH, tmpL;

    /* parameter calculation */
    pL0 = (ctx->l[3]) & (ctx->l[2]) & (ctx->l[1]) & (ctx->l[0]);
    pH0 = (ctx->h[3]) & (ctx->h[2]) & (ctx->h[1]) & (ctx->h[0]);
    tmpL = pL0 + 0x89;
    pL0 ^= tmpL;
    /* tmpL >> 8 is the carry out of the low-byte addition above */
    pH0 ^= pH0 + 0x49108910 + (tmpL >> 8);
    pL1 = pL0;
    pH1 = pH0;
    tmpL = (ctx->l[3]) + (ctx->l[2]);
    tmpH = (ctx->h[3]) + (ctx->h[2]);
    pL0 ^= (tmpL << 1);
    pH0 ^= (tmpH << 1) + (tmpL >> 7);
    pH1 ^= (tmpH << 8) + (tmpL     );
    tmpL = (ctx->l[1]) + (ctx->l[0]);
    tmpH = (ctx->h[1]) + (ctx->h[0]);
    pL1 ^= (tmpL << 1);
    pH1 ^= (tmpH << 1) + (tmpL >> 7);
    pH0 ^= (tmpH << 8) + (tmpL     );

    /* s-box application, first pass on l[]: pick s where pL1 is set, else u */
    s3 =  (ctx->l[1]) ^ ((ctx->l[3]) &  (ctx->l[2]) & ~(ctx->l[0]));
    s2 =  (ctx->l[0]) ^ ((ctx->l[3]) & ~(ctx->l[2]) & ~(ctx->l[1]));
    s0 = ~(ctx->l[3]) ^ ((ctx->l[2]) & ~(ctx->l[1]) &  (ctx->l[0]));
    u3 = s3 ^ ((ctx->l[3]) |  (ctx->l[2]) | ~(ctx->l[0]));
    u2 = s2 ^ ((ctx->l[3]) | ~(ctx->l[2]) | ~(ctx->l[1]));
    u0 = s0 ^ ((ctx->l[2]) | ~(ctx->l[1]) |  (ctx->l[0]));
    u1 = (ctx->l[2]) ^ ((ctx->l[3]) | (ctx->l[1]) | (ctx->l[0]));
    s1 = ~u1 ^ ((ctx->l[3]) & (ctx->l[1]) & (ctx->l[0]));
    ctx->l[3] = 0xff & ((pL1 & s3) ^ (~pL1 & u3));
    ctx->l[2] = 0xff & ((pL1 & s2) ^ (~pL1 & u2));
    ctx->l[1] = 0xff & ((pL1 & s1) ^ (~pL1 & u1));
    ctx->l[0] = 0xff & ((pL1 & s0) ^ (~pL1 & u0));

    /* first pass on h[]: same expressions, selected by pH1 */
    s3 =  (ctx->h[1]) ^ ((ctx->h[3]) &  (ctx->h[2]) & ~(ctx->h[0]));
    s2 =  (ctx->h[0]) ^ ((ctx->h[3]) & ~(ctx->h[2]) & ~(ctx->h[1]));
    s0 = ~(ctx->h[3]) ^ ((ctx->h[2]) & ~(ctx->h[1]) &  (ctx->h[0]));
    u3 = s3 ^ ((ctx->h[3]) |  (ctx->h[2]) | ~(ctx->h[0]));
    u2 = s2 ^ ((ctx->h[3]) | ~(ctx->h[2]) | ~(ctx->h[1]));
    u0 = s0 ^ ((ctx->h[2]) | ~(ctx->h[1]) |  (ctx->h[0]));
    u1 = (ctx->h[2]) ^ ((ctx->h[3]) | (ctx->h[1]) | (ctx->h[0]));
    s1 = ~u1 ^ ((ctx->h[3]) & (ctx->h[1]) & (ctx->h[0]));
    ctx->h[3] = (pH1 & s3) ^ (~pH1 & u3);
    ctx->h[2] = (pH1 & s2) ^ (~pH1 & u2);
    ctx->h[1] = (pH1 & s1) ^ (~pH1 & u1);
    ctx->h[0] = (pH1 & s0) ^ (~pH1 & u0);

    /* second pass on l[]: keep the current value where pL0 is set, else s */
    s3 =  (ctx->l[1]) ^ ((ctx->l[3]) &  (ctx->l[2]) & ~(ctx->l[0]));
    s2 =  (ctx->l[0]) ^ ((ctx->l[3]) & ~(ctx->l[2]) & ~(ctx->l[1]));
    s1 =  (ctx->l[2]) ^ ((ctx->l[3]) & (ctx->l[1]) & (ctx->l[0])) ^ ~((ctx->l[3]) | (ctx->l[1]) | (ctx->l[0]));
    s0 = ~(ctx->l[3]) ^ ((ctx->l[2]) & ~(ctx->l[1]) &  (ctx->l[0]));
    ctx->l[3] = 0xff & ((pL0 & ctx->l[3]) ^ (~pL0 & s3));
    ctx->l[2] = 0xff & ((pL0 & ctx->l[2]) ^ (~pL0 & s2));
    ctx->l[1] = 0xff & ((pL0 & ctx->l[1]) ^ (~pL0 & s1));
    ctx->l[0] = 0xff & ((pL0 & ctx->l[0]) ^ (~pL0 & s0));

    /* second pass on h[]: same expressions, selected by pH0 */
    s3 =  (ctx->h[1]) ^ ((ctx->h[3]) &  (ctx->h[2]) & ~(ctx->h[0]));
    s2 =  (ctx->h[0]) ^ ((ctx->h[3]) & ~(ctx->h[2]) & ~(ctx->h[1]));
    s1 =  (ctx->h[2]) ^ ((ctx->h[3]) & (ctx->h[1]) & (ctx->h[0])) ^ ~((ctx->h[3]) | (ctx->h[1]) | (ctx->h[0]));
    s0 = ~(ctx->h[3]) ^ ((ctx->h[2]) & ~(ctx->h[1]) &  (ctx->h[0]));
    ctx->h[3] = (pH0 & ctx->h[3]) ^ (~pH0 & s3);
    ctx->h[2] = (pH0 & ctx->h[2]) ^ (~pH0 & s2);
    ctx->h[1] = (pH0 & ctx->h[1]) ^ (~pH0 & s1);
    ctx->h[0] = (pH0 & ctx->h[0]) ^ (~pH0 & s0);

    /* filter calculation: conditionally swap the h[] words, then mix them */
    s3 = ctx->h[3];
    s2 = ctx->h[2];
    s1 = ctx->h[1];
    s0 = ctx->h[0];
    if ((1 & (ctx->l[0])) == 1) { tmpH = s0; s0 = s1; s1 = tmpH; }
    if ((1 & (ctx->l[2])) == 1) { tmpH = s2; s2 = s3; s3 = tmpH; }
    if ((1 & (ctx->l[1])) == 1) { tmpH = s1; s1 = s2; s2 = tmpH; }
    if ((1 & (ctx->l[3])) == 1) { tmpH = s0; s0 = s3; s3 = tmpH; }

    return tsc_rotl32(tsc_rotl32(s0, 7) + tsc_rotr32(s1, 2), 8)
         + tsc_rotr32(tsc_rotl32(s2, 7) + s3, 9);
}
/*
 * Global initialisation entry point.
 * Nothing needs to be prepared here, so the function is a no-op.
 */
void ECRYPT_init(void)
{
}
/*
 * Store the key and the IV length in the context.
 *
 * ctx     - context to initialise; ctx->k[0..19] and ctx->ivBs are written.
 * key     - key bytes; at least 10 bytes are read unconditionally.
 * keysize - key length; divided by 8 to obtain the byte count, which must
 *           be non-zero (it is used as a modulus).
 * ivsize  - IV length; divided by 8 and stored in ctx->ivBs.
 *
 * Bytes 0..9 of the key are copied verbatim; bytes 10..19 of ctx->k are
 * filled from key[i % keyBytes], so a key shorter than 20 bytes is repeated.
 *
 * The copy is done byte by byte: the key pointer is const and has no
 * alignment guarantee, so it must not be accessed through a u32 pointer.
 */
void ECRYPT_keysetup(
    ECRYPT_ctx* ctx,
    const u8* key,
    u32 keysize,
    u32 ivsize)
{
    u32 keyBytes;
    u32 i;

    ctx->ivBs = ivsize / 8;
    keyBytes = keysize / 8;

    /* fixed part: always taken straight from the key */
    for (i = 0; i < 10; i++)
    {
        ctx->k[i] = key[i];
    }

    /* remaining part: wraps around when the key is shorter than 20 bytes */
    for (i = 10; i < 20; i++)
    {
        ctx->k[i] = key[i % keyBytes];
    }
}
void ECRYPT_ivsetup(
ECRYPT_ctx* ctx,
const u8* iv)
{
u32 tmp, i;
u8 v[20];
tmp = ctx->ivBs;
for (i = 0; i < 20; i++) v[i] = iv[i%tmp];
ctx->l[3] = (u32) ((ctx->k[15]) ^ (v [15]));
ctx->l[2] = (u32) ((ctx->k[10]) ^ (v [10]));
ctx->l[1] = (u32) ((ctx->k[ 5]) ^ (v [ 5]));
ctx->l[0] = (u32) ((ctx->k[ 0]) ^ (v [ 0]));
ctx->h[3] = (((u32 *) (&(ctx->k[16])))[0]) ^ (((u32 *) (&(v[16])))[0]);
ctx->h[2] = (((u32 *) (&(ctx->k[11])))[0]) ^ (((u32 *) (&(v[11])))[0]);
ctx->h[1] = (((u32 *) (&(ctx->k[ 6])))[0]) ^ (((u32 *) (&(v[ 6])))[0]);
ctx->h[0] = (((u32 *) (&(ctx->k[ 1])))[0]) ^ (((u32 *) (&(v[ 1])))[0]);
tmp = tsc(ctx);
ctx->h[3] ^= tmp;
ctx->l[0] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[2] ^= tmp;
ctx->l[3] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[1] ^= tmp;
ctx->l[2] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[0] ^= tmp;
ctx->l[1] ^= 0xff & tmp;
ctx->l[3] ^= (u32) v[15];
ctx->l[2] ^= (u32) v[10];
ctx->l[1] ^= (u32) v[ 5];
ctx->l[0] ^= (u32) v[ 0];
ctx->h[3] ^= ((u32 *) (&(v[16])))[0];
ctx->h[2] ^= ((u32 *) (&(v[11])))[0];
ctx->h[1] ^= ((u32 *) (&(v[ 6])))[0];
ctx->h[0] ^= ((u32 *) (&(v[ 1])))[0];
tmp = tsc(ctx);
ctx->h[3] ^= tmp;
ctx->l[0] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[2] ^= tmp;
ctx->l[3] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[1] ^= tmp;
ctx->l[2] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[0] ^= tmp;
ctx->l[1] ^= 0xff & tmp;
ctx->l[3] ^= (u32) v[15];
ctx->l[2] ^= (u32) v[10];
ctx->l[1] ^= (u32) v[ 5];
ctx->l[0] ^= (u32) v[ 0];
ctx->h[3] ^= ((u32 *) (&(v[16])))[0];
ctx->h[2] ^= ((u32 *) (&(v[11])))[0];
ctx->h[1] ^= ((u32 *) (&(v[ 6])))[0];
ctx->h[0] ^= ((u32 *) (&(v[ 1])))[0];
tmp = tsc(ctx);
ctx->h[3] ^= tmp;
ctx->l[0] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[2] ^= tmp;
ctx->l[3] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[1] ^= tmp;
ctx->l[2] ^= 0xff & tmp;
tmp = _lrotl(tmp, 8);
ctx->h[0] ^= tmp;
ctx->l[1] ^= 0xff & tmp;
return;
}
/*
 * Encrypt or decrypt msglen bytes by XORing them with the keystream.
 *
 * action - not used by this function; both directions are the same XOR.
 * ctx    - cipher context; advanced by one tsc() call per started 4-byte
 *          group of the message.
 * input  - msglen bytes to process.
 * output - receives msglen processed bytes.
 * msglen - message length in bytes.
 *
 * Each keystream word is applied least-significant byte first.  The data
 * is accessed byte by byte, so neither buffer needs to be aligned.
 *
 * The trailing 1..3 bytes are XORed with the input as well; previously the
 * raw keystream bytes were written to output and the input tail was
 * ignored.
 */
void ECRYPT_process_bytes(
    int action,
    ECRYPT_ctx* ctx,
    const u8* input,
    u8* output,
    u32 msglen)
{
    u32 word;
    u32 i;

    (void) action;

    while (msglen > 3)
    {
        /* read the whole input word before writing any output byte */
        word =  (u32) input[0]
             | ((u32) input[1] << 8)
             | ((u32) input[2] << 16)
             | ((u32) input[3] << 24);
        word ^= tsc(ctx);
        output[0] = (u8) (word      );
        output[1] = (u8) (word >>  8);
        output[2] = (u8) (word >> 16);
        output[3] = (u8) (word >> 24);
        input += 4;
        output += 4;
        msglen -= 4;
    }

    if (msglen == 0) return;

    /* partial last word: one more keystream word, only msglen bytes used */
    word = tsc(ctx);
    for (i = 0; i < msglen; i++)
    {
        output[i] = (u8) (input[i] ^ (u8) (word >> (8 * i)));
    }
}
/*
 * Write length bytes of keystream to keystream.
 *
 * ctx       - cipher context; advanced by one tsc() call per started
 *             4-byte group.
 * keystream - receives length bytes.
 * length    - number of keystream bytes to produce.
 *
 * Each tsc() word is stored least-significant byte first, for full words
 * and for the trailing 1..3 bytes alike.  Bytes are stored individually,
 * so keystream needs no particular alignment.
 */
void ECRYPT_keystream_bytes(
    ECRYPT_ctx* ctx,
    u8* keystream,
    u32 length)
{
    u32 word;
    u32 i;

    while (length > 3)
    {
        word = tsc(ctx);
        keystream[0] = (u8) (word      );
        keystream[1] = (u8) (word >>  8);
        keystream[2] = (u8) (word >> 16);
        keystream[3] = (u8) (word >> 24);
        keystream += 4;
        length -= 4;
    }

    if (length == 0) return;

    /* partial last word: the unused high bytes of the word are discarded */
    word = tsc(ctx);
    for (i = 0; i < length; i++)
    {
        keystream[i] = (u8) (word >> (8 * i));
    }
}
/*
 * Encrypt or decrypt a number of 4-byte blocks by XORing them with the
 * keystream.
 *
 * action - not used by this function; both directions are the same XOR.
 * ctx    - cipher context; advanced by one tsc() call per block.
 * input  - 4 * blocks bytes to process.
 * output - receives 4 * blocks processed bytes.
 * blocks - number of 4-byte blocks.
 *
 * Each keystream word is applied least-significant byte first.  The data
 * is accessed byte by byte rather than through a u32 pointer, because the
 * buffers are byte pointers with no alignment guarantee and input is const.
 */
void ECRYPT_process_blocks(
    int action,
    ECRYPT_ctx* ctx,
    const u8* input,
    u8* output,
    u32 blocks)
{
    u32 word;

    (void) action;

    for (; blocks != 0; blocks--)
    {
        /* read the whole input word before writing any output byte */
        word =  (u32) input[0]
             | ((u32) input[1] << 8)
             | ((u32) input[2] << 16)
             | ((u32) input[3] << 24);
        word ^= tsc(ctx);
        output[0] = (u8) (word      );
        output[1] = (u8) (word >>  8);
        output[2] = (u8) (word >> 16);
        output[3] = (u8) (word >> 24);
        input += 4;
        output += 4;
    }
}
/*
 * Write a number of 4-byte keystream blocks to keystream.
 *
 * ctx       - cipher context; advanced by one tsc() call per block.
 * keystream - receives 4 * blocks bytes.
 * blocks    - number of 4-byte blocks to produce.
 *
 * Each tsc() word is stored least-significant byte first.  Bytes are
 * stored individually, so keystream needs no particular alignment.
 */
void ECRYPT_keystream_blocks(
    ECRYPT_ctx* ctx,
    u8* keystream,
    u32 blocks)
{
    u32 word;

    for (; blocks != 0; blocks--)
    {
        word = tsc(ctx);
        keystream[0] = (u8) (word      );
        keystream[1] = (u8) (word >>  8);
        keystream[2] = (u8) (word >> 16);
        keystream[3] = (u8) (word >> 24);
        keystream += 4;
    }
}
/**************************************************************************/
/**************************************************************************/
/*
One more way to implement the nonlinear filter.
Included here for reference.
*/
/*
switch(
((1&(ctx->l[3])) << 3) ^
((1&(ctx->l[2])) << 2) ^
((1&(ctx->l[1])) << 1) ^
( 1&(ctx->l[0]) )
)
{
case 15:
tmpH = _lrotl(_lrotl(ctx->h[2], 7) + _lrotr(ctx->h[3], 2), 8)
+ _lrotr(_lrotl(ctx->h[0], 7) + ctx->h[1] , 9);
break;
case 14:
tmpH = _lrotl(_lrotl(ctx->h[2], 7) + _lrotr(ctx->h[3], 2), 8)
+ _lrotr(_lrotl(ctx->h[1], 7) + ctx->h[0] , 9);
break;
case 13:
tmpH = _lrotl(_lrotl(ctx->h[2], 7) + _lrotr(ctx->h[0], 2), 8)
+ _lrotr(_lrotl(ctx->h[3], 7) + ctx->h[1] , 9);
break;
case 12:
tmpH = _lrotl(_lrotl(ctx->h[2], 7) + _lrotr(ctx->h[1], 2), 8)
+ _lrotr(_lrotl(ctx->h[3], 7) + ctx->h[0] , 9);
break;
case 11:
tmpH = _lrotl(_lrotl(ctx->h[3], 7) + _lrotr(ctx->h[2], 2), 8)
+ _lrotr(_lrotl(ctx->h[0], 7) + ctx->h[1] , 9);
break;
case 10:
tmpH = _lrotl(_lrotl(ctx->h[3], 7) + _lrotr(ctx->h[2], 2), 8)
+ _lrotr(_lrotl(ctx->h[1], 7) + ctx->h[0] , 9);
break;
case 9:
tmpH = _lrotl(_lrotl(ctx->h[3], 7) + _lrotr(ctx->h[0], 2), 8)
+ _lrotr(_lrotl(ctx->h[2], 7) + ctx->h[1] , 9);
break;
case 8:
tmpH = _lrotl(_lrotl(ctx->h[3], 7) + _lrotr(ctx->h[1], 2), 8)
+ _lrotr(_lrotl(ctx->h[2], 7) + ctx->h[0] , 9);
break;
case 7:
tmpH = _lrotl(_lrotl(ctx->h[1], 7) + _lrotr(ctx->h[3], 2), 8)
+ _lrotr(_lrotl(ctx->h[0], 7) + ctx->h[2] , 9);
break;
case 6:
tmpH = _lrotl(_lrotl(ctx->h[0], 7) + _lrotr(ctx->h[3], 2), 8)
+ _lrotr(_lrotl(ctx->h[1], 7) + ctx->h[2] , 9);
break;
case 5:
tmpH = _lrotl(_lrotl(ctx->h[1], 7) + _lrotr(ctx->h[0], 2), 8)
+ _lrotr(_lrotl(ctx->h[3], 7) + ctx->h[2] , 9);
break;
case 4:
tmpH = _lrotl(_lrotl(ctx->h[0], 7) + _lrotr(ctx->h[1], 2), 8)
+ _lrotr(_lrotl(ctx->h[3], 7) + ctx->h[2] , 9);
break;
case 3:
tmpH = _lrotl(_lrotl(ctx->h[1], 7) + _lrotr(ctx->h[2], 2), 8)
+ _lrotr(_lrotl(ctx->h[0], 7) + ctx->h[3] , 9);
break;
case 2:
tmpH = _lrotl(_lrotl(ctx->h[0], 7) + _lrotr(ctx->h[2], 2), 8)
+ _lrotr(_lrotl(ctx->h[1], 7) + ctx->h[3] , 9);
break;
case 1:
tmpH = _lrotl(_lrotl(ctx->h[1], 7) + _lrotr(ctx->h[0], 2), 8)
+ _lrotr(_lrotl(ctx->h[2], 7) + ctx->h[3] , 9);
break;
case 0:
tmpH = _lrotl(_lrotl(ctx->h[0], 7) + _lrotr(ctx->h[1], 2), 8)
+ _lrotr(_lrotl(ctx->h[2], 7) + ctx->h[3] , 9);
break;
}
*/
/*
A more straightforward way to implement s-box application
Included here for reference.
*/
/*
tmpL = ~(ctx->l[3]) | (ctx->l[2]) | (ctx->l[1]);
v1 = ~(ctx->l[0]);
s2 = v1 ^ tmpL;
t1 = (ctx->l[0]) ^ (~(ctx->l[3]) & (ctx->l[2]) & (ctx->l[1]));
u2 = t1 ^ tmpL;
tmpL = (ctx->l[3]) | (ctx->l[2]) | ~(ctx->l[0]);
s3 = (ctx->l[1]) ^ ((ctx->l[3]) & (ctx->l[2]) & ~(ctx->l[0]));
t0 = s3 ^ tmpL;
u3 = t0;
v0 = (ctx->l[1]) ^ ~tmpL;
tmpL = (ctx->l[3]) | (ctx->l[1]) | (ctx->l[0]);
v3 = ~(ctx->l[2]);
u1 = (ctx->l[2]) ^ tmpL;
t3 = ~u1;
s1 = t3 ^ (ctx->l[3]) & (ctx->l[1]) & (ctx->l[0]);
tmpL = (ctx->l[3]) ^ ((ctx->l[2]) & ~(ctx->l[1]) & (ctx->l[0]));
s0 = ~tmpL;
t2 = tmpL ^ ((ctx->l[2]) | ~(ctx->l[1]) | (ctx->l[0]));
u0 = ~t2;
v2 = (ctx->l[3]) ^ s0 ^ t2;
ctx->l[3] = 0xff & (((pL1) & (pL0) & s3) ^ ((pL1) & (~pL0) & t3) ^ ((~pL1) & (pL0) & u3) ^ ((~pL1) & (~pL0) & v3));
ctx->l[2] = 0xff & (((pL1) & (pL0) & s2) ^ ((pL1) & (~pL0) & t2) ^ ((~pL1) & (pL0) & u2) ^ ((~pL1) & (~pL0) & v2));
ctx->l[1] = 0xff & (((pL1) & (pL0) & s1) ^ ((pL1) & (~pL0) & t1) ^ ((~pL1) & (pL0) & u1) ^ ((~pL1) & (~pL0) & v1));
ctx->l[0] = 0xff & (((pL1) & (pL0) & s0) ^ ((pL1) & (~pL0) & t0) ^ ((~pL1) & (pL0) & u0) ^ ((~pL1) & (~pL0) & v0));
tmpH = ~(ctx->h[3]) | (ctx->h[2]) | (ctx->h[1]);
v1 = ~(ctx->h[0]);
s2 = v1 ^ tmpH;
t1 = (ctx->h[0]) ^ (~(ctx->h[3]) & (ctx->h[2]) & (ctx->h[1]));
u2 = t1 ^ tmpH;
tmpH = (ctx->h[3]) | (ctx->h[2]) | ~(ctx->h[0]);
s3 = (ctx->h[1]) ^ ((ctx->h[3]) & (ctx->h[2]) & ~(ctx->h[0]));
t0 = s3 ^ tmpH;
u3 = t0;
v0 = (ctx->h[1]) ^ ~tmpH;
tmpH = (ctx->h[3]) | (ctx->h[1]) | (ctx->h[0]);
v3 = ~(ctx->h[2]);
u1 = (ctx->h[2]) ^ tmpH;
t3 = ~u1;
s1 = t3 ^ (ctx->h[3]) & (ctx->h[1]) & (ctx->h[0]);
tmpH = (ctx->h[3]) ^ ((ctx->h[2]) & ~(ctx->h[1]) & (ctx->h[0]));
s0 = ~tmpH;
t2 = tmpH ^ ((ctx->h[2]) | ~(ctx->h[1]) | (ctx->h[0]));
u0 = ~t2;
v2 = (ctx->h[3]) ^ s0 ^ t2;
ctx->h[3] = ((pH1) & (pH0) & s3) ^ ((pH1) & (~pH0) & t3) ^ ((~pH1) & (pH0) & u3) ^ ((~pH1) & (~pH0) & v3);
ctx->h[2] = ((pH1) & (pH0) & s2) ^ ((pH1) & (~pH0) & t2) ^ ((~pH1) & (pH0) & u2) ^ ((~pH1) & (~pH0) & v2);
ctx->h[1] = ((pH1) & (pH0) & s1) ^ ((pH1) & (~pH0) & t1) ^ ((~pH1) & (pH0) & u1) ^ ((~pH1) & (~pH0) & v1);
ctx->h[0] = ((pH1) & (pH0) & s0) ^ ((pH1) & (~pH0) & t0) ^ ((~pH1) & (pH0) & u0) ^ ((~pH1) & (~pH0) & v0);
*/
/* eSTREAM Project -- powered by ViewCVS 1.0-dev (ViewCVS and CVS Help) */