/* Imported Hongjun's optimized C code for AES-128 and AES-256. */
/* aes256.h*/
/* Hongjun Wu, January 2007*/
#include "ecrypt-sync.h"
#include "aes-table.h"
#include <stdio.h>
/*
 * first_round(ctx, x0, y0)
 *
 * Stores into y0:
 *     T0[low byte of (x0 ^ ctx->round_key[0][0])] ^ ctx->first_round_output_x0
 *
 * ctx : pointer expression; round_key[0][0] and first_round_output_x0 are read.
 * x0  : 32-bit input word (evaluated once).
 * y0  : lvalue that receives the result.
 *
 * The expansion is a brace block that declares z0, t0 and tem0, so the
 * argument expressions must not refer to caller variables with those names.
 * Every parameter is parenthesized in the body so that arguments such as
 * `&state` expand with the intended precedence.
 */
#define first_round(ctx,x0,y0) { \
    u32 z0,t0,tem0; \
    z0 = (x0) ^ (ctx)->round_key[0][0]; \
    t0 = (u8) z0;                       /* only the low byte selects the table entry */ \
    tem0 = T0[t0]; \
    (y0) = tem0 ^ (ctx)->first_round_output_x0; \
}
/*
 * second_round(ctx, x0, y0, y1, y2, y3)
 *
 * Derives four output words from the single input word x0:
 *     y0 = T0[ x0        & 0xff] ^ ctx->second_round_output[0]
 *     y1 = T3[(x0 >> 24) & 0xff] ^ ctx->second_round_output[1]
 *     y2 = T2[(x0 >> 16) & 0xff] ^ ctx->second_round_output[2]
 *     y3 = T1[(x0 >>  8) & 0xff] ^ ctx->second_round_output[3]
 *
 * ctx    : pointer expression; second_round_output[0..3] are read.
 * x0     : 32-bit input word.  It is evaluated four times, and y0 is
 *          stored before the later reads, so x0 must be free of side
 *          effects and must not be the same object as y0.
 * y0..y3 : lvalues that receive the results.
 *
 * The expansion is a brace block that declares t0, t7, t10, t13 and
 * tem0, tem7, tem10, tem13; argument expressions must not use those names.
 * Every parameter is parenthesized in the body so that arguments such as
 * `&state` expand with the intended precedence.
 */
#define second_round(ctx,x0,y0,y1,y2,y3) { \
    u32 t0,t7,t10,t13; \
    u32 tem0,tem7,tem10,tem13; \
    t0 = (u8)(x0); \
    tem0 = T0[t0]; \
    (y0) = tem0 ^ (ctx)->second_round_output[0]; \
    t7 = (u8)((x0)>>24); \
    tem7 = T3[t7]; \
    (y1) = tem7 ^ (ctx)->second_round_output[1]; \
    t10 = (u8)((x0)>>16); \
    tem10 = T2[t10]; \
    (y2) = tem10 ^ (ctx)->second_round_output[2]; \
    t13 = (u8)((x0)>>8); \
    tem13 = T1[t13]; \
    (y3) = tem13 ^ (ctx)->second_round_output[3]; \
}
/*
 * round(ctx, x0, x1, x2, x3, y0, y1, y2, y3, r)
 *
 * One table-driven round.  With b0(w) = w & 0xff, b1(w) = (w >> 8) & 0xff,
 * b2(w) = (w >> 16) & 0xff and b3(w) = (w >> 24) & 0xff:
 *     y0 = T0[b0(x0)] ^ T1[b1(x1)] ^ T2[b2(x2)] ^ T3[b3(x3)] ^ ctx->round_key[r][0]
 *     y1 = T0[b0(x1)] ^ T1[b1(x2)] ^ T2[b2(x3)] ^ T3[b3(x0)] ^ ctx->round_key[r][1]
 *     y2 = T0[b0(x2)] ^ T1[b1(x3)] ^ T2[b2(x0)] ^ T3[b3(x1)] ^ ctx->round_key[r][2]
 *     y3 = T0[b0(x3)] ^ T1[b1(x0)] ^ T2[b2(x1)] ^ T3[b3(x2)] ^ ctx->round_key[r][3]
 *
 * ctx    : pointer expression; round_key[r][0..3] are read.
 * x0..x3 : 32-bit input words.  Each is evaluated four times, so they must
 *          be free of side effects.
 * y0..y3 : lvalues that receive the results.  y0 is stored before x0 is read
 *          again for y1..y3 (and likewise for the others), so the outputs
 *          must not be the same objects as the inputs.
 * r      : index into ctx->round_key.
 *
 * The expansion is a brace block that declares t0..t15 and tem0..tem15;
 * argument expressions must not use those names.  Every parameter is
 * parenthesized (or used only as an array subscript) so that arguments
 * such as `&state` expand with the intended precedence.
 */
#define round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,r) { \
    u32 t0,t1,t2,t3; \
    u32 t4,t5,t6,t7; \
    u32 t8,t9,t10,t11; \
    u32 t12,t13,t14,t15; \
    u32 tem0,tem1,tem2,tem3; \
    u32 tem4,tem5,tem6,tem7; \
    u32 tem8,tem9,tem10,tem11; \
    u32 tem12,tem13,tem14,tem15; \
    \
    t0 = (u8)(x0); \
    tem0 = T0[t0]; \
    t1 = (u8)((x1)>>8); \
    tem1 = tem0 ^ T1[t1]; \
    t2 = (u8)((x2)>>16); \
    tem2 = tem1 ^ T2[t2]; \
    t3 = (u8)((x3)>>24); \
    tem3 = tem2 ^ T3[t3]; \
    (y0) = tem3 ^ (ctx)->round_key[r][0]; \
    \
    t4 = (u8)(x1); \
    tem4 = T0[t4]; \
    t5 = (u8)((x2)>>8); \
    tem5 = tem4 ^ T1[t5]; \
    t6 = (u8)((x3)>>16); \
    tem6 = tem5 ^ T2[t6]; \
    t7 = (u8)((x0)>>24); \
    tem7 = tem6 ^ T3[t7]; \
    (y1) = tem7 ^ (ctx)->round_key[r][1]; \
    \
    t8 = (u8)(x2); \
    tem8 = T0[t8]; \
    t9 = (u8)((x3)>>8); \
    tem9 = tem8 ^ T1[t9]; \
    t10 = (u8)((x0)>>16); \
    tem10 = tem9 ^ T2[t10]; \
    t11 = (u8)((x1)>>24); \
    tem11 = tem10 ^ T3[t11]; \
    (y2) = tem11 ^ (ctx)->round_key[r][2]; \
    \
    t12 = (u8)(x3); \
    tem12 = T0[t12]; \
    t13 = (u8)((x0)>>8); \
    tem13 = tem12 ^ T1[t13]; \
    t14 = (u8)((x1)>>16); \
    tem14 = tem13 ^ T2[t14]; \
    t15 = (u8)((x2)>>24); \
    tem15 = tem14 ^ T3[t15]; \
    (y3) = tem15 ^ (ctx)->round_key[r][3]; \
}
/* 22.14 cycles/byte*/
/*
 * last_round(ctx, x0, x1, x2, x3, output, r)
 *
 * Writes 16 elements, output[0..15], each an Sbox lookup of one byte of the
 * input words.  With b0 = bits 0-7, b1 = bits 8-15, b2 = bits 16-23 and
 * b3 = bits 24-31:
 *     from x0: output[0]  = Sbox[b0], output[13] = Sbox[b1],
 *              output[10] = Sbox[b2], output[7]  = Sbox[b3]
 *     from x1: output[4]  = Sbox[b0], output[1]  = Sbox[b1],
 *              output[14] = Sbox[b2], output[11] = Sbox[b3]
 *     from x2: output[8]  = Sbox[b0], output[5]  = Sbox[b1],
 *              output[2]  = Sbox[b2], output[15] = Sbox[b3]
 *     from x3: output[12] = Sbox[b0], output[9]  = Sbox[b1],
 *              output[6]  = Sbox[b2], output[3]  = Sbox[b3]
 *
 * ctx, r : accepted but not referenced by the expansion; no round key is
 *          XORed into the output here.
 * x0..x3 : 32-bit input words.  Each is evaluated four times, so they must
 *          be free of side effects.
 * output : expression indexable at [0]..[15]; evaluated sixteen times.
 *
 * The expansion is a brace block that declares t0..t15; argument expressions
 * must not use those names.  `output` is parenthesized in the body so that an
 * argument such as `buf + 16` indexes as intended.
 */
#define last_round(ctx,x0,x1,x2,x3,output,r) { \
    u32 t0,t1,t2,t3; \
    u32 t4,t5,t6,t7; \
    u32 t8,t9,t10,t11; \
    u32 t12,t13,t14,t15; \
    \
    t0 = (u8)(x0); \
    (output)[0] = Sbox[t0]; \
    t7 = (u8)((x0)>>24); \
    (output)[7] = Sbox[t7]; \
    t10 = (u8)((x0)>>16); \
    (output)[10] = Sbox[t10]; \
    t13 = (u8)((x0)>>8); \
    (output)[13] = Sbox[t13]; \
    \
    t1 = (u8)((x1)>>8); \
    (output)[1] = Sbox[t1]; \
    t4 = (u8)(x1); \
    (output)[4] = Sbox[t4]; \
    t11 = (u8)((x1)>>24); \
    (output)[11] = Sbox[t11]; \
    t14 = (u8)((x1)>>16); \
    (output)[14] = Sbox[t14]; \
    \
    t2 = (u8)((x2)>>16); \
    (output)[2] = Sbox[t2]; \
    t5 = (u8)((x2)>>8); \
    (output)[5] = Sbox[t5]; \
    t8 = (u8)(x2); \
    (output)[8] = Sbox[t8]; \
    t15 = (u8)((x2)>>24); \
    (output)[15] = Sbox[t15]; \
    \
    t3 = (u8)((x3)>>24); \
    (output)[3] = Sbox[t3]; \
    t6 = (u8)((x3)>>16); \
    (output)[6] = Sbox[t6]; \
    t9 = (u8)((x3)>>8); \
    (output)[9] = Sbox[t9]; \
    t12 = (u8)(x3); \
    (output)[12] = Sbox[t12]; \
}
/*
((u32*)output)[0] ^= ctx->round_key[r][0]; \
((u32*)output)[1] ^= ctx->round_key[r][1]; \
((u32*)output)[2] ^= ctx->round_key[r][2]; \
((u32*)output)[3] ^= ctx->round_key[r][3]; \
}
*/
/*
 * Runs the 14-step round sequence on one block and writes 16 bytes to
 * output[0..15].
 *
 * x      : input words; only x[0] is read here.
 * output : receives the 16 result bytes from last_round.
 * ctx    : supplies round_key[0][0], the values first_round and second_round
 *          read (first_round_output_x0, second_round_output[0..3]) and
 *          round_key[3..13].
 *
 * The state ping-pongs between two word sets (p*, q*): every round() reads
 * one set and writes the other, so inputs and outputs never alias.  As
 * last_round is written, the index 14 passed to it is not used, so no final
 * round key is applied inside this function.
 */
void aes256_enc_block(u32* x,u8* output, ECRYPT_ctx* ctx) {
    u32 p0, p1, p2, p3;
    u32 q0, q1, q2, q3;

    /* Rounds 1 and 2 depend only on x[0] plus values stored in ctx. */
    first_round(ctx, x[0], p0);
    second_round(ctx, p0, q0, q1, q2, q3);

    /* Rounds 3..13, kept unrolled; odd rounds go q -> p, even rounds p -> q. */
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 3);
    round(ctx, p0, p1, p2, p3, q0, q1, q2, q3, 4);
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 5);
    round(ctx, p0, p1, p2, p3, q0, q1, q2, q3, 6);
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 7);
    round(ctx, p0, p1, p2, p3, q0, q1, q2, q3, 8);
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 9);
    round(ctx, p0, p1, p2, p3, q0, q1, q2, q3, 10);
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 11);
    round(ctx, p0, p1, p2, p3, q0, q1, q2, q3, 12);
    round(ctx, q0, q1, q2, q3, p0, p1, p2, p3, 13);

    /* Final byte substitution into the caller's buffer. */
    last_round(ctx, p0, p1, p2, p3, output, 14);
}
/*
 * Precomputes the parts of the first two rounds that do not depend on the
 * low byte of counter word 0, and stores them in ctx.
 *
 * Reads : ctx->counter[0..3], ctx->round_key[0..2].
 * Writes: ctx->first_round_output_x0, ctx->second_round_output[0..3].
 *
 * Round 1 is evaluated with the low byte of word 0 forced to zero, and the
 * resulting T0[0] term is XORed back out of word 0 so that first_round can
 * XOR in the table entry for the real byte.  Round 2 is then evaluated with
 * word 0 set to zero, and the four table entries that a zero word selects
 * (T0[0], T3[0], T2[0], T1[0]) are XORed out for second_round in the same way.
 */
void partial_precompute_tworounds(ECRYPT_ctx* ctx)
{
    u32 in0, in1, in2, in3;       /* counter XOR round key 0 */
    u32 mid0, mid1, mid2, mid3;   /* round-1 result */
    u32 fin0, fin1, fin2, fin3;   /* round-2 result */

    /* Word 0 has its low byte masked off. */
    in0 = (ctx->counter[0] ^ ctx->round_key[0][0]) & 0xffffff00;
    in1 = ctx->counter[1] ^ ctx->round_key[0][1];
    in2 = ctx->counter[2] ^ ctx->round_key[0][2];
    in3 = ctx->counter[3] ^ ctx->round_key[0][3];

    round(ctx, in0, in1, in2, in3, mid0, mid1, mid2, mid3, 1);
    ctx->first_round_output_x0 = mid0 ^ T0[0];

    /* Round 2 is evaluated with word 0 set to zero. */
    mid0 = 0;
    round(ctx, mid0, mid1, mid2, mid3, fin0, fin1, fin2, fin3, 2);

    ctx->second_round_output[0] = fin0 ^ T0[0];
    ctx->second_round_output[1] = fin1 ^ T3[0];
    ctx->second_round_output[2] = fin2 ^ T2[0];
    ctx->second_round_output[3] = fin3 ^ T1[0];
}
/* eSTREAM Project | Powered by ViewCVS 1.0-dev | ViewCVS and CVS Help */