/* aes256.h */
/* Hongjun Wu, January 2007 */

/*
 * Table-driven 14-round block encryption (AES-256, per the file name) that
 * skips most of the first two rounds by reusing values precomputed from
 * ctx->counter.
 *
 * Usage order, as established by the code below:
 *   1. partial_precompute_tworounds(ctx) stores ctx->first_round_output_x0
 *      and ctx->second_round_output[0..3].
 *   2. aes256_enc_block(x, output, ctx) then only needs the low byte of
 *      x[0] ^ ctx->round_key[0][0] to finish rounds 1 and 2.
 *
 * T0, T1, T2, T3 and Sbox are not defined here; they are presumably the
 * lookup tables provided by "aes-table.h" (TODO confirm).
 *
 * Macro caveats (apply to every macro in this file):
 *   - Arguments must not be spelled like a macro-local temporary (t0..t15,
 *     tem0..tem15, and z0 for first_round); such an argument would be
 *     captured by the local declaration inside the expansion.
 *   - Output arguments must be lvalues distinct from the input arguments:
 *     outputs are assigned before all inputs have been read.
 *   - The macro named `round` takes ten arguments and will clash with the
 *     declaration of round() if <math.h> is included after this header.
 */

#ifndef AES256_H_INCLUDED
#define AES256_H_INCLUDED

#include "ecrypt-sync.h"
#include "aes-table.h"
/* NOTE(review): the header name of the third #include directive was missing
 * from the text under review. <stdio.h> is a harmless stand-in -- confirm
 * against the upstream file. */
#include <stdio.h>

/*
 * first_round(ctx, x0, y0)
 *
 * Completes word 0 of round 1.  Only the low byte of
 * (x0 ^ ctx->round_key[0][0]) is looked up (in T0); the result is XORed with
 * ctx->first_round_output_x0, which partial_precompute_tworounds() computed
 * with that byte forced to zero and with T0[0] already cancelled out.
 *
 *   ctx : pointer to the cipher context (read only)
 *   x0  : input word 0
 *   y0  : lvalue receiving word 0 of the round-1 output
 */
#define first_round(ctx,x0,y0) do { \
  u32 z0,t0,tem0; \
  z0 = (x0) ^ (ctx)->round_key[0][0]; \
  t0 = (u8) z0; \
  tem0 = T0[t0]; \
  (y0) = tem0 ^ (ctx)->first_round_output_x0; \
} while (0)

/*
 * second_round(ctx, x0, y0, y1, y2, y3)
 *
 * Completes round 2 given word 0 of the round-1 output.  Each byte of x0
 * contributes one table entry to one output word; everything else is taken
 * from ctx->second_round_output[], which partial_precompute_tworounds()
 * computed with word 0 set to zero and with T0[0], T3[0], T2[0], T1[0]
 * already cancelled out.
 *
 *   ctx        : pointer to the cipher context (read only)
 *   x0         : word 0 of the round-1 output
 *   y0..y3     : lvalues receiving the four round-2 output words
 */
#define second_round(ctx,x0,y0,y1,y2,y3) do { \
  u32 t0,t7,t10,t13; \
  u32 tem0,tem7,tem10,tem13; \
  \
  t0 = (u8)(x0); \
  tem0 = T0[t0]; \
  (y0) = tem0 ^ (ctx)->second_round_output[0]; \
  \
  t7 = (u8)((x0)>>24); \
  tem7 = T3[t7]; \
  (y1) = tem7 ^ (ctx)->second_round_output[1]; \
  \
  t10 = (u8)((x0)>>16); \
  tem10 = T2[t10]; \
  (y2) = tem10 ^ (ctx)->second_round_output[2]; \
  \
  t13 = (u8)((x0)>>8); \
  tem13 = T1[t13]; \
  (y3) = tem13 ^ (ctx)->second_round_output[3]; \
} while (0)

/*
 * round(ctx, x0, x1, x2, x3, y0, y1, y2, y3, r)
 *
 * One full table-driven round.  Output word i is the XOR of
 *   T0[byte 0 of x(i)]   ^ T1[byte 1 of x(i+1)] ^
 *   T2[byte 2 of x(i+2)] ^ T3[byte 3 of x(i+3)] ^ ctx->round_key[r][i]
 * with word indices taken modulo 4 and byte 0 being the least significant.
 *
 *   ctx        : pointer to the cipher context (read only)
 *   x0..x3     : input state words
 *   y0..y3     : lvalues receiving the output state words
 *   r          : index into ctx->round_key
 */
#define round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,r) do { \
  u32 t0,t1,t2,t3; \
  u32 t4,t5,t6,t7; \
  u32 t8,t9,t10,t11; \
  u32 t12,t13,t14,t15; \
  u32 tem0,tem1,tem2,tem3; \
  u32 tem4,tem5,tem6,tem7; \
  u32 tem8,tem9,tem10,tem11; \
  u32 tem12,tem13,tem14,tem15; \
  \
  t0 = (u8)(x0); \
  tem0 = T0[t0]; \
  t1 = (u8)((x1)>>8); \
  tem1 = tem0 ^ T1[t1]; \
  t2 = (u8)((x2)>>16); \
  tem2 = tem1 ^ T2[t2]; \
  t3 = (u8)((x3)>>24); \
  tem3 = tem2 ^ T3[t3]; \
  (y0) = tem3 ^ (ctx)->round_key[(r)][0]; \
  \
  t4 = (u8)(x1); \
  tem4 = T0[t4]; \
  t5 = (u8)((x2)>>8); \
  tem5 = tem4 ^ T1[t5]; \
  t6 = (u8)((x3)>>16); \
  tem6 = tem5 ^ T2[t6]; \
  t7 = (u8)((x0)>>24); \
  tem7 = tem6 ^ T3[t7]; \
  (y1) = tem7 ^ (ctx)->round_key[(r)][1]; \
  \
  t8 = (u8)(x2); \
  tem8 = T0[t8]; \
  t9 = (u8)((x3)>>8); \
  tem9 = tem8 ^ T1[t9]; \
  t10 = (u8)((x0)>>16); \
  tem10 = tem9 ^ T2[t10]; \
  t11 = (u8)((x1)>>24); \
  tem11 = tem10 ^ T3[t11]; \
  (y2) = tem11 ^ (ctx)->round_key[(r)][2]; \
  \
  t12 = (u8)(x3); \
  tem12 = T0[t12]; \
  t13 = (u8)((x0)>>8); \
  tem13 = tem12 ^ T1[t13]; \
  t14 = (u8)((x1)>>16); \
  tem14 = tem13 ^ T2[t14]; \
  t15 = (u8)((x2)>>24); \
  tem15 = tem14 ^ T3[t15]; \
  (y3) = tem15 ^ (ctx)->round_key[(r)][3]; \
} while (0)

/* 22.14 cycles/byte */

/*
 * last_round(ctx, x0, x1, x2, x3, output, r)
 *
 * Final round: writes 16 bytes to output[0..15], each one an Sbox lookup of
 * a single state byte, using the same byte-to-word selection pattern as
 * round().
 *
 * The parameters ctx and r are accepted but not used: the XOR with
 * ctx->round_key[r] is disabled (see the retained code after the macro), so
 * the bytes produced here do not include the last round key.  Presumably
 * the caller applies that key itself -- verify against the call sites of
 * aes256_enc_block().
 *
 *   x0..x3 : input state words
 *   output : array/pointer of at least 16 bytes, written byte by byte
 */
#define last_round(ctx,x0,x1,x2,x3,output,r) do { \
  u32 t0,t1,t2,t3; \
  u32 t4,t5,t6,t7; \
  u32 t8,t9,t10,t11; \
  u32 t12,t13,t14,t15; \
  \
  t0 = (u8)(x0); \
  (output)[0] = Sbox[t0]; \
  t7 = (u8)((x0)>>24); \
  (output)[7] = Sbox[t7]; \
  t10 = (u8)((x0)>>16); \
  (output)[10] = Sbox[t10]; \
  t13 = (u8)((x0)>>8); \
  (output)[13] = Sbox[t13]; \
  \
  t1 = (u8)((x1)>>8); \
  (output)[1] = Sbox[t1]; \
  t4 = (u8)(x1); \
  (output)[4] = Sbox[t4]; \
  t11 = (u8)((x1)>>24); \
  (output)[11] = Sbox[t11]; \
  t14 = (u8)((x1)>>16); \
  (output)[14] = Sbox[t14]; \
  \
  t2 = (u8)((x2)>>16); \
  (output)[2] = Sbox[t2]; \
  t5 = (u8)((x2)>>8); \
  (output)[5] = Sbox[t5]; \
  t8 = (u8)(x2); \
  (output)[8] = Sbox[t8]; \
  t15 = (u8)((x2)>>24); \
  (output)[15] = Sbox[t15]; \
  \
  t3 = (u8)((x3)>>24); \
  (output)[3] = Sbox[t3]; \
  t6 = (u8)((x3)>>16); \
  (output)[6] = Sbox[t6]; \
  t9 = (u8)((x3)>>8); \
  (output)[9] = Sbox[t9]; \
  t12 = (u8)(x3); \
  (output)[12] = Sbox[t12]; \
} while (0)

/* Disabled tail of last_round, retained from the original file:

  ((u32*)output)[0] ^= ctx->round_key[r][0];
  ((u32*)output)[1] ^= ctx->round_key[r][1];
  ((u32*)output)[2] ^= ctx->round_key[r][2];
  ((u32*)output)[3] ^= ctx->round_key[r][3];
}
*/

/*
 * Encrypt one block using the values cached by
 * partial_precompute_tworounds().
 *
 *   x      : input words; only x[0] is read, and of it only the low byte of
 *            x[0] ^ ctx->round_key[0][0] affects the result.  All other
 *            input-dependent material comes from the precomputed fields of
 *            ctx, so those must have been computed for the same remaining
 *            counter bits.
 *   output : receives 16 bytes (output[0..15]); the last round key is NOT
 *            XORed in (see last_round).
 *   ctx    : context providing round_key[0][0], round_key[3..13],
 *            first_round_output_x0 and second_round_output[0..3].
 */
void aes256_enc_block(u32* x,u8* output, ECRYPT_ctx* ctx)
{
  u32 y0,y1,y2,y3;
  u32 z0,z1,z2,z3;
  u32 a0,a1,a2,a3;
  u32 b0,b1,b2,b3;

  /* Rounds 1 and 2: finished from the cached partial results. */
  first_round(ctx,x[0],y0);
  second_round(ctx,y0,z0,z1,z2,z3);

  /* Rounds 3..13: full rounds, rotating through the four state sets so that
   * inputs and outputs of each round never alias. */
  round(ctx,z0,z1,z2,z3,a0,a1,a2,a3,3);
  round(ctx,a0,a1,a2,a3,b0,b1,b2,b3,4);
  round(ctx,b0,b1,b2,b3,y0,y1,y2,y3,5);
  round(ctx,y0,y1,y2,y3,z0,z1,z2,z3,6);
  round(ctx,z0,z1,z2,z3,a0,a1,a2,a3,7);
  round(ctx,a0,a1,a2,a3,b0,b1,b2,b3,8);
  round(ctx,b0,b1,b2,b3,y0,y1,y2,y3,9);
  round(ctx,y0,y1,y2,y3,z0,z1,z2,z3,10);
  round(ctx,z0,z1,z2,z3,a0,a1,a2,a3,11);
  round(ctx,a0,a1,a2,a3,b0,b1,b2,b3,12);
  round(ctx,b0,b1,b2,b3,y0,y1,y2,y3,13);

  /* Round 14: Sbox substitution into the byte buffer, without the key XOR. */
  last_round(ctx,y0,y1,y2,y3,output,14);
}

/*
 * Compute the intermediate values for the first two rounds.
 *
 * Reads ctx->counter[0..3] and ctx->round_key[0..2]; writes
 * ctx->first_round_output_x0 and ctx->second_round_output[0..3].
 *
 * The low byte of (counter[0] ^ round_key[0][0]) is cleared before round 1,
 * so that byte contributes T0[0] to word 0 of the round-1 output; XORing
 * T0[0] back out leaves a value to which first_round() can add the table
 * entry for the real byte.  Round 2 is then run with word 0 set to zero, and
 * the four entries that a zero word contributes (T0[0], T3[0], T2[0], T1[0])
 * are XORed out in the same way for second_round().
 */
void partial_precompute_tworounds(ECRYPT_ctx* ctx)
{
  u32 x0,x1,x2,x3,y0,y1,y2,y3;

  /* Initial key addition. */
  x0 = ctx->counter[0] ^ ctx->round_key[0][0];
  x1 = ctx->counter[1] ^ ctx->round_key[0][1];
  x2 = ctx->counter[2] ^ ctx->round_key[0][2];
  x3 = ctx->counter[3] ^ ctx->round_key[0][3];

  /* Drop the byte that first_round() supplies later. */
  x0 &= 0xffffff00;
  round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,1);
  ctx->first_round_output_x0 = y0 ^ T0[0];

  /* Word 0 of the round-1 output is only known at encryption time. */
  y0 = 0;
  round(ctx,y0,y1,y2,y3,x0,x1,x2,x3,2);
  ctx->second_round_output[0] = x0 ^ T0[0];
  ctx->second_round_output[1] = x1 ^ T3[0];
  ctx->second_round_output[2] = x2 ^ T2[0];
  ctx->second_round_output[3] = x3 ^ T1[0];
}

#endif /* AES256_H_INCLUDED */