/* included i386 assembly implementation. */
/*
* ----------------------------------------------------------------
* Phelix encryption/authentication algorithm
* Author: Doug Whiting, Hifn. 2005.
*
* This source code is released to the public domain
* ----------------------------------------------------------------
*/
#ifndef __i386__
#error architecture is not supported
#endif
.file "phelix.S"
.text
.align 4
#include "strucmac.S" /* structured programming macros */
.set PHELIX_INCREMENTAL_API,1 /* comment this out to exclude incremental calls */
/* strCat: concatenate text together (useful in building names inside macros). */
/* The (up to eight) arguments are emitted back-to-back as one source line, so a */
/* directive or label can be assembled from a fixed string plus a macro argument. */
.macro strCat aa,bb,cc,dd,ee,ff,gg,hh
\aa\bb\cc\dd\ee\ff\gg\hh
.endm
/* ---------------------------------------------------------------- */
/* C_global phelixName[,ecryptName]: define a global label at the current location. */
/* Handles linking with and without underscore: every name is emitted twice, */
/* once plain and once with a leading underscore. */
/* MIX_ASM defined: "<phelixName>_ASM" (plain and underscored) is declared */
/* global and defined here; the plain phelixName labels are still */
/* defined below but are not declared global. */
/* MIX_ASM not defined: phelixName (plain and underscored) is declared global. */
/* ECRYPT_API defined: if ecryptName is not blank it is exported the same way, */
/* as a second name for the same address. */
.macro C_global phelixName,ecryptName
#ifdef MIX_ASM
strCat ".global ",\phelixName,"_ASM"
strCat ".global _",\phelixName,"_ASM"
strCat " ",\phelixName,"_ASM:"
strCat "_",\phelixName,"_ASM:"
#else
.global \phelixName
.global _\phelixName
#endif
\phelixName:
_\phelixName:
#ifdef ECRYPT_API
.ifnc \ecryptName,
.global \ecryptName
.global _\ecryptName
\ecryptName:
_\ecryptName:
.endif
#endif
.endm
/* ---------------------------------------------------------------- */
/* Debug flag word exported for the test harness */
C_global _debugPhelix_
.long 0 /* ignored here, but must be defined for testPhelix.c */
/* NUL-terminated name of the assembler that built this file */
AsmName: .ascii "gnu.as\0"
.align 4
/* PhelixCompiler_Name: show who assembled us. */
/* Out: eax --> AsmName */
/* There is no ret of its own: execution deliberately falls through the */
/* PhelixInit label below and returns via its ret. */
C_global PhelixCompiler_Name
lea AsmName,%eax
/* PhelixInit / ECRYPT_init: the Init call does nothing */
/* (macro name spelled exactly as defined, matching every other call site) */
C_global PhelixInit,ECRYPT_init
ret
/* */
/* ---------------------------------------------------------------- */
/* Macros and definitions */
/* ---------------------------------------------------------------- */
/* */
/* Phelix rotation constants */
/* The digit names the Z register being rotated (0=eax .. 4=esi, see the register */
/* assignments below); each register is rotated once by its "a" count and once by */
/* its "b" count in every half of the block code. */
.set ROT_0a, 9
.set ROT_1a, 10
.set ROT_2a, 17
.set ROT_3a, 30
.set ROT_4a, 13
.set ROT_0b, 20
.set ROT_1b, 11
.set ROT_2b, 5
.set ROT_3b, 15
.set ROT_4b, 25
.set UNROLL_CNT, 8 /* how many blocks to unroll in inner loop */
.set ZERO_INIT_CNT, 8 /* number of all-zero words run through the cipher during init */
.set MAGIC_MAC_XOR, 0x912d94f1 /* xored into Z0 (eax) to start the MAC phase */
.set MAGIC_AAD_XOR, 0xaadaadaa /* xored into Z1 (ebx) before and after AAD processing */
/* */
/* ----- register assignments */
/* Z0 equ eax */
/* Z1 equ ebx */
/* Z2 equ ecx */
/* Z3 equ edx */
/* Z4 equ esi */
/* t0 equ ebp #"temp" scratch registers */
/* t1 equ edi */
/* oldZreg equ Z4 */
/* */
/* ---------------------------------------------------------------- */
/* */
/* Allocate and define local variables on the stack */
/* [Note: We use esp for locals, not ebp, since we need ebp as a variable. */
/* Thus, we can't use the assembler stack frame primitives.] */
/* */
/* Assembly-time bookkeeping for the esp-relative frame */
.set _maxLocalSize_ ,0 /* max locals usage in bytes */
.set _Phelix_LocalSize ,0 /* starting value: no locals allocated yet */
.set _SO_ ,0 /* current stack offset due to calls */
/* */
/* _newLocal wCnt,lName: allocate wCnt 32-bit words of local storage. */
/* lName is set to the byte offset of the new local from the start of the */
/* locals area, and _Phelix_LocalSize grows by 4*wCnt. */
.macro _newLocal wCnt,lName /* macro to define a local variable */
.set \lName ,_Phelix_LocalSize
.set _Phelix_LocalSize,_Phelix_LocalSize+4*(\wCnt)
/* keep running tabs on stack usage for locals */
.if _maxLocalSize_<_Phelix_LocalSize
.set _maxLocalSize_,_Phelix_LocalSize
.endif
.endm
/* */
/* _newParm wCnt,name: define the next field at the running byte offset _pOfs_. */
/* Sets name = _pOfs_ */
/* name_LCL = _pOfs_ - _cpOfs_ (offset from the first caller parameter) */
/* then advances _pOfs_ by 4*wCnt. Used for the caller parameter lists and for */
/* the context structure layout. */
.macro _newParm wCnt,_pp_
.set \_pp_, _pOfs_
strCat ".set ",\_pp_,_LCL,",",(_pOfs_-_cpOfs_)
.set _pOfs_,_pOfs_+4*(\wCnt)
.endm
/* */
/* now define local variables for the Encrypt/Decrypt functions */
/* Layout notes: */
/* - X_i_0, X_i_1 and oldZ must stay adjacent and in this order: the incremental */
/* entry points fill them with one 20-word copy starting at X_0 in the context. */
/* - _i_ sits between oldZ and exitTab, so a store loop that runs on past oldZ */
/* reaches _i_ first and exitTab afterwards. */
_newLocal 1,srcPtr /* pointer to input data buffer */
_newLocal 1,dstPtr /* pointer to output data buffer */
_newLocal 1,loopByteCnt /* inner loop byte counter */
_newLocal 1,jmpTabPtr /* pointer to encrypt/decrypt jump table */
_newLocal 8,X_i_0 /* local copy of the key values */
_newLocal 8,X_i_1
_newLocal 4,oldZ /* "old" Z values */
_newLocal 1,_i_ /* block number (+8) */
_newLocal UNROLL_CNT ,exitTab/* local jump table for exiting unrolled loop */
_newLocal UNROLL_CNT+4,tmpBuf /* local buffer encryption/decryption blocks */
_newLocal 1,aadLeft /* # bytes of aad remaining */
_newLocal 1,msgLen0 /* initial value of src_ByteCnt */
_newLocal 1,dstPtr0 /* initial dst pointer */
_newLocal 1,retAddr /* local "return" address */
/* caller parms offset from esp: return address (4) + pushal (8*4) + locals */
.set _cpOfs_,4+8*4+_Phelix_LocalSize /* caller parms offset from esp */
/* the same locals expressed relative to the first caller parameter */
.set retAddr_LCL,retAddr-_cpOfs_
.set dstPtr0_LCL,dstPtr0-_cpOfs_
.set msgLen0_LCL,msgLen0-_cpOfs_
.set tmpBuf_LCL, tmpBuf-_cpOfs_
/* */
/* ---------------------------------------------------------------- */
/* Define caller's parameters on the stack, relative to esp */
/* (packet functions; valid once pushal has run and the locals are allocated) */
/* */
.set _pOfs_,_cpOfs_
_newParm 0,callerParms /* placeholder, no space allocated */
_newParm 1,ctxt_Ptr /* --> Phelix context structure */
_newParm 1,nonce_Ptr /* --> nonce words */
_newParm 1,aad_Ptr /* --> associated data */
_newParm 1,aad_Len /* associated data length in bytes */
_newParm 1,src_Ptr /* --> input data */
_newParm 1,dst_Ptr /* --> output data */
_newParm 1,src_ByteCnt /* input length in bytes */
_newParm 1,mac_Ptr /* --> buffer that receives the MAC */
/* */
/* ---------------------------------------------------------------- */
/* Phelix context structure definition (byte offsets from the context pointer) */
/* X_0, X_1 and old_Z must stay adjacent and in this order: the incremental entry */
/* points copy them to the stack locals as one 20-word run. */
.set _pOfs_,0
_newParm 1,keySize /* size of raw key in bits */
_newParm 1,macSize /* size of mac tag in bits */
_newParm 1,X_1_Bump /* 4*(keySize/8) + 256*(macSize mod 128) */
_newParm 8,X_0 /* subkeys */
_newParm 8,X_1 /* subkeys */
/* internal cipher state */
_newParm 4,old_Z /* previous Z[4] values for output */
_newParm 5,_Z_ /* 5 internal state words */
_newParm 1,blkNum /* block number (i) */
_newParm 2,aadLen /* 64-bit aadLen counter (LSW first) */
_newParm 1,msgLen /* 32-bit msgLen counter (mod 2**32) */
_newParm 1,aadXor /* aad Xor constant */
/* */
/* ---------------------------------------------------------------- */
/* */
/* _o_ op1[,op2[,op3[,cond3]]]: emit each non-blank argument as its own source line. */
/* Lets one line of source carry a state-update opcode, its rotate and an */
/* interleaved load/store side by side. */
.macro _o_ op1,op2,op3,cond3 /* shorthand: instantiate 1-3 opcodes */
\op1
\op2
\op3
\cond3
.endm
/* ---------------------------------------------------------------- */
/* _stackOp op,reg,bump: emit "op %reg" and adjust _SO_ by bump so that later */
/* esp-relative offsets written as name+_SO_ still address the same local. */
/* Does nothing when reg is blank. */
.macro _stackOp op,reg,bump
.ifnc \reg, /* only do something if reg is not blank */
\op %\reg
.set _SO_,_SO_+\bump
.endif
.endm
/* _push r0[,r1..r6]: push up to seven registers, in the order given, */
/* adding 4 to _SO_ for each one. Register names are written without the % prefix. */
.macro _push r0,r1,r2,r3,r4,r5,r6
_stackOp push,\r0,4
_stackOp push,\r1,4
_stackOp push,\r2,4
_stackOp push,\r3,4
_stackOp push,\r4,4
_stackOp push,\r5,4
_stackOp push,\r6,4
.endm
/* */
/* _pop r0[,r1..r6]: pop up to seven registers, in the order given, */
/* subtracting 4 from _SO_ for each one. The caller lists the registers in the */
/* reverse of the matching _push order. */
.macro _pop r0,r1,r2,r3,r4,r5,r6
_stackOp pop,\r0,-4
_stackOp pop,\r1,-4
_stackOp pop,\r2,-4
_stackOp pop,\r3,-4
_stackOp pop,\r4,-4
_stackOp pop,\r5,-4
_stackOp pop,\r6,-4
.endm
/* */
/* ---------------------------------------------------------------- */
/* Init code, jump tables (for lblName = Encrypt/Decrypt) */
/* ---------------------------------------------------------------- */
/* */
/* PhelixAlgo lblName (lblName = Encrypt or Decrypt) */
/* Emits the body of a packet entry point followed by its jump table: */
/* - save all registers, load ebp with the address of <lblName>_jmpTab and */
/* jump to the shared control path Phelix_Main */
/* - <lblName>_jmpTab: one .long per unrolled block (<lblName>Blk_0 ..), then */
/* one .long for <lblName>_OddBytes */
/* OddBytes_OFFS is set to the byte offset of the OddBytes entry in the table. */
.macro PhelixAlgo lblName
/* first, set up the stack frame */
pushal /* save all regs on stack */
strCat "lea ",\lblName,"_jmpTab,%ebp" /* handle the encrypt/decrypt difference */
jmp Phelix_Main /* go run the algorithm */
/* */
/* the jump table for this operation */
/* */
.align 4
strCat \lblName,"_jmpTab:"
/* first, a list of "block boundary" targets within unrolled processing loop */
/* (entry n is at byte offset 4*n, so a word offset within the loop indexes it directly) */
.irp xxx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.if \xxx < UNROLL_CNT
strCat " .long \lblName","Blk_",\xxx
strCat " .global \lblName","Blk_",\xxx
.endif
.endr
/* next, successive "control" targets within Phelix_Main */
strCat ".set OddBytes_OFFS,","(.-\lblName","_jmpTab)"
strCat ".long \lblName","_OddBytes"
.endm /* PhelixAlgo */
/* */
/* ---------------------------------------------------------------- */
/* Common unrolled loop end code for encrypt/decrypt */
/* ---------------------------------------------------------------- */
/* */
/* PhelixEndLoop CNT: bookkeeping at the bottom of the unrolled loop. */
/* Advances srcPtr and dstPtr by CNT words and _i_ by CNT blocks, then subtracts */
/* CNT*4 from loopByteCnt. The carry flag from that last subtract tells the */
/* caller whether to leave the loop (borrow) or go round again. */
/* The locals are addressed without _SO_, so this must be used where _SO_ == 0. */
.macro PhelixEndLoop CNT
addl $(\CNT)*4,srcPtr(%esp) /* bump the pointers */
addl $(\CNT)*4,dstPtr(%esp)
addl $(\CNT) ,_i_ (%esp) /* bump the count */
subl $(\CNT)*4,loopByteCnt(%esp) /* are we done yet? */
.endm /* leave here with flags set for loop jmp */
/* */
/* ---------------------------------------------------------------- */
/* Common "early exit" code for encrypt/decrypt inner loop */
/* ---------------------------------------------------------------- */
/* This functionality is required for splicing AAD/text/padding */
/* */
/* PhelixEarlyExit jTabReg,_bn_: end-of-block hook for unrolled block _bn_. */
/* In: jTabReg = this block's exitTab entry (0 = keep going) */
/* esi = Z4 after the block */
/* Stores esi into oldZ[_bn_ & 3] in either case. If jTabReg is non-zero the */
/* store is followed by a jump to that address instead of falling into the next */
/* block. The last block of the loop gets no test: its exit is taken by the */
/* code that follows the loop. */
.macro PhelixEarlyExit jTabReg,_bn_
.if \_bn_ < (UNROLL_CNT-1) /* don't need early exit at bottom of loop */
testl %\jTabReg,%\jTabReg /* time to exit? */
_if nz
movl %esi,oldZ+4*((\_bn_) & 3)+_SO_(%esp)
jmp *%\jTabReg /* go to "exit" address */
_endif
.endif
movl %esi,oldZ+4*((\_bn_)& 3)+_SO_(%esp)
.endm
/* */
/* **************************************************************** */
/* start of actual code (i.e., end of macro definitions) */
/* **************************************************************** */
/* */
.align 4
/* all-zero input words fed through the cipher during initialization */
INIT_ZEROES:
.fill ZERO_INIT_CNT,4,0
/* MASK_TAB[n] keeps the low n bytes of a word, n = 0..3 (odd-byte handling) */
MASK_TAB:
.long 0x00000000
.long 0x000000ff
.long 0x0000ffff
.long 0x00ffffff
_PhelixCodeStart_:
/* */
/* ---------------------------------------------------------------- */
/* Common control path for Encrypt/Decrypt */
/* ---------------------------------------------------------------- */
/* In: ebp --> (const) jump table (Encrypt_jmpTab or Decrypt_jmpTab) */
/* Out: everything done */
/* */
/* Phelix_Main: shared control path of the packet functions. */
/* Entered by jmp from the PhelixAlgo stub, i.e. with the return address and the */
/* pushal frame on the stack and ebp --> the operation's jump table. */
Phelix_Main:
/* point to callers first parameter (save code size below) */
leal callerParms-_Phelix_LocalSize(%esp),%esi
subl $_Phelix_LocalSize,%esp /* make room for locals on stack */
movl %ebp,jmpTabPtr(%esp) /* save jump table pointer */
call InitNonce
/* */
/* ################################################################ */
/* Finally ready to start running Phelix on some data */
/* ################################################################ */
/* First, process the initialization zeroes (loopByteCnt == 0 from InitNonce) */
/* The loop leaves through the last exitTab entry once loopByteCnt borrows, */
/* so that entry is pointed at the continuation below. */
/* */
movl $_ret_InitZeroDone,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
jmp EncryptBlk_0
/* */
/* "local" function: InitNonce */
/* Builds the per-nonce working keys and the initial cipher state. */
/* In: esi --> caller's first parameter (ctxt_Ptr, nonce_Ptr are read via _LCL) */
/* esp --> return address, with the locals frame directly above it */
/* Out: eax,ebx,ecx,edx = X_0[3..6] xor nonce words 0..3 (Z0..Z3) */
/* esi = X_0[7] (Z4) */
/* ebp --> context, edi --> nonce */
/* locals: X_i_0, X_i_1 set; oldZ and exitTab zeroed (by SetTwoKeys); */
/* loopByteCnt = 0, _i_ = 0, srcPtr --> INIT_ZEROES, dstPtr --> tmpBuf */
.set _SO_,4
InitNonce:
/* first, init the local keys on the stack */
movl ctxt_Ptr_LCL(%esi),%ebp /* point to context structure */
movl X_1_Bump(%ebp),%edi /* edi=4*(keySize/8)+256*(macSize mod 128) */
movl nonce_Ptr_LCL(%esi),%edx /* (const) pointer to nonce words */
_push esi /* save esi (push/pop = smaller than lea esi,callerParms) */
xor %esi,%esi /* use esi as the variable i in SetTwoKeys */
inc %esi /* start with i = 1, since edi = X'_1 = 4*L(U) already */
call SetTwoKeys /* set X_1_n, X_5_n, for n=0,1 [return w/edi == 0] */
call SetTwoKeys /* set X_2_n, X_6_n, for n=0,1 */
call SetTwoKeys /* set X_3_n, X_7_n, for n=0,1 */
xor %esi,%esi /* wrap to i = 0 */
call SetTwoKeys /* set X_0_n, X_4_n, for n=0,1 */
_pop esi /* restore pointer to callerParms */
/* set up for initialization phase */
xorl %ecx,%ecx
leal INIT_ZEROES,%ebp /* use all zero input words, for i= -8 .. -1 */
leal tmpBuf+_SO_(%esp),%edi /* discard output */
movl %ecx,loopByteCnt+_SO_(%esp) /* initialize loop byte count counter = 0 */
movl %ecx,_i_+_SO_(%esp) /* initialize i = 0 (block number + 8) */
movl %ebp,srcPtr+_SO_(%esp)
movl %edi,dstPtr+_SO_(%esp)
/* now initialize the Zn register values */
movl ctxt_Ptr_LCL(%esi),%ebp
movl nonce_Ptr_LCL(%esi),%edi
movl X_0+12(%ebp),%eax /* get the X_0 key values */
movl X_0+16(%ebp),%ebx
movl X_0+20(%ebp),%ecx
movl X_0+24(%ebp),%edx
movl X_0+28(%ebp),%esi /* Z4 gets no nonce word; this also ends esi's use as parm pointer */
xorl (%edi),%eax /* merge in the nonce */
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
xorl 12(%edi),%edx
ret
.set _SO_,0
/* ########################################### */
/* done with the initial zeroes. */
/* AAD phase of the packet functions. */
/* The associated data is run through the encrypt blocks with the output sent to */
/* tmpBuf and discarded. Z1 (ebx) is xored with MAGIC_AAD_XOR before and after. */
/* Three stages: whole unrolled loops, then the remaining whole words, then a */
/* final partial word (masked copy in tmpBuf). */
/* eax..esi hold Z0..Z4 throughout; ebp and edi are scratch. */
_ret_InitZeroDone:
.if UNROLL_CNT > ZERO_INIT_CNT /* do we need to clear out the return point? */
xorl %ebp,%ebp /* (only if it's not already at the end) */
movl %ebp,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
.endif
/* ################ */
/* handle AAD here, looping if needed */
xorl $MAGIC_AAD_XOR,%ebx
movl aad_Len+_SO_(%esp),%ebp
testl %ebp,%ebp
_if nz /* if nothing there, skip all aad processing */
movl aad_Ptr+_SO_(%esp),%edi
movl %ebp,aadLeft+_SO_(%esp)
movl %edi, srcPtr+_SO_(%esp) /* src will come from aad_Ptr */
_aad_Loop: /* here with ebp == aad_Len */
leal tmpBuf+_SO_(%esp),%edi /* always use tmpBuf for aad dst (discard) */
movl %edi,dstPtr+_SO_(%esp) /* reset each pass: the loop end code advances dstPtr */
movl aadLeft+_SO_(%esp),%ebp
subl $4*UNROLL_CNT,%ebp /* only do one unrolled loop each time */
_if ae /* (since we use tmpBuf to discard ciphertext) */
movl %ebp,aadLeft+_SO_(%esp)
xorl %edi,%edi
movl %edi,loopByteCnt+_SO_(%esp) /* 0 => loop end code borrows after one pass */
movl $_aad_Loop,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
jmp EncryptBlk_0
_endif
/* here to handle final partial loop */
/* ebp = aadLeft - 4*UNROLL_CNT; masking yields the byte count of whole words left */
_aad_PartialLoop:
andl $4*(UNROLL_CNT-1),%ebp
movl %ebp,loopByteCnt+_SO_(%esp)
cmpl $4,%ebp
_if ae
movl $_ret_aad_1,exitTab-4+_SO_(%esp,%ebp) /* exit after the last whole word */
jmp EncryptBlk_0
_ret_aad_1:
movl loopByteCnt+_SO_(%esp),%ebp
xorl %edi,%edi
movl %edi,exitTab-4+_SO_(%esp,%ebp) /* clear the entry */
_endif
/* here to handle final partial word of AAD */
movl aadLeft+_SO_(%esp),%ebp
movl %ebp,%edi
andl $3,%edi /* any odd bytes? */
_ifbrk z /* if not, we're done with AAD */
addl $4,%ebp
andl $4*(UNROLL_CNT-1),%ebp
movl %ebp,loopByteCnt+_SO_(%esp) /* phase after the partial word has been processed */
_push esi
subl $4,%ebp
andl $4*(UNROLL_CNT-1),%ebp /* ebp = byte offset of the partial word within the loop */
movl srcPtr+_SO_(%esp),%esi
/* NOTE(review): this is a full 32-bit load, so it reads up to 3 bytes beyond the */
/* last AAD byte before they are masked off */
movl (%esi,%ebp),%esi /* get the last AAD word */
andl MASK_TAB(,%edi,4),%esi /* clear out extra bits */
leal tmpBuf+_SO_(%esp),%edi
movl %esi,(%edi)
subl %ebp,%edi /* bias the pointer: the block code adds its own fixed word offset */
movl %edi,dstPtr+_SO_(%esp)
movl %edi,srcPtr+_SO_(%esp)
movl $_ret_aad_2,exitTab+_SO_(%esp,%ebp)
movl %ebp,tmpBuf+4+_SO_(%esp)/* save this */
_pop esi
jmp *Encrypt_jmpTab(%ebp)
_ret_aad_2:
movl tmpBuf+4+_SO_(%esp),%ebp
xorl %edi,%edi
movl %edi,exitTab+_SO_(%esp,%ebp) /* clear the exitTab entry just used */
_endif
xorl $MAGIC_AAD_XOR,%ebx
/* ################ */
/* process the user data */
/* User data phase. */
/* _startUserData is the packet path; processUserData is also entered by jmp from */
/* the incremental EncryptBytes/DecryptBytes code. */
/* Entry conditions at processUserData: */
/* eax..edx = Z0..Z3, Z4 pushed on the stack (one word, reflected in _SO_) */
/* esi --> caller's first parameter */
/* edi = destination pointer, ebp = byte count */
/* srcPtr, jmpTabPtr, retAddr and loopByteCnt (current loop phase) already set */
/* Leaves through "jmp *retAddr" with eax..esi = Z0..Z4. */
_startUserData:
_push esi /* use esi as temp pointer */
leal callerParms+_SO_(%esp),%esi /* (to save code size in accessing caller parms below) */
leal _ret_MAC0,%ebp
movl %ebp,retAddr_LCL(%esi) /* packet path continues with the MAC */
movl src_Ptr_LCL(%esi),%ebp
movl %ebp,srcPtr+_SO_(%esp)
movl dst_Ptr_LCL(%esi),%edi
movl src_ByteCnt_LCL(%esi),%ebp
/* enter here from EncryptBytes */
processUserData:
movl %edi,dstPtr+_SO_(%esp)
movl %edi,dstPtr0_LCL(%esi)
movl %ebp,msgLen0_LCL(%esi)
_pop esi /* restore esi */
movl loopByteCnt+_SO_(%esp),%edi
andl $4*(UNROLL_CNT-1),%edi /* get the loop "phase" */
subl %edi,dstPtr+_SO_(%esp) /* adjust pointers accordingly */
subl %edi,srcPtr+_SO_(%esp)
/* ################ */
/* now process the bulk of the data in "full" loop chunks (ebp = src_ByteCnt) */
addl %edi,%ebp
subl $UNROLL_CNT*4,%ebp /* enough for one "full" loop? */
movl %ebp,loopByteCnt+_SO_(%esp) /* save the pre-subtracted value for use in the loop */
_if ae
add jmpTabPtr+_SO_(%esp),%edi /* get ready to jump into block processing */
movl $_ret_DataDone1,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
jmp *(%edi) /* go encrypt or decrypt */
_ret_DataDone1:
movl loopByteCnt+_SO_(%esp),%ebp /* restore ebp = loopByteCnt */
xorl %edi,%edi /* starting phase is at ??crypt_0 now */
_endif
/* ################ */
/* now process the remainder of the data, if any (partial loop) */
andl $4*(UNROLL_CNT-1),%ebp /* compute ebp = end phase */
cmpl %edi,%ebp /* any partial loop to do? */
_if nz
movl %ebp,loopByteCnt+_SO_(%esp) /* make sure that the exit loop test falls thru */
addl jmpTabPtr+_SO_(%esp),%edi /* get ready to jump */
movl $_ret_DataDone2,exitTab-4+_SO_(%esp,%ebp) /* force an exit at the correct point */
jmp *(%edi)
_ret_DataDone2:
xorl %edi,%edi /* edi = 0 */
movl loopByteCnt+_SO_(%esp),%ebp
andl $4*(UNROLL_CNT-1),%ebp /* recompute exitTab index */
movl %edi,exitTab-4+_SO_(%esp,%ebp) /* clear the exitTab entry */
_endif
/* ################ */
/* special (i.e. UGLY!!) handling when src_ByteCnt isn't a multiple of 4 */
/* here with ebp = loopByteCnt AND 4*(UNROLL_CNT-1) */
/* The partial word is copied (masked) to tmpBuf[0], processed as one block with */
/* the result landing in tmpBuf[1], and only the valid bytes are merged into dst. */
movl msgLen0+_SO_(%esp),%edi /* get original msgLen */
andl $3,%edi /* any partial words? (hopefully rare) */
_if nz
movl $_ret_OddBytes,exitTab+_SO_(%esp,%ebp)
orl %ebp,%edi /* save word index and odd byte count */
movl %edi,loopByteCnt+_SO_(%esp) /* back into loopByteCnt */
_push esi
andl $3,%edi
movl srcPtr+_SO_(%esp),%esi
addl %ebp,%esi
_push ebp
movl MASK_TAB(,%edi,4),%edi /* get the mask bits */
/* NOTE(review): full 32-bit load; reads up to 3 bytes past the end of the source */
movl (%esi),%ebp /* and get the source word */
leal tmpBuf+_SO_(%esp),%esi
andl %edi,%ebp /* ebp = masked source word */
movl %edi,8(%esi) /* save the mask bits (for use in Decrypt_OddBytes) */
movl %ebp, (%esi) /* save the masked source word */
_pop ebp
subl %ebp,%esi /* adjust src/dst ptrs for hard coded offsets in block code */
movl %esi,srcPtr+_SO_(%esp) /* set up for "single-word" encrypt in tmpBuf[] */
addl $4,%esi
movl %esi,dstPtr+_SO_(%esp)
mov jmpTabPtr+_SO_(%esp),%edi /* dispatch to different handler for Encrypt & Decrypt */
_pop esi
jmp *OddBytes_OFFS(%edi)
/* */
/* here to handle the odd-byte encrypt case */
Encrypt_OddBytes:
jmp *Encrypt_jmpTab(%ebp) /* go encrypt the single word */
/* */
/* here to handle the funky odd-byte decrypt case */
Decrypt_OddBytes:
/* we have to encrypt halfway thru the block to compute keystream :-(( */
/* (i.e., in order to produce the "full" ciphertext word) */
/* Z0..Z4 and ebp are saved and restored around this trial run; only the */
/* operations that feed Z4 (esi) are performed. */
_push eax,ebx,ecx,edx,esi,ebp
_o_ "addl %edx,%eax","roll $ROT_3b,%edx","mov X_i_0+_SO_(%esp,%ebp),%ebp" /* get the key word */
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
_o_ "xorl %ebx,%edx" ,"add %edx,%ebp"
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","mov loopByteCnt+_SO_(%esp),%edi"
_o_ "xorl %ebp,%eax","roll $ROT_4a,%esi","and $4*3,%edi"
_o_ "addl %eax,%ecx" ,"mov oldZ+_SO_(%esp,%edi),%ebp"
_o_ "xorl %ecx,%esi"
addl %esi,%ebp /* now ebp = keystream */
movl tmpBuf+8+_SO_(%esp),%edi /* get the mask word */
notl %edi /* toggle the maskbits */
andl %ebp,%edi /* mask off unused maskbits */
xorl %edi,tmpBuf+_SO_(%esp) /* re-create the "full" ciphertext word @ tmp src buffer */
_pop ebp,esi,edx,ecx,ebx,eax
jmp *Decrypt_jmpTab(%ebp) /* go decrypt */
/* "return" here with the dest word computed at [tmpBuf+4] */
_ret_OddBytes:
_push esi,eax
leal callerParms+_SO_(%esp),%esi
xorl %edi,%edi
movl loopByteCnt+_SO_(%esp),%ebp
andl $4*(UNROLL_CNT-1),%ebp
movl %edi,exitTab+_SO_(%esp,%ebp) /* clear out the exitTab entry we just used */
movl msgLen0+_SO_(%esp),%edi /* now output just the number of dst bytes specified */
movl %edi,%ebp
andl $3,%ebp
xorl %ebp,%edi /* clear low 2 bits of count */
addl dstPtr0_LCL(%esi),%edi /* point to "final" word offset */
movl tmpBuf_LCL+4(%esi),%eax /* get the dst output word (short offset) */
/* NOTE(review): the merge below reads and rewrites a full 32-bit word at dst; */
/* the bytes past the requested length are written back unchanged */
xorl (%edi),%eax /* do bit diddling to output just the odd bytes */
andl MASK_TAB(,%ebp,4),%eax
xorl %eax,(%edi)
_pop eax,esi
_endif
jmp *retAddr+_SO_(%esp) /* "return" to whomever */
/* MAC phase. */
/* _ret_MAC0 is the packet path; processMAC is also entered by jmp from the */
/* incremental Finalize code. */
/* Entry conditions at processMAC: */
/* eax..esi = Z0..Z4 (with the AAD length already xored in by the caller) */
/* ebp --> buffer that receives the MAC */
/* loopByteCnt = current loop phase, with the low 2 bits = message length mod 4 */
/* Runs eight blocks of padding and four more blocks whose output is the MAC, */
/* all over tmpBuf filled with (message length mod 4), copies macSize bits of */
/* the result to the MAC buffer, then unwinds the frame and returns to the */
/* original caller. */
/* NOTE(review): tmpBuf only holds four MAC words; macSize is not range-checked here. */
_ret_MAC0:
/* ################ */
/* here to compute and output/compare the MAC */
movl mac_Ptr+_SO_(%esp),%ebp
xorl aad_Len+_SO_(%esp),%esi
processMAC:
movl %ebp,dstPtr0+_SO_(%esp) /* save MAC ptr */
xorl $MAGIC_MAC_XOR,%eax /* toggle bits to start the MAC */
_push esi
movl loopByteCnt+_SO_(%esp),%ebp
movl %ebp,%edi
addl $3,%ebp /* advance to next full word, if odd bytes */
andl $4*(UNROLL_CNT-1),%ebp /* ebp = next word "offset" within block */
andl $3,%edi /* edi = length of src mod 4 (plaintext for MAC) */
leal tmpBuf+_SO_(%esp),%esi
.set _bb_,0
.rept 12 /* 8 for padding, 4 for MAC size */
movl %edi,_bb_(%esi) /* fill tmpBuf with L(P) mod 4 */
.set _bb_,_bb_+4
.endr
leal 7*4(%ebp),%edi
andl $4*(UNROLL_CNT-1),%edi /* stop point is after 8 blocks (i+0..i+7) */
movl $_ret_MAC1,exitTab+_SO_(%esp,%edi)
subl %ebp,%esi /* set up source/dest pointers */
movl %esi,srcPtr+_SO_(%esp) /* in-place: each block reads its word, then overwrites it */
movl %esi,dstPtr+_SO_(%esp)
addl $8*4-1,%ebp /* FUNKY wrap logic requires -1 */
movl %ebp,loopByteCnt+_SO_(%esp)
incl %ebp /* undo adjustment */
andl $4*(UNROLL_CNT-1),%ebp
_pop esi
jmp *Encrypt_jmpTab(%ebp) /* go do the encryption */
/* just finished eight blocks of "padding" using L(P) mod 4 */
/* now generate the MAC */
_ret_MAC1:
movl loopByteCnt+_SO_(%esp),%ebp
incl %ebp /* undo the -1 above */
andl $4*(UNROLL_CNT-1),%ebp
leal 3*4(%ebp),%edi /* do four more (0..3 -- stop after #3) */
andl $4*(UNROLL_CNT-1),%edi
movl $_ret_MAC2,exitTab+_SO_(%esp,%edi)
leal 4*4-1(%ebp),%edi /* FUNKY wrap logic requires -1 */
movl %edi,loopByteCnt+_SO_(%esp)
jmp *Encrypt_jmpTab(%ebp)
/* */
/* here with the MAC computed. eax..esi now can be trashed */
_ret_MAC2:
leal callerParms+_SO_(%esp),%esi
movl ctxt_Ptr_LCL(%esi),%edi
movl macSize(%edi),%ecx /* ecx = # bits in MAC */
movl dstPtr0_LCL(%esi),%edi
leal tmpBuf+8*4+_SO_(%esp),%esi /* MAC words follow the 8 padding words */
/* The packet path gets here without ever touching the direction flag, while */
/* the incremental entry points clear it themselves. Clear it here too so */
/* the copy below runs forward on every path. */
cld
testl $31,%ecx /* can we do it one word at a time? */
_if z
shrl $5,%ecx /* if so, it's faster */
rep movsl
_else
addl $7,%ecx /* round up to byte boundary */
shrl $3,%ecx /* non-word sizes get the slow treatment */
rep movsb
_endif
/* ################ */
/* tear down the stack and return */
addl $_Phelix_LocalSize,%esp
popal /* restore all of callers regs */
ret /* and return to caller */
/* */
/* ---------------------------------------------------------------- */
/* Common subroutine (called from InitNonce) to init subkeys */
/* ---------------------------------------------------------------- */
/* In: ebp --> pCtxt (const) */
/* edx --> nonce (const) */
/* edi = X' value for I */
/* esi = value of I (0..3) */
/* Out: esi incremented. ebp, edx unmodified */
/* edi = 0 */
/* eax, ebx, ecx trashed */
/* On the stack, for both i=I and i=I+4: X_i_0[i] = X_0[i] */
/* X_i_1[I] = X_0[I+4] + X' + N[I] */
/* X_i_1[I+4] = X_0[I] + X' - N[I] + I */
/* oldZ[I] = 0, and exitTab[I], exitTab[I+4], .. = 0 */
.set _SO_,12 /* three words on stack: InitNonce return, saved esi, our return */
SetTwoKeys:
movl X_0+4*0(%ebp,%esi,4),%eax /* load two key values */
movl X_0+4*4(%ebp,%esi,4),%ebx
movl %eax,X_i_0+4*0+_SO_(%esp,%esi,4) /* store the X_i_0 values */
movl %ebx,X_i_0+4*4+_SO_(%esp,%esi,4)
movl (%edx,%esi,4),%ecx /* get ecx = N_i */
addl %edi,%eax /* add in 4*L(U), for esi == 1 */
addl %edi,%ebx
addl %ecx,%ebx /* add/sub the nonce value */
subl %ecx,%eax
addl %esi,%eax
xorl %edi,%edi /* set edi = 0 */
movl %ebx,X_i_1+4*0+_SO_(%esp,%esi,4) /* store the X_i_1 values */
movl %eax,X_i_1+4*4+_SO_(%esp,%esi,4)
movl %edi,oldZ+_SO_(%esp,%esi,4) /* zero out the oldZ values */
.set _NN_,0
.rept UNROLL_CNT/4 /* init the "block exit" jump table: all zeroes */
movl %edi,exitTab+_NN_+_SO_(%esp,%esi,4) /* four calls (I=0..3) cover every entry */
.set _NN_,_NN_ + 16
.endr
incl %esi /* bump the counter for next call */
ret
/* */
.set _SO_,0 /* back to no offset */
/* */
/* ---------------------------------------------------------------- */
/* Encryption routines */
/* ---------------------------------------------------------------- */
/* */
/* PhelixEncryptPacket / ECRYPT_AE_encrypt_packet */
/* Entry stub and jump table come from PhelixAlgo; the unrolled encrypt loop follows. */
/* Each EncryptBlk_n processes one 32-bit word: */
/* In: eax..esi = Z0..Z4; srcPtr/dstPtr biased so word n is at offset 4*n */
/* Out: eax..esi updated, ciphertext word stored, oldZ[n & 3] = new Z4 */
/* ebp, edi are scratch */
/* After block n the exitTab[n] entry, if non-zero, is jumped to (see PhelixEarlyExit). */
.align 4
C_global PhelixEncryptPacket,ECRYPT_AE_encrypt_packet
PhelixAlgo Encrypt /* instantiate the algorithm code */
/* */
/* the main block processing loop */
/* */
_rept
.irp _blkNum_,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.if \_blkNum_ < UNROLL_CNT
strCat EncryptBlk_,\_blkNum_,":" /* make a label for re-entry points */
.set _bb_,\_blkNum_ & 7 /* support UNROLL_CNT > 8 */
_o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_i_0+4*_bb_+_SO_(%esp),%ebp"
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl srcPtr+_SO_(%esp),%edi"
_o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp" /* does LEA opcode help here? */
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
_o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl 4*_bb_(%edi),%ebp" /* ebp = plaintext */
_o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","movl oldZ+4*(_bb_&3)+_SO_(%esp),%edi"
_o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
_o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","xorl %edx,%ebp"
_o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
addl %esi,%edi /* now edi = keystream */
xorl %edx,%edi /* set up to compute edi = ciphertext below */
_o_ "addl %ebp,%eax","roll $ROT_3b,%edx","xorl %ebp,%edi" /* now edi = ciphertext */
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_i_1+4*_bb_+_SO_(%esp),%ebp"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
_o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl _i_+_SO_(%esp),%ebp"
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","leal _bb_(%ebp,%edx),%ebp"
_o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl dstPtr+_SO_(%esp),%ebp"
_o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
_o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,4*_bb_(%ebp)" /* save ciphertext */
_o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","movl exitTab+4*\_blkNum_+_SO_(%esp),%edi"
_o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
PhelixEarlyExit edi,\_blkNum_ /* do we need to do an early exit? If so, do it */
.endif
.endr
PhelixEndLoop UNROLL_CNT /* set condition code for _until below */
_until b
/* loopByteCnt borrowed: leave through the last exitTab entry */
jmp *exitTab+4*(UNROLL_CNT-1)+_SO_(%esp) /* "return" to do more */
/* */
/* ---------------------------------------------------------------- */
/* Decryption routine */
/* ---------------------------------------------------------------- */
/* */
/* PhelixDecryptPacket / ECRYPT_AE_decrypt_packet */
/* Entry stub and jump table come from PhelixAlgo; the unrolled decrypt loop follows. */
/* Each DecryptBlk_n processes one 32-bit word: */
/* In: eax..esi = Z0..Z4; srcPtr/dstPtr biased so word n is at offset 4*n */
/* Out: eax..esi updated, plaintext word stored, oldZ[n & 3] = new Z4 */
/* ebp, edi are scratch */
/* The recovered plaintext, not the ciphertext, is what gets mixed into the state. */
.align 4
C_global PhelixDecryptPacket,ECRYPT_AE_decrypt_packet
PhelixAlgo Decrypt /* instantiate the algorithm code */
/* */
/* the main block processing loop */
/* */
_rept
.irp _blkNum_,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.if \_blkNum_ < UNROLL_CNT
strCat DecryptBlk_,\_blkNum_,":" /* make a label for re-entry points */
.set _bb_,\_blkNum_ & 7 /* support UNROLL_CNT > 8 (but not really!) */
_o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_i_0+4*_bb_+_SO_(%esp),%ebp"
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl srcPtr+_SO_(%esp),%edi"
_o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp"
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
_o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl 4*_bb_(%edi),%ebp" /* ebp = ciphertext */
_o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","movl oldZ+4*(_bb_&3)+_SO_(%esp),%edi"
_o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
_o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
_o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
addl %esi,%edi /* set edi = keystream */
xorl %ebp,%edi /* now edi = plaintext */
movl %edx,%ebp
xorl %edi,%ebp /* now ebp = plaintext ^ edx */
_o_ "addl %ebp,%eax","roll $ROT_3b,%edx"
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_i_1+4*_bb_+_SO_(%esp),%ebp"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
_o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl _i_+_SO_(%esp),%ebp"
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","leal _bb_(%ebp,%edx),%ebp"
_o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl dstPtr+_SO_(%esp),%ebp"
_o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
_o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,4*_bb_(%ebp)" /* save plaintext computed above */
_o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","movl exitTab+4*\_blkNum_+_SO_(%esp),%edi"
_o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
PhelixEarlyExit edi,\_blkNum_ /* do we need to do an early exit? If so, do it */
.endif
.endr
PhelixEndLoop UNROLL_CNT /* set condition code for _until below */
_until b
/* loopByteCnt borrowed: leave through the last exitTab entry */
jmp *exitTab+4*(UNROLL_CNT-1)+_SO_(%esp) /* "return" to do more */
/* */
_PhelixCodeEnd_:
.ifdef PHELIX_INCREMENTAL_API
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: SetupNonce */
/* ---------------------------------------------------------------- */
/* use same stack as EncryptPacket! */
/* */
/* PhelixSetupNonce / ECRYPT_AE_ivsetup */
/* Stack parameters: the context pointer and the nonce pointer are read at the */
/* same positions as in the packet functions (ctxt_Ptr, nonce_Ptr). */
/* Runs InitNonce plus the initialization zero blocks, then saves the resulting */
/* state into the context: _Z_ (with MAGIC_AAD_XOR already applied to Z1 and */
/* recorded in aadXor), X_1, old_Z, blkNum; msgLen and aadLen are reset to 0. */
/* All caller registers are preserved (pushal/popal). */
C_global PhelixSetupNonce,ECRYPT_AE_ivsetup
pushal
lea callerParms-_Phelix_LocalSize(%esp),%esi
subl $_Phelix_LocalSize,%esp
_SO_ = 0
call InitNonce
movl $_ret_SetupNonceDone,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
jmp EncryptBlk_0
_ret_SetupNonceDone:
.if UNROLL_CNT > ZERO_INIT_CNT /* do we need to clear out the return point? */
.err "Replicate code here from _ret_InitZeroDone"
.endif
movl ctxt_Ptr+_SO_(%esp),%ebp /* ebp --> context, where the state is saved */
/* */
movl $MAGIC_AAD_XOR,%edi
xorl %edi,%ebx
movl %edi,aadXor(%ebp) /* remember that Z1 currently carries the AAD xor */
/* */
movl %eax,4*0+_Z_(%ebp)
movl %ebx,4*1+_Z_(%ebp)
movl %ecx,4*2+_Z_(%ebp)
movl %edx,4*3+_Z_(%ebp)
movl %esi,4*4+_Z_(%ebp)
/* copy the nonce-dependent keys (two words per pass) and the oldZ values */
.irp _nn_,0,1,2,3
movl X_i_1+8*\_nn_ +_SO_(%esp),%eax
movl X_i_1+8*\_nn_+4+_SO_(%esp),%ebx
movl oldZ +4*\_nn_ +_SO_(%esp),%ecx
movl %eax,X_1+ 8*\_nn_(%ebp)
movl %ebx,X_1+4+8*\_nn_(%ebp)
movl %ecx,old_Z+4*\_nn_(%ebp)
.endr
/* */
xorl %edi,%edi
movl %edi,msgLen (%ebp)
movl %edi,aadLen (%ebp)
movl %edi,aadLen+4(%ebp)
movl _i_+_SO_(%esp),%edi
movl %edi,blkNum(%ebp)
/* */
addl $_Phelix_LocalSize,%esp
popal
ret
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: EncryptBytes/DecryptBytes */
/* ---------------------------------------------------------------- */
/* use same locals stack as EncryptPacket */
/* */
/* Caller's parameters for PhelixEncryptBytes/PhelixDecryptBytes, relative to esp */
/* (same locals frame as EncryptPacket, so _cpOfs_ is unchanged) */
_pOfs_ = _cpOfs_
/* */
_newParm 1,ctxt_Ptr /* --> Phelix context structure */
_newParm 1,src_Ptr /* --> input bytes */
_newParm 1,dst_Ptr /* --> output bytes */
_newParm 1,bCnt /* number of bytes to process */
/* */
/* PhelixEncryptBytes / ECRYPT_AE_encrypt_bytes */
/* Loads the cipher state from the context into registers and stack locals, */
/* runs the shared user-data code (processUserData), then writes the updated */
/* state back: _Z_, old_Z, blkNum (+ number of words, rounded up) and msgLen */
/* (+ bCnt). All caller registers are preserved (pushal/popal). */
C_global PhelixEncryptBytes,ECRYPT_AE_encrypt_bytes
pushal
leal Encrypt_jmpTab,%ebp
/* common path; PhelixDecryptBytes enters here with ebp --> Decrypt_jmpTab */
PhelixBytes:
leal callerParms-_Phelix_LocalSize(%esp),%esi
subl $_Phelix_LocalSize,%esp
.set _SO_,0
movl %ebp,jmpTabPtr+_SO_(%esp)
/* copy context to local on stack */
movl ctxt_Ptr_LCL(%esi),%ebp
_push esi
leal X_0(%ebp),%esi
leal X_i_0+_SO_(%esp),%edi
movl $8+8+4,%ecx /* X_0, X_1, and oldZ */
cld
rep movsl /* copy the context */
xorl %eax,%eax
/* edi now points at _i_, the word between oldZ and exitTab, so one extra */
/* word is needed to reach the last exitTab entry (_i_ is set below anyway) */
movl $1+UNROLL_CNT,%ecx /* zero out _i_ and all of exitTab */
rep stosl
_pop esi
leal _ret_PhelixBytes,%ebp
movl %ebp,retAddr_LCL(%esi) /* set up return address */
/* srcPtr is set just before the jump below; dstPtr is set by processUserData */
movl ctxt_Ptr_LCL(%esi),%ebp
movl blkNum(%ebp),%edi /* convert blkNum from pCtxt to locals */
andl $~(UNROLL_CNT-1),%edi
movl %edi,_i_+_SO_(%esp)
movl blkNum(%ebp),%edi
shll $2,%edi /* convert blkNum to a word count */
movl %edi,loopByteCnt+_SO_(%esp) /* and save it as the "phase" */
movl _Z_+4*0(%ebp),%eax /* load the Z values */
movl _Z_+4*1(%ebp),%ebx
movl _Z_+4*2(%ebp),%ecx
movl _Z_+4*3(%ebp),%edx
movl _Z_+4*4(%ebp),%esi
xorl aadXor(%ebp),%ebx /* take the pending AAD xor (if any) back out of Z1 */
movl $0,aadXor(%ebp) /* and make sure that happens only once */
/* set up the register/stack state that processUserData expects */
_push esi /* Z4; popped inside processUserData */
leal callerParms+_SO_(%esp),%esi
movl src_Ptr_LCL(%esi),%ebp
movl %ebp,srcPtr+_SO_(%esp)
movl bCnt_LCL(%esi),%ebp
movl dst_Ptr_LCL(%esi),%edi
jmp processUserData
.set _SO_,_SO_-4 /* account for the pop done in processUserData (no code emitted) */
/* processUserData "returns" here through retAddr, with eax..esi = Z0..Z4 */
_ret_PhelixBytes:
/* copy modified value back to context */
movl ctxt_Ptr+_SO_(%esp),%ebp
movl %eax,_Z_+4*0(%ebp) /* store the values Z0..Z4 */
movl %ebx,_Z_+4*1(%ebp)
movl %ecx,_Z_+4*2(%ebp)
movl %edx,_Z_+4*3(%ebp)
movl %esi,_Z_+4*4(%ebp)
movl msgLen0+_SO_(%esp),%edi /* update pCtxt.blkNum */
movl %edi,%esi
addl $3,%edi /* round the byte count up to whole words */
shrl $2,%edi
addl %edi,blkNum(%ebp)
addl %esi,msgLen(%ebp) /* track low 2 bits of msgLen */
leal old_Z(%ebp),%edi
leal oldZ+_SO_(%esp),%esi
movl $4,%ecx /* copy back the updated oldZ values */
rep movsl /* direction flag is still clear from the cld above */
addl $_Phelix_LocalSize,%esp
popal
ret
/* */
/* PhelixDecryptBytes / ECRYPT_AE_decrypt_bytes: handle decryption here. */
/* Same parameters and frame as PhelixEncryptBytes; the only difference is the */
/* jump table handed to the common code at PhelixBytes. */
C_global PhelixDecryptBytes,ECRYPT_AE_decrypt_bytes
pushal
leal Decrypt_jmpTab,%ebp
jmp PhelixBytes
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: Finalize (MAC) */
/* ---------------------------------------------------------------- */
/* use same locals stack as EncryptPacket */
/* */
/* Caller's parameters for PhelixFinalize, relative to esp */
/* (same locals frame as EncryptPacket, so _cpOfs_ is unchanged) */
_pOfs_ = _cpOfs_
_newParm 1,ctxt_Ptr /* --> Phelix context structure */
_newParm 1,mac_Ptr /* --> buffer that receives the MAC */
/* */
/* PhelixFinalize / ECRYPT_AE_finalize */
/* Loads the cipher state from the context, folds the 64-bit AAD length into */
/* Z4 (low word) and Z2 (high word), and jumps into the shared MAC code at */
/* processMAC, which writes the MAC, unwinds the frame and returns to our caller. */
C_global PhelixFinalize,ECRYPT_AE_finalize
pushal
leal callerParms-_Phelix_LocalSize(%esp),%esi
subl $_Phelix_LocalSize,%esp
.set _SO_,0
leal Encrypt_jmpTab,%ebp /* the MAC blocks always run in the encrypt direction */
movl %ebp,jmpTabPtr+_SO_(%esp)
/* copy context to local on stack */
movl ctxt_Ptr_LCL(%esi),%ebp
_push esi
leal X_0(%ebp),%esi
leal X_i_0+_SO_(%esp),%edi
movl $8+8+4,%ecx /* X_0, X_1, and oldZ */
cld
rep movsl /* copy the context */
xorl %eax,%eax
/* edi now points at _i_, the word between oldZ and exitTab, so one extra */
/* word is needed to reach the last exitTab entry (_i_ is set below anyway) */
movl $1+UNROLL_CNT,%ecx /* zero out _i_ and all of exitTab */
rep stosl
_pop esi
movl ctxt_Ptr_LCL(%esi),%ebp
movl blkNum(%ebp),%edi /* convert blkNum from pCtxt to locals */
andl $~(UNROLL_CNT-1),%edi
movl %edi,_i_+_SO_(%esp)
/* rebuild loopByteCnt = (byte phase of the next block) - ((-msgLen) mod 4), */
/* so that its low 2 bits are msgLen mod 4 and rounding up gives the next word */
movl msgLen(%ebp),%eax
subl $4,%eax
negl %eax
andl $3,%eax /* track the low 2 bits of msgLen */
movl blkNum(%ebp),%edi
shll $2,%edi /* convert blkNum to a word count */
subl %eax,%edi
movl %edi,loopByteCnt+_SO_(%esp) /* and save it as the "phase" */
movl _Z_+4*0(%ebp),%eax /* load the Z values */
movl _Z_+4*1(%ebp),%ebx
movl _Z_+4*2(%ebp),%ecx
movl _Z_+4*3(%ebp),%edx
movl _Z_+4*4(%ebp),%esi
xorl aadXor (%ebp),%ebx /* take the pending AAD xor (if any) back out of Z1 */
xorl aadLen (%ebp),%esi /* fold in the AAD length, low word */
xorl aadLen+4(%ebp),%ecx /* and high word */
movl mac_Ptr+_SO_(%esp),%ebp /* processMAC expects ebp --> MAC buffer */
jmp processMAC
/* */
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: ProcessAAD */
/* ---------------------------------------------------------------- */
.set _Phelix_LocalSize,0
_newLocal 1,aad_I /* running block number (a different local frame from the one above!) */
_newLocal 1,aad_bb /* aad_I & 7, saved while edi is used as the data pointer */
_newLocal 1,aad_tmp /* holds the zero-padded final partial word */
/* */
_cpOfs_ = 4+8*4+_Phelix_LocalSize /* caller parms offset from esp */
_pOfs_ = _cpOfs_
/* */
_newParm 1,ctxt_Ptr
_newParm 1,aad_Ptr
_newParm 1,aad_Len
/* */
/*
 * PhelixProcessAAD / ECRYPT_AE_authenticate_bytes
 *
 * Stack parameters: ctxt_Ptr (context), aad_Ptr (data), aad_Len (byte count).
 * They are addressed above the return address (4 bytes) and the pushal
 * frame (8*4 bytes), as encoded in _cpOfs_.
 *
 * Effect on the context:
 *   - aad_Len is added to the 64-bit aadLen counter
 *   - the five Z words are loaded, one block is run per 32-bit data word
 *     (using X_0[i&7], X_1[i&7]+i and storing to old_Z[i&3]), and the Z words
 *     and blkNum are written back
 *   - a trailing group of 1..3 bytes is zero-padded to one word and run
 *     through the same block code
 * NOTE(review): because a partial tail is padded, presumably only the last
 * call for a packet may pass a length that is not a multiple of 4 -- confirm
 * against the C API documentation.
 *
 * Register use: eax,ebx,ecx,edx,esi = Z0..Z4; edi and ebp are scratch.
 * All general registers are restored on return (pushal/popal).
 *
 * The trailing bytes are gathered one byte at a time. A 32-bit load at
 * that position would read up to three bytes beyond the end of the
 * caller's buffer.
 */
C_global PhelixProcessAAD,ECRYPT_AE_authenticate_bytes
        pushal
        subl    $_Phelix_LocalSize,%esp
.set _SO_,0
        movl    ctxt_Ptr+_SO_(%esp),%ebp /* point to context */
        movl    aad_Len+_SO_(%esp),%edi
        addl    %edi,aadLen (%ebp) /* update accumulated length */
        adcl    $0 ,aadLen+4(%ebp)
        movl    blkNum(%ebp),%edi
        movl    %edi,aad_I+_SO_(%esp)
        movl    _Z_+4*0(%ebp),%eax /* load the Z values */
        movl    _Z_+4*1(%ebp),%ebx
        movl    _Z_+4*2(%ebp),%ecx
        movl    _Z_+4*3(%ebp),%edx
        movl    _Z_+4*4(%ebp),%esi
        subl    $4,aad_Len+_SO_(%esp) /* are we done yet? */
        /* the condition uses the flags of the subl above; presumably the loop */
        /* is skipped when fewer than 4 bytes were supplied -- see strucmac.S */
    _rept ae
aad_Again:movl aad_I+_SO_(%esp),%edi
        andl    $7,%edi /* edi = block number mod 8 */
        movl    ctxt_Ptr+_SO_(%esp),%ebp
        /* one Phelix block; the third column carries the loads/stores */
        _o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_0(%ebp,%edi,4),%ebp"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl %edi,aad_bb+_SO_(%esp)"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl aad_Ptr+_SO_(%esp),%edi"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
        _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl (%edi),%ebp" /* ebp = AAD plaintext */
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","addl $4,%edi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,aad_Ptr+_SO_(%esp)"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","xorl %edx,%ebp"
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx","movl aad_bb+_SO_(%esp),%edi"
        _o_ "addl %ebp,%eax","roll $ROT_3b,%edx","movl ctxt_Ptr+_SO_(%esp),%ebp"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_1(%ebp,%edi,4),%ebp"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl aad_I+_SO_(%esp),%ebp"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","addl %edx,%ebp"
        _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl ctxt_Ptr+_SO_(%esp),%ebp"
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","andl $3,%edi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","incl aad_I+_SO_(%esp)"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx","movl %esi,old_Z(%ebp,%edi,4)"
        subl    $4,aad_Len+_SO_(%esp) /* are we done yet? */
    _until b
        /* note ebp == ctxt_Ptr here */
        movl    aad_Len+_SO_(%esp),%edi /* at this point, -4 <= aad_Len < 0 */
        andl    $3,%edi /* any odd bytes left? */
    _if z /* if not, we're done */
        movl    aad_I+_SO_(%esp),%edi /* copy back the updated blkNum */
        movl    %edi,blkNum(%ebp)
        movl    %eax,_Z_+4*0(%ebp) /* save the Z values */
        movl    %ebx,_Z_+4*1(%ebp)
        movl    %ecx,_Z_+4*2(%ebp)
        movl    %edx,_Z_+4*3(%ebp)
        movl    %esi,_Z_+4*4(%ebp)
        /* clean up the stack and return */
        addl    $_Phelix_LocalSize,%esp
        popal
        ret
    _endif
        /* here to handle odd AAD bytes: edi = number of bytes left (1..3) */
        movl    aad_Ptr+_SO_(%esp),%ebp /* ebp -> first leftover byte */
        pushl   %eax /* borrow eax: edi/ebp have no byte sub-register */
        /* (no esp-relative access is made until the matching popl) */
        xorl    %eax,%eax
    _rept /* gather last byte first, giving a little-endian word */
        shll    $8,%eax
        movb    -1(%ebp,%edi),%al /* reads only bytes inside the buffer */
        decl    %edi
    _until z
        movl    %eax,%ebp /* ebp = final word, upper bytes zero */
        popl    %eax /* restore Z0 */
        leal    aad_tmp+_SO_(%esp),%edi
        movl    %edi,aad_Ptr+_SO_(%esp) /* point aad_Ptr to aad_tmp */
        movl    %ebp,(%edi) /* store zero-padded word there */
        xorl    %ebp,%ebp /* fix up the count to not come here again */
        movl    %ebp,aad_Len+_SO_(%esp)
        jmp     aad_Again
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: SetupKey */
/* ---------------------------------------------------------------- */
/* */
.set _Phelix_LocalSize,0
_newLocal 1,sk_esi /* the L(U)+64 mixing constant */
_newLocal 1,sk_Cnt /* Feistel round counter */
/* */
_cpOfs_ = 4+8*4+_Phelix_LocalSize /* caller parms offset from esp */
_pOfs_ = _cpOfs_
/* */
_newParm 1,ctxt_Ptr
_newParm 1,key_Ptr
_newParm 1,key_Size
_newParm 1,iv_Size
_newParm 1,mac_Size
/* */
/* Preconditions (not checked by this code): */
/* assert(PHELIX_NONCE_SIZE==ivSize); Phelix only supports "full" nonces */
/* assert( 0 == (keySize%8)); Phelix only supports byte-sized keys */
/* assert(256 >= keySize); Phelix only supports keys <= 256 bits */
/* */
/*
 * PhelixSetupKey / ECRYPT_AE_keysetup
 *
 * Stack parameters: ctxt_Ptr, key_Ptr, key_Size (bits), iv_Size, mac_Size.
 * iv_Size is not read by this routine.
 *
 * Writes to the context:
 *   keySize  = key_Size
 *   macSize  = mac_Size
 *   X_1_Bump = ((mac_Size & 127) << 8) + key_Size/2
 *   X_0[0..7] = the key_Size/8 key bytes, zero-padded to 32 bytes, then
 *               mixed by 8 Feistel rounds. Each round runs the block
 *               function twice on one 16-byte half, with L(U)+64 as the
 *               fifth input (L(U) = key length in bytes), and xors the
 *               result into the other half.
 *
 * All general registers are restored on return (pushal/popal).
 *
 * The key is copied with a byte-granular string move so that no byte
 * beyond key_Ptr[key_Size/8 - 1] is ever read. A word-wise copy followed
 * by masking would fetch up to three bytes past the end of the key when
 * its length is not a multiple of 4 bytes.
 * A key_Size above 256 would overrun X_0; callers must honour the
 * assertions above.
 */
C_global PhelixSetupKey,ECRYPT_AE_keysetup
        pushal
        subl    $_Phelix_LocalSize,%esp
.set _SO_,0
        movl    ctxt_Ptr+_SO_(%esp),%ebp /* point to the context to be built */
        movl    key_Size+_SO_(%esp),%eax /* copy keySize */
        movl    %eax,keySize(%ebp)
        movl    mac_Size+_SO_(%esp),%ebx /* and macSize */
        movl    %ebx,macSize(%ebp)
        andl    $127,%ebx /* and compute X1_Bump */
        shll    $8 ,%ebx
        shrl    $1 ,%eax /* eax = keySize/2 (in bits) */
        addl    %eax,%ebx
        movl    %ebx,X_1_Bump(%ebp) /* then store it */
        shrl    $2 ,%eax /* eax = keySize/8 (# bytes of key) */
        /* now build the zero-padded working key in X_0 */
        movl    %eax,%edx /* edx = key byte count (eax is needed by stosl) */
        movl    key_Ptr+_SO_(%esp),%esi /* esi -> caller's key bytes */
        leal    X_0(%ebp),%edi
        xorl    %eax,%eax
        movl    $8,%ecx /* X_0 is 8 words */
        cld
        rep stosl /* zero the whole context key first */
        leal    X_0(%ebp),%edi
        movl    %edx,%ecx
        rep movsb /* then copy exactly keySize/8 bytes over it */
        movl    %edx,%eax /* eax = L(U), the key length in bytes */
        /* now run the Feistel network for initial key mixing */
        addl    $64,%eax
        movl    %eax,sk_esi+_SO_(%esp) /* precompute L(U)+64 "constant" for mixing */
        movl    $128,sk_Cnt+_SO_(%esp) /* use this as a counter */
    _rept
        movl    sk_Cnt+_SO_(%esp),%edi
        andl    $16,%edi /* isolate one bit: byte offset 0 or 16 of the source half */
        movl    X_0+4*0(%ebp,%edi),%eax
        movl    X_0+4*1(%ebp,%edi),%ebx
        movl    X_0+4*2(%ebp,%edi),%ecx
        movl    X_0+4*3(%ebp,%edi),%edx
        movl    sk_esi+ _SO_(%esp),%esi
    .rept 2 /* unroll just a bit */
        _o_ "addl %edx,%eax","roll $ROT_3b,%edx"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
        _o_ "xorl %edx,%eax","roll $ROT_3a,%edx"
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
    .endr
        xorl    $16,%edi /* go to other half */
        xorl    %eax,X_0+4*0(%ebp,%edi) /* perform the Feistel xor */
        xorl    %ebx,X_0+4*1(%ebp,%edi)
        xorl    %ecx,X_0+4*2(%ebp,%edi)
        xorl    %edx,X_0+4*3(%ebp,%edi)
        subl    $16,sk_Cnt+_SO_(%esp) /* 128,112,...,16: eight rounds in all */
    _until be
        /* clean up the stack and return */
        addl    $_Phelix_LocalSize,%esp
        popal
        ret
/* */
/* ---------------------------------------------------------------- */
/* */
/* PhelixIncremental_CodeSize: returns in eax an assemble-time constant, */
/* the distance in bytes from _PhelixCodeStart_ (a label defined earlier */
/* in the file) to the mov instruction below. No other register is changed. */
C_global PhelixIncremental_CodeSize
mov $(.- _PhelixCodeStart_),%eax
ret
/* */
.endif /* _INCREMENTAL_API */
/* */
/* ---------------------------------------------------------------- */
/* use this NOP routine to calibrate/check our timing tests */
/* ---------------------------------------------------------------- */
/* */
/* PhelixNop: does no work. It saves and immediately restores all general */
/* registers, the same pushal/popal pair used by the other entry points */
/* in this file, and returns. */
C_global PhelixNop
pushal
popal
ret
/* */
/* ---------------------------------------------------------------- */
/* size statistics at compile time */
/* ---------------------------------------------------------------- */
/* */
/* PhelixProcessPacket_CodeSize: returns in eax the assemble-time constant */
/* _PhelixCodeEnd_ - _PhelixCodeStart_, the byte distance between those two */
/* labels (both defined earlier in the file). No other register is changed. */
C_global PhelixProcessPacket_CodeSize,ECRYPT_AE_process_packet_CodeSize
movl $(_PhelixCodeEnd_-_PhelixCodeStart_),%eax
ret
/* */
.end
/* Web-viewer footer captured with the file, not part of phelix.S: */
/* "eSTREAM Project | Powered by ViewCVS 1.0-dev | ViewCVS and CVS Help" */