/*
 * ----------------------------------------------------------------
 * Phelix encryption/authentication algorithm
 * Author: Doug Whiting, Hifn. 2005.
 *
 * This source code is released to the public domain
 * ----------------------------------------------------------------
 *
 * Toolchain notes: 32-bit x86 only (see the __i386__ guard below),
 * AT&T operand order, GNU as directives, and the file is run through
 * the C preprocessor first.  Each preprocessor directive therefore
 * needs a line of its own and each assembler statement ends at a
 * newline.
 */
#ifndef __i386__
#error architecture is not supported
#endif

    .file   "phelix.S"
    .text
    .align  4

#include "strucmac.S"                   /* structured programming macros */

    .set    PHELIX_INCREMENTAL_API,1    /* comment this out to exclude incremental calls */

/* concatenate text together (useful in building names inside macros) */
/* Up to eight arguments are pasted back to back into one statement.   */
    .macro  strCat aa,bb,cc,dd,ee,ff,gg,hh
\aa\bb\cc\dd\ee\ff\gg\hh
    .endm

/* ---------------------------------------------------------------- */
/* define a global label. Handle linking with and without underscore */
/*   phelixName : label defined here, as NAME and as _NAME            */
/*   ecryptName : optional alias; only emitted when ECRYPT_API is     */
/*                defined and the argument is not blank               */
/* With MIX_ASM the .global names are NAME_ASM and _NAME_ASM; the     */
/* plain NAME and _NAME labels are still defined at the same address  */
/* but this macro does not mark them .global in that configuration.   */
    .macro  C_global phelixName,ecryptName
#ifdef MIX_ASM
    strCat  ".global ",\phelixName,"_ASM"
    strCat  ".global _",\phelixName,"_ASM"
    strCat  " ",\phelixName,"_ASM:"
    strCat  "_",\phelixName,"_ASM:"
#else
    .global \phelixName
    .global _\phelixName
#endif
\phelixName:
_\phelixName:
#ifdef ECRYPT_API
  .ifnc \ecryptName,
    .global \ecryptName
    .global _\ecryptName
\ecryptName:
_\ecryptName:
  .endif
#endif
    .endm

/* ---------------------------------------------------------------- */
C_global _debugPhelix_
    .long   0           /* ignored here, but must be defined for testPhelix.c */

AsmName:
    .ascii  "gnu.as\0"
    .align  4

/* PhelixCompiler_Name loads the address of AsmName into eax and then  */
/* falls through into PhelixInit, whose single ret returns to the      */
/* caller.  PhelixInit on its own is just that ret.                    */
/* (The second invocation below used to be spelled C_Global; it is     */
/* now spelled exactly like the macro definition.)                     */
C_global PhelixCompiler_Name            /* show who assembled us */
    lea     AsmName,%eax
C_global PhelixInit,ECRYPT_init         /* Init call does nothing */
    ret

/* */
/* ---------------------------------------------------------------- */
/* Macros and definitions */
/* ---------------------------------------------------------------- */
/* */
/* Phelix rotation constants */
    .set    ROT_0a, 9
    .set    ROT_1a, 10
    .set    ROT_2a, 17
    .set    ROT_3a, 30
    .set    ROT_4a, 13
    .set    ROT_0b, 20
    .set    ROT_1b, 11
    .set    ROT_2b, 5
    .set    ROT_3b, 15
    .set    ROT_4b, 25

    .set    UNROLL_CNT, 8               /* how many blocks to unroll in inner loop */
    .set    ZERO_INIT_CNT, 8            /* number of
words of init */
    .set    MAGIC_MAC_XOR, 0x912d94f1   /* special constants */
    .set    MAGIC_AAD_XOR, 0xaadaadaa
/* */
/* ----- register assignments */
/* Z0 equ eax */
/* Z1 equ ebx */
/* Z2 equ ecx */
/* Z3 equ edx */
/* Z4 equ esi */
/* t0 equ ebp #"temp" scratch registers */
/* t1 equ edi */
/* oldZreg equ Z4 */
/* */
/* ---------------------------------------------------------------- */
/* */
/* Allocate and define local variables on the stack */
/* [Note: We use esp for locals, not ebp, since we need ebp as a variable. */
/* Thus, we can't use the assembler stack frame primitives.] */
/* */
/* _SO_ is an assembly-time counter, not a run-time value: the _push  */
/* and _pop macros adjust it, and stack-relative operands written as  */
/* name+_SO_(%esp) use it to compensate for words pushed since the    */
/* locals were allocated.                                             */
    .set    _maxLocalSize_ ,0           /* max locals usage in bytes */
    .set    _Phelix_LocalSize ,0        /* starting value: no locals allocated yet */
    .set    _SO_ ,0                     /* current stack offset due to calls */
/* */
/* _newLocal wCnt,lName: define lName as the current byte offset and  */
/* reserve wCnt 32-bit words after it.                                */
    .macro  _newLocal wCnt,lName        /* macro to define a local variable */
    .set    \lName ,_Phelix_LocalSize
    .set    _Phelix_LocalSize,_Phelix_LocalSize+4*(\wCnt)
    /* keep running tabs on stack usage for locals */
    .if     _maxLocalSize_<_Phelix_LocalSize
    .set    _maxLocalSize_,_Phelix_LocalSize
    .endif
    .endm
/* */
/* _newParm wCnt,name: define name = _pOfs_ and name_LCL =            */
/* _pOfs_-_cpOfs_, then advance _pOfs_ by wCnt words.  The same macro */
/* is used for caller parameters and for context structure fields.    */
    .macro  _newParm wCnt,_pp_
    .set    \_pp_, _pOfs_
    strCat  ".set ",\_pp_,_LCL,",",(_pOfs_-_cpOfs_)
    .set    _pOfs_,_pOfs_+4*(\wCnt)
    .endm
/* */
/* now define local variables for the Encrypt/Decrypt functions */
/* (the order fixes the stack layout; later code copies and clears   */
/* runs of adjacent locals, so do not reorder without checking it)   */
    _newLocal 1,srcPtr                  /* pointer to input data buffer */
    _newLocal 1,dstPtr                  /* pointer to output data buffer */
    _newLocal 1,loopByteCnt             /* inner loop byte counter */
    _newLocal 1,jmpTabPtr               /* pointer to encrypt/decrypt jump table */
    _newLocal 8,X_i_0                   /* local copy of the key values */
    _newLocal 8,X_i_1
    _newLocal 4,oldZ                    /* "old" Z values */
    _newLocal 1,_i_                     /* block number (+8) */
    _newLocal UNROLL_CNT ,exitTab       /* local jump table for exiting unrolled loop */
    _newLocal UNROLL_CNT+4,tmpBuf       /* local buffer encryption/decryption blocks */
    _newLocal 1,aadLeft                 /* # bytes of aad remaining */
    _newLocal 1,msgLen0                 /* initial value of src_ByteCnt */
    _newLocal 1,dstPtr0                 /* initial dst pointer */
    _newLocal 1,retAddr                 /* local "return"
address */
/* _cpOfs_ = return address (4) + the eight registers saved by pushal */
/* (8*4) + the locals: the offset from esp to the first caller parm.  */
    .set    _cpOfs_,4+8*4+_Phelix_LocalSize /* caller parms offset from esp */
    .set    retAddr_LCL,retAddr-_cpOfs_
    .set    dstPtr0_LCL,dstPtr0-_cpOfs_
    .set    msgLen0_LCL,msgLen0-_cpOfs_
    .set    tmpBuf_LCL, tmpBuf-_cpOfs_
/* */
/* ---------------------------------------------------------------- */
/* Define caller's parameters on the stack, relative to esp */
/* */
    .set    _pOfs_,_cpOfs_
    _newParm 0,callerParms              /* placeholder, no space allocated */
    _newParm 1,ctxt_Ptr
    _newParm 1,nonce_Ptr
    _newParm 1,aad_Ptr
    _newParm 1,aad_Len
    _newParm 1,src_Ptr
    _newParm 1,dst_Ptr
    _newParm 1,src_ByteCnt
    _newParm 1,mac_Ptr
/* */
/* ---------------------------------------------------------------- */
/* Phelix context structure definition */
/* (offsets from the context pointer, since _pOfs_ restarts at 0) */
    .set    _pOfs_,0
    _newParm 1,keySize                  /* size of raw key in bits */
    _newParm 1,macSize                  /* size of mac tag in bits */
    _newParm 1,X_1_Bump                 /* 4*(keySize/8) + 256*(macSize mod 128) */
    _newParm 8,X_0                      /* subkeys */
    _newParm 8,X_1                      /* subkeys */
    /* internal cipher state */
    _newParm 4,old_Z                    /* previous Z[4] values for output */
    _newParm 5,_Z_                      /* 5 internal state words */
    _newParm 1,blkNum                   /* block number (i) */
    _newParm 2,aadLen                   /* 64-bit aadLen counter (LSW first) */
    _newParm 1,msgLen                   /* 32-bit msgLen counter (mod 2**32) */
    _newParm 1,aadXor                   /* aad Xor constant */
/* */
/* ---------------------------------------------------------------- */
/* */
/* NOTE(review): the line breaks inside this macro body were          */
/* reconstructed.  Each op is a complete instruction and needs its    */
/* own line; cond3 is never passed by any caller visible in this      */
/* file, so its placement cannot be confirmed from here.              */
    .macro  _o_ op1,op2,op3,cond3       /* shorthand: instantiate 1-3 opcodes */
    \op1
    \op2
    \op3
    \cond3
    .endm
/* ---------------------------------------------------------------- */
/* adjust _SO_ with push/pop operations */
/* _stackOp emits "op %reg" and adds bump to _SO_; blank reg = no-op */
    .macro  _stackOp op,reg,bump
    .ifnc   \reg,                       /* only do something if reg is not blank */
    \op     %\reg
    .set    _SO_,_SO_+\bump
    .endif
    .endm

/* _push: push up to seven registers, left to right */
    .macro  _push r0,r1,r2,r3,r4,r5,r6
    _stackOp push,\r0,4
    _stackOp push,\r1,4
    _stackOp push,\r2,4
    _stackOp push,\r3,4
    _stackOp push,\r4,4
    _stackOp push,\r5,4
    _stackOp push,\r6,4
    .endm
/* */
/* _pop: pop up to seven registers, left to right (so list them in   */
/* the reverse of the matching _push)                                */
    .macro  _pop r0,r1,r2,r3,r4,r5,r6
    _stackOp pop,\r0,-4
    _stackOp pop,\r1,-4
    _stackOp pop,\r2,-4
    _stackOp pop,\r3,-4
    _stackOp pop,\r4,-4
    _stackOp pop,\r5,-4
    _stackOp pop,\r6,-4
    .endm
/* */
/* ---------------------------------------------------------------- */
/* Init code, jump tables (for lblName = Encrypt/Decrypt) */
/* ---------------------------------------------------------------- */
/* */
/* PhelixAlgo emits the packet entry stub (pushal, ebp = address of   */
/* <lblName>_jmpTab, jmp Phelix_Main) followed by the jump table:     */
/* one .long per <lblName>Blk_n label for n < UNROLL_CNT, then one    */
/* .long for <lblName>_OddBytes.  OddBytes_OFFS is set to the byte    */
/* offset of that last entry from the start of the table.             */
    .macro  PhelixAlgo lblName
    /* first, set up the stack frame */
    pushal                              /* save all regs on stack */
    strCat  "lea ",\lblName,"_jmpTab,%ebp"  /* handle the encrypt/decrypt difference */
    jmp     Phelix_Main                 /* go run the algorithm */
    /* */
    /* the jump table for this operation */
    /* */
    .align  4
    strCat  \lblName,"_jmpTab:"
    /* first, a list of "block boundary" targets within unrolled processing loop */
    .irp    xxx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
    .if     \xxx < UNROLL_CNT
    strCat  " .long \lblName","Blk_",\xxx
    strCat  " .global \lblName","Blk_",\xxx
    .endif
    .endr
    /* next, successive "control" targets within Phelix_Main */
    strCat  ".set OddBytes_OFFS,","(.-\lblName","_jmpTab)"
    strCat  ".long \lblName","_OddBytes"
    .endm   /* PhelixAlgo */
/* */
/* ---------------------------------------------------------------- */
/* Common unrolled loop end code for encrypt/decrypt */
/* ---------------------------------------------------------------- */
/* */
/* The final subl is deliberately last: the loop test that follows   */
/* the macro uses its flags.                                         */
    .macro  PhelixEndLoop CNT
    addl    $(\CNT)*4,srcPtr(%esp)      /* bump the pointers */
    addl    $(\CNT)*4,dstPtr(%esp)
    addl    $(\CNT) ,_i_ (%esp)         /* bump the count */
    subl    $(\CNT)*4,loopByteCnt(%esp) /* are we done yet? */
    .endm   /* leave here with flags set for loop jmp */
/* */
/* ---------------------------------------------------------------- */
/* Common "early exit" code for encrypt/decrypt inner loop */
/* ---------------------------------------------------------------- */
/* This functionality is required for splicing AAD/text/padding */
/* */
    .macro  PhelixEarlyExit jTabReg,_bn_
    .if     \_bn_ < (UNROLL_CNT-1)      /* don't need early exit at bottom of loop */
    testl   %\jTabReg,%\jTabReg         /* time to exit?
*/
      _if nz
        movl    %esi,oldZ+4*((\_bn_) & 3)+_SO_(%esp)
        jmp     *%\jTabReg              /* go to "exit" address */
      _endif
    .endif
    /* fall-through path: record Z4 (esi) in oldZ[_bn_ & 3] */
    movl    %esi,oldZ+4*((\_bn_)& 3)+_SO_(%esp)
    .endm
/* */
/* **************************************************************** */
/* start of actual code (i.e., end of macro definitions) */
/* **************************************************************** */
/* */
    .align  4
/* ZERO_INIT_CNT zero words: the input fed through the block code    */
/* during the initialization phase                                   */
INIT_ZEROES:
    .rept   ZERO_INIT_CNT
    .long   0
    .endr
/* MASK_TAB[n] keeps the low n bytes of a word, n = 0..3 */
MASK_TAB:
    .long   0,0xff,0xffff,0xffffff
_PhelixCodeStart_:
/* */
/* ---------------------------------------------------------------- */
/* Common control path for Encrypt/Decrypt */
/* ---------------------------------------------------------------- */
/* In: ebp --> (const) jump table (Encrypt_jmpTab or Decrypt_jmpTab) */
/* Out: everything done */
/* */
/* Entered by jmp from the PhelixAlgo stub, i.e. after pushal.  The  */
/* routine is one long fall-through sequence; the block code is      */
/* entered by jmp and "returns" by jumping through exitTab.          */
Phelix_Main:
    /* point to callers first parameter (save code size below) */
    leal    callerParms-_Phelix_LocalSize(%esp),%esi
    subl    $_Phelix_LocalSize,%esp     /* make room for locals on stack */
    movl    %ebp,jmpTabPtr(%esp)        /* save jump table pointer */
    call    InitNonce
/* */
/* ################################################################ */
/* Finally ready to start running Phelix on some data */
/* ################################################################ */
/* First, process the initialization zeroes (loopByteCnt == 0 from PhelixInit) */
/* (in this file loopByteCnt is zeroed by InitNonce, called just above) */
/* */
    movl    $_ret_InitZeroDone,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
    jmp     EncryptBlk_0
/* */
/* "local" function */
/* In:  esi --> callerParms (first caller parameter)                 */
/* _SO_ is 4 in here to account for the return address of the call.  */
    .set    _SO_,4
InitNonce:
    /* first, init the local keys on the stack */
    movl    ctxt_Ptr_LCL(%esi),%ebp     /* point to context structure */
    movl    X_1_Bump(%ebp),%edi         /* edi=4*(keySize/8)+256*(macSize mod 128) */
    movl    nonce_Ptr_LCL(%esi),%edx    /* (const) pointer to nonce words */
    _push   esi                         /* save esi (push/pop = smaller than lea esi,callerParms) */
    xor     %esi,%esi                   /* use esi as the variable i in SetTwoKeys */
    inc     %esi                        /* start with i = 1, since edi = X'_1 = 4*L(U) already */
    call    SetTwoKeys                  /* set X_1_n, X_5_n, for n=0,1 [return w/edi == 0] */
    call    SetTwoKeys                  /*
set X_2_n, X_6_n, for n=0,1 */
    call    SetTwoKeys                  /* set X_3_n, X_7_n, for n=0,1 */
    xor     %esi,%esi                   /* wrap to i = 0 */
    call    SetTwoKeys                  /* set X_0_n, X_4_n, for n=0,1 */
    _pop    esi                         /* restore pointer to callerParms */
    /* set up for initialization phase */
    xorl    %ecx,%ecx
    leal    INIT_ZEROES,%ebp            /* use all zero input words, for i= -8 .. -1 */
    leal    tmpBuf+_SO_(%esp),%edi      /* discard output */
    movl    %ecx,loopByteCnt+_SO_(%esp) /* initialize loop byte count counter = 0 */
    movl    %ecx,_i_+_SO_(%esp)         /* initialize i = 0 (block number + 8) */
    movl    %ebp,srcPtr+_SO_(%esp)
    movl    %edi,dstPtr+_SO_(%esp)
    /* now initialize the Zn register values */
    /* eax..esi = context words X_0[3..7]; the four nonce words are   */
    /* xored into eax..edx, esi is left as loaded                     */
    movl    ctxt_Ptr_LCL(%esi),%ebp
    movl    nonce_Ptr_LCL(%esi),%edi
    movl    X_0+12(%ebp),%eax           /* get the X_0 key values */
    movl    X_0+16(%ebp),%ebx
    movl    X_0+20(%ebp),%ecx
    movl    X_0+24(%ebp),%edx
    movl    X_0+28(%ebp),%esi
    xorl    (%edi),%eax                 /* merge in the nonce */
    xorl    4(%edi),%ebx
    xorl    8(%edi),%ecx
    xorl    12(%edi),%edx
    ret
    .set    _SO_,0
/* ########################################### */
/* done with the initial zeroes. */
/* (reached by the indirect jmp through the exitTab entry that was    */
/* pointed at this label before entering EncryptBlk_0)                */
_ret_InitZeroDone:
    .if     UNROLL_CNT > ZERO_INIT_CNT  /* do we need to clear out the return point?
*/
    xorl    %ebp,%ebp                   /* (only if it's not already at the end) */
    movl    %ebp,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
    .endif
/* ################ */
/* handle AAD here, looping if needed */
/* MAGIC_AAD_XOR is xored into ebx here and again once the AAD has   */
/* been processed.  The AAD words go through the encrypt block code  */
/* with tmpBuf as destination, so the output is thrown away.         */
    xorl    $MAGIC_AAD_XOR,%ebx
    movl    aad_Len+_SO_(%esp),%ebp
    testl   %ebp,%ebp
    _if nz                              /* if nothing there, skip all aad processing */
      movl    aad_Ptr+_SO_(%esp),%edi
      movl    %ebp,aadLeft+_SO_(%esp)
      movl    %edi, srcPtr+_SO_(%esp)   /* src will come from aad_Ptr */
_aad_Loop:                              /* here with ebp == aad_Len */
      leal    tmpBuf+_SO_(%esp),%edi    /* always use tmpBuf for aad dst (discard) */
      movl    %edi,dstPtr+_SO_(%esp)
      movl    aadLeft+_SO_(%esp),%ebp
      subl    $4*UNROLL_CNT,%ebp        /* only do one unrolled loop each time */
      _if ae                            /* (since we use tmpBuf to discard ciphertext) */
        movl    %ebp,aadLeft+_SO_(%esp)
        xorl    %edi,%edi
        movl    %edi,loopByteCnt+_SO_(%esp)
        movl    $_aad_Loop,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
        jmp     EncryptBlk_0
      _endif
      /* here to handle final partial loop */
      /* ebp = aadLeft - 4*UNROLL_CNT; the mask below turns it into   */
      /* the byte offset of the last whole word within one pass       */
_aad_PartialLoop:
      andl    $4*(UNROLL_CNT-1),%ebp
      movl    %ebp,loopByteCnt+_SO_(%esp)
      cmpl    $4,%ebp
      _if ae
        movl    $_ret_aad_1,exitTab-4+_SO_(%esp,%ebp)
        jmp     EncryptBlk_0
_ret_aad_1:
        movl    loopByteCnt+_SO_(%esp),%ebp
        xorl    %edi,%edi
        movl    %edi,exitTab-4+_SO_(%esp,%ebp)  /* clear the entry */
      _endif
      /* here to handle final partial word of AAD */
      movl    aadLeft+_SO_(%esp),%ebp
      movl    %ebp,%edi
      andl    $3,%edi                   /* any odd bytes?
*/
      /* NOTE(review): _ifbrk comes from strucmac.S, which is not     */
      /* visible here; presumably it leaves the enclosing _if block   */
      /* when the condition holds -- confirm.                         */
      _ifbrk z                          /* if not, we're done with AAD */
      addl    $4,%ebp
      andl    $4*(UNROLL_CNT-1),%ebp
      movl    %ebp,loopByteCnt+_SO_(%esp)
      _push   esi
      subl    $4,%ebp
      andl    $4*(UNROLL_CNT-1),%ebp
      movl    srcPtr+_SO_(%esp),%esi
      movl    (%esi,%ebp),%esi          /* get the last AAD word */
      andl    MASK_TAB(,%edi,4),%esi    /* clear out extra bits */
      leal    tmpBuf+_SO_(%esp),%edi
      movl    %esi,(%edi)
      subl    %ebp,%edi                 /* bias by -ebp so the block code's fixed offset lands on tmpBuf */
      movl    %edi,dstPtr+_SO_(%esp)
      movl    %edi,srcPtr+_SO_(%esp)
      movl    $_ret_aad_2,exitTab+_SO_(%esp,%ebp)
      movl    %ebp,tmpBuf+4+_SO_(%esp)  /* save this */
      _pop    esi
      jmp     *Encrypt_jmpTab(%ebp)
_ret_aad_2:
      movl    tmpBuf+4+_SO_(%esp),%ebp
      xorl    %edi,%edi
      movl    %edi,exitTab+_SO_(%esp,%ebp)
    _endif
    xorl    $MAGIC_AAD_XOR,%ebx
/* ################ */
/* process the user data */
_startUserData:
    _push   esi                         /* use esi as temp pointer */
    leal    callerParms+_SO_(%esp),%esi /* (to save code size in accessing caller parms below) */
    leal    _ret_MAC0,%ebp
    movl    %ebp,retAddr_LCL(%esi)
    movl    src_Ptr_LCL(%esi),%ebp
    movl    %ebp,srcPtr+_SO_(%esp)
    movl    dst_Ptr_LCL(%esi),%edi
    movl    src_ByteCnt_LCL(%esi),%ebp
/* enter here from EncryptBytes */
/* In: edi = dst pointer, ebp = byte count, esi --> callerParms,     */
/*     one word (the real esi) on the stack, srcPtr and retAddr set  */
processUserData:
    movl    %edi,dstPtr+_SO_(%esp)
    movl    %edi,dstPtr0_LCL(%esi)
    movl    %ebp,msgLen0_LCL(%esi)
    _pop    esi                         /* restore esi */
    movl    loopByteCnt+_SO_(%esp),%edi
    andl    $4*(UNROLL_CNT-1),%edi      /* get the loop "phase" */
    subl    %edi,dstPtr+_SO_(%esp)      /* adjust pointers accordingly */
    subl    %edi,srcPtr+_SO_(%esp)
/* ################ */
/* now process the bulk of the data in "full" loop chunks (ebp = src_ByteCnt) */
    addl    %edi,%ebp
    subl    $UNROLL_CNT*4,%ebp          /* enough for one "full" loop?
*/
    /* the movl does not change the flags, so the _if below still    */
    /* tests the result of the subl that precedes this store         */
    movl    %ebp,loopByteCnt+_SO_(%esp) /* save the pre-subtracted value for use in the loop */
    _if ae
      add     jmpTabPtr+_SO_(%esp),%edi /* get ready to jump into block processing */
      movl    $_ret_DataDone1,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
      jmp     *(%edi)                   /* go encrypt or decrypt */
_ret_DataDone1:
      movl    loopByteCnt+_SO_(%esp),%ebp   /* restore ebp = loopByteCnt */
      xorl    %edi,%edi                 /* starting phase is at ??crypt_0 now */
    _endif
/* ################ */
/* now process the remainder of the data, if any (partial loop) */
/* edi = byte offset of the first block still to run, ebp becomes    */
/* the byte offset just past the last whole word                     */
    andl    $4*(UNROLL_CNT-1),%ebp      /* compute ebp = end phase */
    cmpl    %edi,%ebp                   /* any partial loop to do? */
    _if nz
      movl    %ebp,loopByteCnt+_SO_(%esp)   /* make sure that the exit loop test falls thru */
      addl    jmpTabPtr+_SO_(%esp),%edi /* get ready to jump */
      movl    $_ret_DataDone2,exitTab-4+_SO_(%esp,%ebp) /* force an exit at the correct point */
      jmp     *(%edi)
_ret_DataDone2:
      xorl    %edi,%edi                 /* edi = 0 */
      movl    loopByteCnt+_SO_(%esp),%ebp
      andl    $4*(UNROLL_CNT-1),%ebp    /* recompute exitTab index */
      movl    %edi,exitTab-4+_SO_(%esp,%ebp)    /* clear the exitTab entry */
    _endif
/* ################ */
/* special (i.e. UGLY!!) handling when src_ByteCnt isn't a multiple of 4 */
/* here with ebp = loopByteCnt AND 4*(UNROLL_CNT-1) */
    movl    msgLen0+_SO_(%esp),%edi     /* get original msgLen */
    andl    $3,%edi                     /* any partial words?
(hopefully rare) */
    _if nz
      movl    $_ret_OddBytes,exitTab+_SO_(%esp,%ebp)
      orl     %ebp,%edi                 /* save word index and odd byte count */
      movl    %edi,loopByteCnt+_SO_(%esp)   /* back into loopByteCnt */
      _push   esi
      andl    $3,%edi
      movl    srcPtr+_SO_(%esp),%esi
      addl    %ebp,%esi
      _push   ebp
      movl    MASK_TAB(,%edi,4),%edi    /* get the mask bits */
      /* NOTE(review): this is a full 32-bit load although only 1-3   */
      /* source bytes are valid; the extra bytes are masked off below */
      movl    (%esi),%ebp               /* and get the source word */
      leal    tmpBuf+_SO_(%esp),%esi
      andl    %edi,%ebp                 /* ebp = masked source word */
      movl    %edi,8(%esi)              /* save the mask bits (for use in Decrypt_OddBytes) */
      movl    %ebp, (%esi)              /* save the masked source word */
      _pop    ebp
      subl    %ebp,%esi                 /* adjust src/dst ptrs for hard coded offsets in block code */
      movl    %esi,srcPtr+_SO_(%esp)    /* set up for "single-word" encrypt in tmpBuf[] */
      addl    $4,%esi
      movl    %esi,dstPtr+_SO_(%esp)
      mov     jmpTabPtr+_SO_(%esp),%edi /* dispatch to different handler for Encrypt & Decrypt */
      _pop    esi
      jmp     *OddBytes_OFFS(%edi)
/* */
/* here to handle the odd-byte encrypt case */
Encrypt_OddBytes:
      jmp     *Encrypt_jmpTab(%ebp)     /* go encrypt the single word */
/* */
/* here to handle the funky odd-byte decrypt case */
Decrypt_OddBytes:
      /* we have to encrypt halfway thru the block to compute keystream :-(( */
      /* (i.e., in order to produce the "full" ciphertext word) */
      /* All five state registers and ebp are saved and restored      */
      /* around this trial computation; only tmpBuf is changed.       */
      _push   eax,ebx,ecx,edx,esi,ebp
      _o_ "addl %edx,%eax","roll $ROT_3b,%edx","mov X_i_0+_SO_(%esp,%ebp),%ebp" /* get the key word */
      _o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
      _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
      _o_ "xorl %ebx,%edx" ,"add %edx,%ebp"
      _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","mov loopByteCnt+_SO_(%esp),%edi"
      _o_ "xorl %ebp,%eax","roll $ROT_4a,%esi","and $4*3,%edi"
      _o_ "addl %eax,%ecx" ,"mov oldZ+_SO_(%esp,%edi),%ebp"
      _o_ "xorl %ecx,%esi"
      addl    %esi,%ebp                 /* now ebp = keystream */
      movl    tmpBuf+8+_SO_(%esp),%edi  /* get the mask word */
      notl    %edi                      /* toggle the maskbits */
      andl    %ebp,%edi                 /* mask off unused maskbits */
      xorl    %edi,tmpBuf+_SO_(%esp)    /* re-create the "full" ciphertext word @ tmp src buffer */
      _pop    ebp,esi,edx,ecx,ebx,eax
      jmp     *Decrypt_jmpTab(%ebp)     /* go decrypt */
/* "return" here with the dest word computed at [tmpBuf+4] */
/* Only the low (msgLen0 mod 4) bytes of the final destination word   */
/* are replaced; its remaining bytes are written back unchanged.      */
_ret_OddBytes:
      _push   esi,eax
      leal    callerParms+_SO_(%esp),%esi
      xorl    %edi,%edi
      movl    loopByteCnt+_SO_(%esp),%ebp
      andl    $4*(UNROLL_CNT-1),%ebp
      movl    %edi,exitTab+_SO_(%esp,%ebp)  /* clear out the exitTab entry we just used */
      movl    msgLen0+_SO_(%esp),%edi   /* now output just the number of dst bytes specified */
      movl    %edi,%ebp
      andl    $3,%ebp
      xorl    %ebp,%edi                 /* clear low 2 bits of count */
      addl    dstPtr0_LCL(%esi),%edi    /* point to "final" word offset */
      movl    tmpBuf_LCL+4(%esi),%eax   /* get the dst output word (short offset) */
      xorl    (%edi),%eax               /* do bit diddling to output just the odd bytes */
      andl    MASK_TAB(,%ebp,4),%eax
      xorl    %eax,(%edi)
      _pop    eax,esi
    _endif
    jmp     *retAddr+_SO_(%esp)         /* "return" to whomever */
_ret_MAC0:
/* ################ */
/* here to compute and output/compare the MAC */
/* (packet path: the caller's 32-bit aad_Len is xored into esi) */
    movl    mac_Ptr+_SO_(%esp),%ebp
    xorl    aad_Len+_SO_(%esp),%esi
/* In: ebp = where to write the MAC; eax..esi = cipher state;        */
/*     loopByteCnt = word phase plus low two bits of message length  */
processMAC:
    movl    %ebp,dstPtr0+_SO_(%esp)     /* save MAC ptr */
    xorl    $MAGIC_MAC_XOR,%eax         /* toggle bits to start the MAC */
    _push   esi
    movl    loopByteCnt+_SO_(%esp),%ebp
    movl    %ebp,%edi
    addl    $3,%ebp                     /* advance to next full word, if odd bytes */
    andl    $4*(UNROLL_CNT-1),%ebp      /* ebp = next word "offset" within block */
    andl    $3,%edi                     /* edi = length of src mod 4 (plaintext for MAC) */
    leal    tmpBuf+_SO_(%esp),%esi
    .set    _bb_,0
    .rept   12                          /* 8 for padding, 4 for MAC size */
    movl    %edi,_bb_(%esi)             /* fill tmpBuf with L(P) mod 4 */
    .set    _bb_,_bb_+4
    .endr
    leal    7*4(%ebp),%edi
    andl    $4*(UNROLL_CNT-1),%edi      /* stop point is after 8 blocks (i+0..i+7) */
    movl    $_ret_MAC1,exitTab+_SO_(%esp,%edi)
    subl    %ebp,%esi                   /* set up source/dest pointers */
    movl    %esi,srcPtr+_SO_(%esp)
    movl    %esi,dstPtr+_SO_(%esp)
    addl    $8*4-1,%ebp                 /* FUNKY wrap logic requires -1 */
    movl    %ebp,loopByteCnt+_SO_(%esp)
    incl    %ebp                        /* undo adjustment */
    andl    $4*(UNROLL_CNT-1),%ebp
    _pop    esi
    jmp     *Encrypt_jmpTab(%ebp)       /* go do the encryption */
/* just finished eight blocks of "padding" using L(P) mod 4 */
/* now
generate the MAC */
_ret_MAC1:
    movl    loopByteCnt+_SO_(%esp),%ebp
    incl    %ebp                        /* undo the -1 above */
    andl    $4*(UNROLL_CNT-1),%ebp
    leal    3*4(%ebp),%edi              /* do four more (0..3 -- stop after #3) */
    andl    $4*(UNROLL_CNT-1),%edi
    movl    $_ret_MAC2,exitTab+_SO_(%esp,%edi)
    leal    4*4-1(%ebp),%edi            /* FUNKY wrap logic requires -1 */
    movl    %edi,loopByteCnt+_SO_(%esp)
    jmp     *Encrypt_jmpTab(%ebp)
/* */
/* here with the MAC computed. eax..esi now can be trashed */
/* Copies macSize bits (rounded up to whole bytes when macSize is    */
/* not a multiple of 32) from tmpBuf+8*4 to the address saved in     */
/* dstPtr0.  There is no cld here: the string moves use whatever     */
/* direction flag is in effect on arrival.                           */
_ret_MAC2:
    leal    callerParms+_SO_(%esp),%esi
    movl    ctxt_Ptr_LCL(%esi),%edi
    movl    macSize(%edi),%ecx          /* ecx = # bits in MAC */
    movl    dstPtr0_LCL(%esi),%edi
    leal    tmpBuf+8*4+_SO_(%esp),%esi
    testl   $31,%ecx                    /* can we do it one word at a time? */
    _if z
      shrl    $5,%ecx                   /* if so, it's faster */
      rep movsl
    _else
      addl    $7,%ecx                   /* round up to byte boundary */
      shrl    $3,%ecx                   /* non-word sizes get the slow treatment */
      rep movsb
    _endif
/* ################ */
/* tear down the stack and return */
    addl    $_Phelix_LocalSize,%esp
    popal                               /* restore all of callers regs */
    ret                                 /* and return to caller */
/* */
/* ---------------------------------------------------------------- */
/* Common subroutine (for use in Phelix_Main) to init subkeys */
/* ---------------------------------------------------------------- */
/* In: ebp --> pCtxt (const) */
/* edx --> nonce (const) */
/* edi = X' value for I */
/* esi = value of I (0..3) */
/* Out: esi incremented.
ebp, edx unmodified */
/* edi = oldZ[I] = 0 */
/* X_i_0, X_i_1 set on stack for both i=I and i=I+4 */
/* edi */
/* Also clobbers eax, ebx and ecx, and zeroes exitTab[I] and every   */
/* fourth entry after it (UNROLL_CNT/4 entries per call).            */
    .set    _SO_,12                     /* two words on stack before call */
SetTwoKeys:
    movl    X_0+4*0(%ebp,%esi,4),%eax   /* load two key values */
    movl    X_0+4*4(%ebp,%esi,4),%ebx
    movl    %eax,X_i_0+4*0+_SO_(%esp,%esi,4)    /* store the X_i_0 values */
    movl    %ebx,X_i_0+4*4+_SO_(%esp,%esi,4)
    movl    (%edx,%esi,4),%ecx          /* get ecx = N_i */
    addl    %edi,%eax                   /* add in 4*L(U), for esi == 1 */
    addl    %edi,%ebx
    addl    %ecx,%ebx                   /* add/sub the nonce value */
    subl    %ecx,%eax
    addl    %esi,%eax
    xorl    %edi,%edi                   /* set edi = 0 */
    movl    %ebx,X_i_1+4*0+_SO_(%esp,%esi,4)    /* store the X_i_1 values */
    movl    %eax,X_i_1+4*4+_SO_(%esp,%esi,4)
    movl    %edi,oldZ+_SO_(%esp,%esi,4) /* zero out the oldZ values */
    .set    _NN_,0
    .rept   UNROLL_CNT/4                /* init the "block exit" jump table: all zeroes */
    movl    %edi,exitTab+_NN_+_SO_(%esp,%esi,4)
    .set    _NN_,_NN_ + 16
    .endr
    incl    %esi                        /* bump the counter for next call */
    ret
/* */
    .set    _SO_,0                      /* back to no offset */
/* */
/* ---------------------------------------------------------------- */
/* Encryption routines */
/* ---------------------------------------------------------------- */
/* */
    .align  4
C_global PhelixEncryptPacket,ECRYPT_AE_encrypt_packet
    PhelixAlgo Encrypt                  /* instantiate the algorithm code */
/* */
/* the main block processing loop */
/* */
/* Register roles (see the register-assignment table near the top):  */
/* eax,ebx,ecx,edx,esi = Z0..Z4, ebp and edi = scratch.  Each pass   */
/* of the .irp emits one block at label EncryptBlk_<n>; _rept and    */
/* _until come from strucmac.S.                                      */
    _rept
    .irp    _blkNum_,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
    .if     \_blkNum_ < UNROLL_CNT
    strCat  EncryptBlk_,\_blkNum_,":"   /* make a label for re-entry points */
    .set    _bb_,\_blkNum_ & 7          /* support UNROLL_CNT > 8 */
    _o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_i_0+4*_bb_+_SO_(%esp),%ebp"
    _o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
    _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl srcPtr+_SO_(%esp),%edi"
    _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp" /* does LEA opcode help here?
*/
    _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
    _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl 4*_bb_(%edi),%ebp" /* ebp = plaintext */
    _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","movl oldZ+4*(_bb_&3)+_SO_(%esp),%edi"
    _o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
    _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","xorl %edx,%ebp"
    _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
    addl    %esi,%edi                   /* now edi = keystream */
    /* ebp already holds plaintext ^ edx, so the edx term cancels    */
    /* when ebp is xored into edi two lines down                     */
    xorl    %edx,%edi                   /* set up to compute edi = ciphertext below */
    _o_ "addl %ebp,%eax","roll $ROT_3b,%edx","xorl %ebp,%edi" /* now edi = ciphertext */
    _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_i_1+4*_bb_+_SO_(%esp),%ebp"
    _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
    _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl _i_+_SO_(%esp),%ebp"
    _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","leal _bb_(%ebp,%edx),%ebp"
    _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl dstPtr+_SO_(%esp),%ebp"
    _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
    _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,4*_bb_(%ebp)" /* save ciphertext */
    _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","movl exitTab+4*\_blkNum_+_SO_(%esp),%edi"
    _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
    PhelixEarlyExit edi,\_blkNum_       /* do we need to do an early exit? If so, do it */
    .endif
    .endr
    PhelixEndLoop UNROLL_CNT            /* set condition code for _until below */
    _until b
    jmp     *exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)    /* "return" to do more */
/* */
/* ---------------------------------------------------------------- */
/* Decryption routine */
/* ---------------------------------------------------------------- */
/* */
    .align  4
C_global PhelixDecryptPacket,ECRYPT_AE_decrypt_packet
    PhelixAlgo Decrypt                  /* instantiate the algorithm code */
/* */
/* the main block processing loop */
/* */
/* Same structure as the encrypt loop; each pass of the .irp emits   */
/* one block at label DecryptBlk_<n>.                                */
    _rept
    .irp    _blkNum_,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
    .if     \_blkNum_ < UNROLL_CNT
    strCat  DecryptBlk_,\_blkNum_,":"   /* make a label for re-entry points */
    .set    _bb_,\_blkNum_ & 7          /* support UNROLL_CNT > 8 (but not really!)
*/
    _o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_i_0+4*_bb_+_SO_(%esp),%ebp"
    _o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
    _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl srcPtr+_SO_(%esp),%edi"
    _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp"
    _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
    _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl 4*_bb_(%edi),%ebp" /* ebp = ciphertext */
    _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","movl oldZ+4*(_bb_&3)+_SO_(%esp),%edi"
    _o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
    _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
    _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
    /* unlike the encrypt block, the plaintext is only known once    */
    /* the keystream has been formed, so it is derived here first    */
    addl    %esi,%edi                   /* set edi = keystream */
    xorl    %ebp,%edi                   /* now edi = plaintext */
    movl    %edx,%ebp
    xorl    %edi,%ebp                   /* now ebp = plaintext ^ edx */
    _o_ "addl %ebp,%eax","roll $ROT_3b,%edx"
    _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_i_1+4*_bb_+_SO_(%esp),%ebp"
    _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
    _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl _i_+_SO_(%esp),%ebp"
    _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","leal _bb_(%ebp,%edx),%ebp"
    _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl dstPtr+_SO_(%esp),%ebp"
    _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
    _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,4*_bb_(%ebp)" /* save plaintext computed above */
    _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","movl exitTab+4*\_blkNum_+_SO_(%esp),%edi"
    _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
    PhelixEarlyExit edi,\_blkNum_       /* do we need to do an early exit? If so, do it */
    .endif
    .endr
    PhelixEndLoop UNROLL_CNT            /* set condition code for _until below */
    _until b
    jmp     *exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)    /* "return" to do more */
/* */
_PhelixCodeEnd_:

/* everything from here on is assembled only when                    */
/* PHELIX_INCREMENTAL_API is defined (it is .set near the top)       */
    .ifdef  PHELIX_INCREMENTAL_API
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: SetupNonce */
/* ---------------------------------------------------------------- */
/* use same stack as EncryptPacket!
*/
/* */
/* PhelixSetupNonce(ctxt_Ptr, nonce_Ptr): runs InitNonce and the     */
/* initialization blocks on a temporary stack frame, then stores the */
/* resulting state (Z0..Z4, the X_1 subkeys, oldZ, blkNum) in the    */
/* context and clears the msgLen and aadLen counters.  MAGIC_AAD_XOR */
/* is xored into Z1 and recorded in aadXor.  All caller registers    */
/* are preserved (pushal/popal).                                     */
C_global PhelixSetupNonce,ECRYPT_AE_ivsetup
    pushal
    lea     callerParms-_Phelix_LocalSize(%esp),%esi
    subl    $_Phelix_LocalSize,%esp
    _SO_ = 0
    call    InitNonce
    movl    $_ret_SetupNonceDone,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
    jmp     EncryptBlk_0
_ret_SetupNonceDone:
    .if     UNROLL_CNT > ZERO_INIT_CNT  /* do we need to clear out the return point? */
    .err    "Replicate code here from _ret_InitZeroDone"
    .endif
    movl    ctxt_Ptr+_SO_(%esp),%ebp    /* save our context */
/* */
    movl    $MAGIC_AAD_XOR,%edi
    xorl    %edi,%ebx
    movl    %edi,aadXor(%ebp)
/* */
    movl    %eax,4*0+_Z_(%ebp)
    movl    %ebx,4*1+_Z_(%ebp)
    movl    %ecx,4*2+_Z_(%ebp)
    movl    %edx,4*3+_Z_(%ebp)
    movl    %esi,4*4+_Z_(%ebp)
    /* copy the eight X_i_1 words and four oldZ words into the context */
    .irp    _nn_,0,1,2,3
    movl    X_i_1+8*\_nn_ +_SO_(%esp),%eax
    movl    X_i_1+8*\_nn_+4+_SO_(%esp),%ebx
    movl    oldZ +4*\_nn_ +_SO_(%esp),%ecx
    movl    %eax,X_1+ 8*\_nn_(%ebp)
    movl    %ebx,X_1+4+8*\_nn_(%ebp)
    movl    %ecx,old_Z+4*\_nn_(%ebp)
    .endr
/* */
    xorl    %edi,%edi
    movl    %edi,msgLen (%ebp)
    movl    %edi,aadLen (%ebp)
    movl    %edi,aadLen+4(%ebp)
    movl    _i_+_SO_(%esp),%edi
    movl    %edi,blkNum(%ebp)
/* */
    addl    $_Phelix_LocalSize,%esp
    popal
    ret
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: EncryptBytes/DecryptBytes */
/* ---------------------------------------------------------------- */
/* use same locals stack as EncryptPacket */
/* */
    _pOfs_ = _cpOfs_
/* */
    _newParm 1,ctxt_Ptr
    _newParm 1,src_Ptr
    _newParm 1,dst_Ptr
    _newParm 1,bCnt
/* */
/* PhelixEncryptBytes(ctxt_Ptr, src_Ptr, dst_Ptr, bCnt)              */
/* PhelixDecryptBytes(ctxt_Ptr, src_Ptr, dst_Ptr, bCnt)              */
/* Both load the matching jump table into ebp and share PhelixBytes, */
/* which copies the context onto a packet-style stack frame, runs    */
/* processUserData and writes the updated state back at              */
/* _ret_PhelixBytes.  All caller registers are preserved.            */
C_global PhelixEncryptBytes,ECRYPT_AE_encrypt_bytes
    pushal
    leal    Encrypt_jmpTab,%ebp
PhelixBytes:
    leal    callerParms-_Phelix_LocalSize(%esp),%esi
    subl    $_Phelix_LocalSize,%esp
    .set    _SO_,0
    movl    %ebp,jmpTabPtr+_SO_(%esp)
    /* copy context to local on stack */
    movl    ctxt_Ptr_LCL(%esi),%ebp
    _push   esi
    leal    X_0(%ebp),%esi
    leal    X_i_0+_SO_(%esp),%edi
    movl    $8+8+4,%ecx                 /* X_0, X_1, and oldZ */
    cld
    rep movsl                           /* copy the context */
    xorl    %eax,%eax
    /* edi now points at _i_, the local between oldZ and exitTab, so  */
    /* one extra word is needed to reach the last exitTab entry.  The */
    /* count used to be UNROLL_CNT, which left that entry unset.      */
    /* _i_ is given its real value further down.                      */
    movl    $UNROLL_CNT+1,%ecx          /* zero out _i_ and all of exitTab */
    rep stosl
    _pop    esi
    leal    _ret_PhelixBytes,%ebp
    movl    %ebp,retAddr_LCL(%esi)      /* set up return address */
    movl    src_Ptr_LCL(%esi),%ebp      /* copy srcPtr and dstPtr */
    movl    %ebp,srcPtr+_SO_(%esp)
    movl    dst_Ptr_LCL(%esi),%ebp
    movl    %ebp,dstPtr+_SO_(%esp)
    movl    ctxt_Ptr_LCL(%esi),%ebp
    movl    blkNum(%ebp),%edi           /* convert blkNum from pCtxt to locals */
    andl    $~(UNROLL_CNT-1),%edi
    movl    %edi,_i_+_SO_(%esp)
    movl    blkNum(%ebp),%edi
    shll    $2,%edi                     /* convert blkNum to a word count */
    movl    %edi,loopByteCnt+_SO_(%esp) /* and save it as the "phase" */
    movl    _Z_+4*0(%ebp),%eax          /* load the Z values */
    movl    _Z_+4*1(%ebp),%ebx
    movl    _Z_+4*2(%ebp),%ecx
    movl    _Z_+4*3(%ebp),%edx
    movl    _Z_+4*4(%ebp),%esi
    /* apply the stored aadXor value to Z1 once, then clear it */
    xorl    aadXor(%ebp),%ebx
    movl    $0,aadXor(%ebp)
    /* processUserData expects the real esi on the stack and          */
    /* esi --> callerParms; it pops esi itself                        */
    _push   esi
    leal    callerParms+_SO_(%esp),%esi
    movl    src_Ptr_LCL(%esi),%ebp
    movl    %ebp,srcPtr+_SO_(%esp)
    movl    bCnt_LCL(%esi),%ebp
    movl    dst_Ptr_LCL(%esi),%edi
    jmp     processUserData
    /* never executed: it sits after an unconditional jmp and is      */
    /* kept so that _SO_ returns to 0 for the code below              */
    _pop    esi
_ret_PhelixBytes:
    /* copy modified value back to context */
    movl    ctxt_Ptr+_SO_(%esp),%ebp
    movl    %eax,_Z_+4*0(%ebp)          /* store the values Z0..Z4 */
    movl    %ebx,_Z_+4*1(%ebp)
    movl    %ecx,_Z_+4*2(%ebp)
    movl    %edx,_Z_+4*3(%ebp)
    movl    %esi,_Z_+4*4(%ebp)
    movl    msgLen0+_SO_(%esp),%edi     /* update pCtxt.blkNum */
    movl    %edi,%esi
    addl    $3,%edi
    shrl    $2,%edi                     /* words consumed, a partial word counts as one */
    addl    %edi,blkNum(%ebp)
    addl    %esi,msgLen(%ebp)           /* track low 2 bits of msgLen */
    leal    old_Z(%ebp),%edi
    leal    oldZ+_SO_(%esp),%esi
    movl    $4,%ecx                     /* copy back the updated oldZ values */
    rep movsl
    addl    $_Phelix_LocalSize,%esp
    popal
    ret
/* */
/* handle decryption here */
C_global PhelixDecryptBytes,ECRYPT_AE_decrypt_bytes
    pushal
    leal    Decrypt_jmpTab,%ebp
    jmp     PhelixBytes
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: Finalize (MAC) */
/* ---------------------------------------------------------------- */
/* use same locals stack as EncryptPacket */
/* */
    _pOfs_ = _cpOfs_
    _newParm 1,ctxt_Ptr
    _newParm 1,mac_Ptr
/* */
/* PhelixFinalize(ctxt_Ptr, mac_Ptr) */
C_global PhelixFinalize,ECRYPT_AE_finalize
    pushal
    leal    callerParms-_Phelix_LocalSize(%esp),%esi
    subl    $_Phelix_LocalSize,%esp
    .set    _SO_,0
    leal    Encrypt_jmpTab,%ebp
    movl    %ebp,jmpTabPtr+_SO_(%esp)
    /* copy
context to local on stack */
    movl    ctxt_Ptr_LCL(%esi),%ebp
    _push   esi
    leal    X_0(%ebp),%esi
    leal    X_i_0+_SO_(%esp),%edi
    movl    $8+8+4,%ecx                 /* X_0, X_1, and oldZ */
    cld
    rep movsl                           /* copy the context */
    xorl    %eax,%eax
    /* edi now points at _i_, the local between oldZ and exitTab, so  */
    /* one extra word is needed to reach the last exitTab entry.  The */
    /* count used to be UNROLL_CNT, which left that entry unset.      */
    /* _i_ is given its real value just below.                        */
    movl    $UNROLL_CNT+1,%ecx          /* zero out _i_ and all of exitTab */
    rep stosl
    _pop    esi
    movl    ctxt_Ptr_LCL(%esi),%ebp
    movl    blkNum(%ebp),%edi           /* convert blkNum from pCtxt to locals */
    andl    $~(UNROLL_CNT-1),%edi
    movl    %edi,_i_+_SO_(%esp)
    /* eax = (4 - msgLen) & 3, so that 4*blkNum - eax carries         */
    /* msgLen mod 4 in its low two bits                               */
    movl    msgLen(%ebp),%eax
    subl    $4,%eax
    negl    %eax
    andl    $3,%eax                     /* track the low 2 bits of msgLen */
    movl    blkNum(%ebp),%edi
    shll    $2,%edi                     /* convert blkNum to a word count */
    subl    %eax,%edi
    movl    %edi,loopByteCnt+_SO_(%esp) /* and save it as the "phase" */
    movl    _Z_+4*0(%ebp),%eax          /* load the Z values */
    movl    _Z_+4*1(%ebp),%ebx
    movl    _Z_+4*2(%ebp),%ecx
    movl    _Z_+4*3(%ebp),%edx
    movl    _Z_+4*4(%ebp),%esi
    /* fold in the pending aadXor value and the 64-bit aadLen         */
    /* counter (low word into esi, high word into ecx)                */
    xorl    aadXor (%ebp),%ebx
    xorl    aadLen (%ebp),%esi
    xorl    aadLen+4(%ebp),%ecx
    movl    mac_Ptr+_SO_(%esp),%ebp
    /* processMAC runs on to the common stack tear-down and ret */
    jmp     processMAC
/* */
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: ProcessAAD */
/* ---------------------------------------------------------------- */
    .set    _Phelix_LocalSize,0
    _newLocal 1,aad_I                   /* different local stack from above! */
    _newLocal 1,aad_bb
    _newLocal 1,aad_tmp
/* */
    _cpOfs_ = 4+8*4+_Phelix_LocalSize   /* caller parms offset from esp */
    _pOfs_ = _cpOfs_
/* */
    _newParm 1,ctxt_Ptr
    _newParm 1,aad_Ptr
    _newParm 1,aad_Len
/* */
/* PhelixProcessAAD(ctxt_Ptr, aad_Ptr, aad_Len)                      */
/* Works directly on the context (subkeys read from X_0 and X_1,     */
/* old_Z written in place) instead of a stack copy.  The caller's    */
/* aad_Ptr and aad_Len stack slots are used as working variables     */
/* and are modified.  All caller registers are preserved.            */
C_global PhelixProcessAAD,ECRYPT_AE_authenticate_bytes
    pushal
    subl    $_Phelix_LocalSize,%esp
    .set    _SO_,0
    movl    ctxt_Ptr+_SO_(%esp),%ebp    /* point to context */
    movl    aad_Len+_SO_(%esp),%edi
    addl    %edi,aadLen (%ebp)          /* update accumulated length */
    adcl    $0 ,aadLen+4(%ebp)
    movl    blkNum(%ebp),%edi
    movl    %edi,aad_I+_SO_(%esp)
    movl    _Z_+4*0(%ebp),%eax          /* load the Z values */
    movl    _Z_+4*1(%ebp),%ebx
    movl    _Z_+4*2(%ebp),%ecx
    movl    _Z_+4*3(%ebp),%edx
    movl    _Z_+4*4(%ebp),%esi
    subl    $4,aad_Len+_SO_(%esp)       /* are we done yet?
*/
        /* AAD block loop. Each pass consumes one 32-bit word from aad_Ptr and */
        /* updates Z0..Z4 (eax,ebx,ecx,edx,esi); ebp and edi serve as scratch. */
        /* _rept/_until and _o_ are macros defined outside this excerpt. */
        /* NOTE(review): presumably "_rept ae" skips the loop unless the preceding */
        /* subl left no borrow, and _o_ emits its quoted operands as consecutive */
        /* instructions in the order given -- confirm in strucmac.S. */
        _rept ae
aad_Again:movl aad_I+_SO_(%esp),%edi
        andl $7,%edi /* edi = aad_I & 7: index into X_0 / X_1 */
        movl ctxt_Ptr+_SO_(%esp),%ebp
        _o_ "addl %edx,%eax","roll $ROT_3b,%edx","movl X_0(%ebp,%edi,4),%ebp"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl %edi,aad_bb+_SO_(%esp)"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax","movl aad_Ptr+_SO_(%esp),%edi"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl %edx,%ebp"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
        _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl (%edi),%ebp" /* ebp = AAD plaintext */
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","addl $4,%edi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","movl %edi,aad_Ptr+_SO_(%esp)"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx","xorl %edx,%ebp"
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx","movl aad_bb+_SO_(%esp),%edi"
        _o_ "addl %ebp,%eax","roll $ROT_3b,%edx","movl ctxt_Ptr+_SO_(%esp),%ebp"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi","movl X_1(%ebp,%edi,4),%ebp"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx","addl aad_I+_SO_(%esp),%ebp"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","addl %edx,%ebp"
        _o_ "xorl %ebp,%eax","roll $ROT_3a,%edx","movl ctxt_Ptr+_SO_(%esp),%ebp"
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi","andl $3,%edi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax","incl aad_I+_SO_(%esp)"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
        /* the last column below stores Z4 (esi) into the context's old_Z, using the */
        /* index in edi that "andl $3" reduced from the saved aad_bb a few lines up */
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx","movl %esi,old_Z(%ebp,%edi,4)"
        subl $4,aad_Len+_SO_(%esp) /* are we done yet? */
        _until b
        /* note ebp == ctxt_Ptr here */
        movl aad_Len+_SO_(%esp),%edi /* at this point, -4 <= aad_Len < 0 */
        andl $3,%edi /* any odd bytes left?
*/
        /* edi = aad_Len & 3 = number of trailing AAD bytes not yet processed */
        _if z /* if not, we're done */
        movl aad_I+_SO_(%esp),%edi /* copy back the updated blkNum */
        movl %edi,blkNum(%ebp)
        movl %eax,_Z_+4*0(%ebp) /* save the Z values */
        movl %ebx,_Z_+4*1(%ebp)
        movl %ecx,_Z_+4*2(%ebp)
        movl %edx,_Z_+4*3(%ebp)
        movl %esi,_Z_+4*4(%ebp)
        /* clean up the stack and return */
        addl $_Phelix_LocalSize,%esp
        popal
        ret
        _endif
        /* here to handle odd AAD bytes */
        /* NOTE(review): the load below reads a full 32-bit word at aad_Ptr even though */
        /* only edi (1..3) bytes of it are AAD -- confirm callers tolerate the over-read */
        movl aad_Ptr+_SO_(%esp),%ebp /* get the final partial word */
        movl (%ebp),%ebp
        andl MASK_TAB(,%edi,4),%ebp /* mask off unused bits (MASK_TAB is defined outside this excerpt) */
        leal aad_tmp+_SO_(%esp),%edi
        movl %edi,aad_Ptr+_SO_(%esp) /* point aad_Ptr to aad_Tmp */
        movl %ebp,(%edi) /* store zero-padded word there */
        xorl %ebp,%ebp /* fix up the count to not come here again */
        movl %ebp,aad_Len+_SO_(%esp) /* aad_Len = 0: one more pass, then the exit test sees -4 */
        jmp aad_Again
/* */
/* ---------------------------------------------------------------- */
/* "Incremental" function: SetupKey */
/* ---------------------------------------------------------------- */
/* Caller parameters (one word each): ctxt_Ptr, key_Ptr, key_Size, iv_Size, */
/* mac_Size. key_Size is treated as a bit count (it is shifted right by 3 to */
/* get bytes). iv_Size is declared but not read by the code below. */
/* Locals (one word each): sk_esi, sk_Cnt. */
/* Writes keySize, macSize, X_1_Bump and the eight words at X_0 in the context. */
/* */
        .set _Phelix_LocalSize,0
        _newLocal 1,sk_esi
        _newLocal 1,sk_Cnt
/* */
_cpOfs_ = 4+8*4+_Phelix_LocalSize /* caller parms offset from esp: return address + pushal frame + locals */
_pOfs_ = _cpOfs_
/* */
        _newParm 1,ctxt_Ptr
        _newParm 1,key_Ptr
        _newParm 1,key_Size
        _newParm 1,iv_Size
        _newParm 1,mac_Size
/* */
/* assert(PHELIX_NONCE_SIZE==ivSize)# Phelix only supports "full" nonces */
/* assert( 0 == (keySize%8))# Phelix only supports byte-sized keys */
/* assert(256 >= keySize)# Phelix only supports keys <= 256 bits*/
/* (none of the three conditions above is checked by the code that follows) */
/* */
C_global PhelixSetupKey,ECRYPT_AE_keysetup
        pushal
        subl $_Phelix_LocalSize,%esp
        .set _SO_,0
        movl ctxt_Ptr+_SO_(%esp),%ebp /* point to the context to be built */
        movl key_Size+_SO_(%esp),%eax /* copy keySize */
        movl %eax,keySize(%ebp)
        movl mac_Size+_SO_(%esp),%ebx /* and macSize */
        movl %ebx,macSize(%ebp)
        andl $127,%ebx /* and compute X1_Bump = ((macSize & 127) << 8) + keySize/2 */
        shll $8 ,%ebx
        shrl $1 ,%eax /* eax = keySize/2 (in bits) */
        addl %eax,%ebx
        movl %ebx,X_1_Bump(%ebp) /* then store it */
        shrl $2 ,%eax /* eax = keySize/8 (# bytes of key) */
        /* now copy in the key bits */
        movl key_Ptr+_SO_(%esp),%edi
        xorl %ebx,%ebx /* ebx = counter (byte offset into the key and into X_0) */
        /* _rept/_brk/_endr/_if/_endif/_until are macros defined outside this excerpt. */
        /* NOTE(review): presumably "_brk ae" leaves the loop once ebx >= eax */
        /* (unsigned) -- confirm in strucmac.S. */
        _rept
        cmpl %eax,%ebx /* is this full word part of the key? */
        _brk ae /* if not, go handle partial word (if any) */
        movl (%edi,%ebx),%ecx /* else get next full word of key */
        movl %ecx,X_0(%ebp,%ebx) /* and copy it to context */
        addl $4,%ebx /* bump counter */
        _endr /* go back for more */
        testl $3,%eax /* if any partial words, handle that here */
        _if nz
        movl %eax,%esi
        andl $3,%esi /* esi = (keySize/8) mod 4 */
        movl MASK_TAB(,%esi,4),%ecx /* mask off "unused" bits */
        andl %ecx,X_0-4(%ebp,%ebx) /* applied to the last word copied above */
        _endif
        xorl %ecx,%ecx /* zero out the rest of the context key */
        _rept
        cmpl $8*4,%ebx /* are we done yet? (X_0 is 8 words = 32 bytes) */
        _brk ae
        movl %ecx,X_0(%ebp,%ebx) /* zero context key */
        addl $4,%ebx
        _endr
        /* now run the Feistel network for initial key mixing */
        addl $64,%eax /* eax = key length in bytes + 64 */
        movl %eax,sk_esi+_SO_(%esp) /* precompute L(U)+64 "constant" for mixing */
        movl $128,sk_Cnt+_SO_(%esp) /* use this as a counter: 128 down to 0 in steps of 16 */
        _rept
        movl sk_Cnt+_SO_(%esp),%edi
        andl $16,%edi /* isolate one bit: edi = 0 or 16, the byte offset of one 4-word half of X_0 */
        movl X_0+4*0(%ebp,%edi),%eax /* load that half into eax..edx */
        movl X_0+4*1(%ebp,%edi),%ebx
        movl X_0+4*2(%ebp,%edi),%ecx
        movl X_0+4*3(%ebp,%edi),%edx
        movl sk_esi+ _SO_(%esp),%esi /* esi = the constant saved above */
        .rept 2 /* unroll just a bit */
        _o_ "addl %edx,%eax","roll $ROT_3b,%edx"
        _o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
        _o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
        _o_ "xorl %ebx,%edx","roll $ROT_1a,%ebx"
        _o_ "addl %ecx,%esi","roll $ROT_2a,%ecx"
        _o_ "xorl %edx,%eax","roll $ROT_3a,%edx"
        _o_ "xorl %esi,%ebx","roll $ROT_4a,%esi"
        _o_ "addl %eax,%ecx","roll $ROT_0b,%eax"
        _o_ "addl %ebx,%edx","roll $ROT_1b,%ebx"
        _o_ "xorl %ecx,%esi","roll $ROT_2b,%ecx"
        .endr
        xorl $16,%edi /* go to other half */
        xorl %eax,X_0+4*0(%ebp,%edi) /* perform the Feistel xor */
        xorl %ebx,X_0+4*1(%ebp,%edi)
        xorl %ecx,X_0+4*2(%ebp,%edi)
        xorl %edx,X_0+4*3(%ebp,%edi)
        subl $16,sk_Cnt+_SO_(%esp)
        _until be
        /* clean up the stack and return */
        addl $_Phelix_LocalSize,%esp
        popal
        ret
/* */
/* ---------------------------------------------------------------- */
/* Loads into eax the distance from _PhelixCodeStart_ (defined outside this */
/* excerpt) to this instruction. */
/* */
C_global PhelixIncremental_CodeSize
        mov $(.- _PhelixCodeStart_),%eax
        ret
/* */
        .endif /* _INCREMENTAL_API */
/* */
/* ---------------------------------------------------------------- */
/* use this NOP routine to calibrate/check our timing tests */
/* ---------------------------------------------------------------- */
/* Saves and restores all general registers, then returns. */
/* */
C_global PhelixNop
        pushal
        popal
        ret
/* */
/* ---------------------------------------------------------------- */
/* size statistics at compile time */
/* ---------------------------------------------------------------- */
/* Loads into eax the distance between the labels _PhelixCodeStart_ and */
/* _PhelixCodeEnd_ (both defined outside this excerpt). */
/* */
C_global PhelixProcessPacket_CodeSize,ECRYPT_AE_process_packet_CodeSize
        movl $(_PhelixCodeEnd_-_PhelixCodeStart_),%eax
        ret
/* */
        .end