# Source: svn ecrypt/trunk/submissions/salsa20/reduced/12-rounds/p4/salsa20.s
# Revision: 161, Thu Feb 23 21:15:20 2006 UTC, by cdecanni
# File size: 41855 byte(s)
# Log: imported reduced-round variants Salsa20/8 and Salsa20/12.
# salsa20,12_p4.s version 20060209
# D. J. Bernstein
# Public domain.

# int32 a

# stack32 arg1

# stack32 arg2

# stack32 arg3

# stack32 arg4

# input arg1

# input arg2

# input arg3

# input arg4

# int32 eax

# int32 ebx

# int32 esi

# int32 edi

# int32 ebp

# caller eax

# caller ebx

# caller esi

# caller edi

# caller ebp

# int32 k

# int32 kbits

# int32 iv

# int32 i

# int32 x

# int32 m

# int32 out

# stack32 bytes_backup

# int32 bytes

# stack32 eax_stack

# stack32 ebx_stack

# stack32 esi_stack

# stack32 edi_stack

# stack32 ebp_stack

# int6464 diag0

# int6464 diag1

# int6464 diag2

# int6464 diag3

# int6464 a0

# int6464 a1

# int6464 a2

# int6464 a3

# int6464 a4

# int6464 a5

# int6464 a6

# int6464 a7

# int6464 b0

# int6464 b1

# int6464 b2

# int6464 b3

# int6464 b4

# int6464 b5

# int6464 b6

# int6464 b7

# int32 in0

# int32 in1

# int32 in2

# int32 in3

# int32 in4

# int32 in5

# int32 in6

# int32 in7

# int32 in8

# int32 in9

# int32 in10

# int32 in11

# int32 in12

# int32 in13

# int32 in14

# int32 in15

# stack512 tmp

# stack32 ctarget

# enter ECRYPT_keystream_bytes
#
# Fill a buffer with raw keystream.
#   arg1 = x      state pointer, carried in %edx into the shared loop
#   arg2 = out    destination buffer (doubles as the message pointer m)
#   arg3 = bytes  number of keystream bytes wanted
# All three are read from the stack just above the return address.
# NOTE(review): presumably the eSTREAM entry point
# ECRYPT_keystream_bytes(ctx, keystream, length); confirm with the C header.
#
# Method: the buffer is zero-filled and control then joins ._start with
# m == out, so XORing keystream into the all-zero "message" leaves the
# keystream itself in the buffer.
#
# Frame: %esp drops by (%esp & 31) + 96, which leaves it aligned to a
# multiple of 32 bytes. That adjustment and the incoming ebx/esi/edi/ebp
# are parked in the frame (offsets 68 through 84) and reloaded at ._done.
        .text
        .p2align 5
        .globl  _ECRYPT_keystream_bytes
        .globl  ECRYPT_keystream_bytes
_ECRYPT_keystream_bytes:
ECRYPT_keystream_bytes:
        movl    %esp,%eax
        andl    $31,%eax
        addl    $96,%eax                # eax = frame size
        subl    %eax,%esp               # enter frame

        movl    %eax,68(%esp)           # eax_stack = frame size
        movl    %ebx,72(%esp)           # ebx_stack = ebx
        movl    %esi,76(%esp)           # esi_stack = esi
        movl    %edi,80(%esp)           # edi_stack = edi
        movl    %ebp,84(%esp)           # ebp_stack = ebp

        movl    4(%esp,%eax),%edx       # x = arg1
        movl    8(%esp,%eax),%esi       # m = arg2
        movl    %esi,%edi               # out = m
        movl    12(%esp,%eax),%ebx      # bytes = arg3

        testl   %ebx,%ebx
        jz      ._done                  # bytes == 0: nothing to produce

        xorl    %eax,%eax               # a = 0 (fill byte in %al)
        movl    %ebx,%ecx               # i = bytes
        rep stosb                       # while (i) { *out++ = a; --i }
        subl    %ebx,%edi               # out -= bytes (back to buffer start)

        jmp     ._start

# enter ECRYPT_decrypt_bytes
#
# XOR `bytes` bytes of keystream into m, writing the result to out.
#   arg1 = x      state pointer (kept in %edx)
#   arg2 = m      input buffer  (kept in %esi)
#   arg3 = out    output buffer (kept in %edi)
#   arg4 = bytes  byte count    (kept in %ebx)
# The instruction sequence matches ECRYPT_encrypt_bytes; both entry
# points hand over to the shared ._start loop.
#
# Frame: %esp drops by (%esp & 31) + 96; the adjustment and the incoming
# ebx/esi/edi/ebp are saved in the frame and reloaded at ._done.
        .text
        .p2align 5
        .globl  _ECRYPT_decrypt_bytes
        .globl  ECRYPT_decrypt_bytes
_ECRYPT_decrypt_bytes:
ECRYPT_decrypt_bytes:
        movl    %esp,%eax
        andl    $31,%eax
        addl    $96,%eax                # eax = frame size
        subl    %eax,%esp               # enter frame

        movl    %eax,68(%esp)           # eax_stack = frame size
        movl    %ebx,72(%esp)           # ebx_stack = ebx
        movl    %esi,76(%esp)           # esi_stack = esi
        movl    %edi,80(%esp)           # edi_stack = edi
        movl    %ebp,84(%esp)           # ebp_stack = ebp

        movl    4(%esp,%eax),%edx       # x = arg1
        movl    8(%esp,%eax),%esi       # m = arg2
        movl    12(%esp,%eax),%edi      # out = arg3
        movl    16(%esp,%eax),%ebx      # bytes = arg4

        testl   %ebx,%ebx
        jnz     ._start                 # bytes != 0: process the data
        jmp     ._done                  # bytes == 0: restore and return

# enter ECRYPT_encrypt_bytes
#
# XOR `bytes` bytes of keystream into m, writing the result to out.
#   arg1 = x      state pointer (kept in %edx)
#   arg2 = m      input buffer  (kept in %esi)
#   arg3 = out    output buffer (kept in %edi)
#   arg4 = bytes  byte count    (kept in %ebx)
#
# Frame: %esp drops by (%esp & 31) + 96; the adjustment and the incoming
# ebx/esi/edi/ebp are saved in the frame and reloaded at ._done.
#
# This entry point does not jump: with a nonzero byte count it simply
# runs on into the ._start label that follows it.
        .text
        .p2align 5
        .globl  _ECRYPT_encrypt_bytes
        .globl  ECRYPT_encrypt_bytes
_ECRYPT_encrypt_bytes:
ECRYPT_encrypt_bytes:
        movl    %esp,%eax
        andl    $31,%eax
        addl    $96,%eax                # eax = frame size
        subl    %eax,%esp               # enter frame

        movl    %eax,68(%esp)           # eax_stack = frame size
        movl    %ebx,72(%esp)           # ebx_stack = ebx
        movl    %esi,76(%esp)           # esi_stack = esi
        movl    %edi,80(%esp)           # edi_stack = edi
        movl    %ebp,84(%esp)           # ebp_stack = ebp

        movl    4(%esp,%eax),%edx       # x = arg1
        movl    8(%esp,%eax),%esi       # m = arg2
        movl    12(%esp,%eax),%edi      # out = arg3
        movl    16(%esp,%eax),%ebx      # bytes = arg4

        testl   %ebx,%ebx
        jz      ._done                  # bytes == 0: restore and return

# start#
#
# Top of the per-block loop; also the jump target of the keystream and
# decrypt entry points. Register roles: edx = x, esi = m, edi = out,
# ebx = bytes still to process (never zero here).
._start:

# bytesatleast1#
._bytesatleast1:
        cmpl    $64,%ebx
        jae     ._nocopy                # whole block: use the buffers as given

# Fewer than 64 bytes remain. Copy them into tmp, the scratch block at
# the bottom of the frame, and point both m and out at it so the block
# code can read and write a full block. The real destination is kept in
# ctarget for the copy-back after the block has been produced.
        movl    %edi,88(%esp)           # ctarget = out
        movl    %esp,%edi               # out = &tmp
        movl    %ebx,%ecx               # i = bytes
        rep movsb                       # while (i) { *out++ = *m++; --i }
        movl    %esp,%edi               # out = &tmp
        movl    %esp,%esi               # m = &tmp

#   nocopy#
#
# Set up one block: save the byte count, load the four 16-byte rows of
# the state at x into diag0..diag3 and prime the temporaries that the
# first round step expects. movdqa is an aligned load, so x has to be
# 16-byte aligned.
._nocopy:
        movl    %ebx,64(%esp)           # bytes_backup = bytes (ebx is reused for output words)

        movdqa  (%edx),%xmm0            # diag0 = *(int128 *) (x + 0)
        movdqa  16(%edx),%xmm1          # diag1 = *(int128 *) (x + 16)
        movdqa  32(%edx),%xmm2          # diag2 = *(int128 *) (x + 32)
        movdqa  48(%edx),%xmm3          # diag3 = *(int128 *) (x + 48)

        pshufd  $0xe4,%xmm1,%xmm4       # a0 = diag1 (0xe4 = identity shuffle, i.e. a copy)
        pxor    %xmm6,%xmm6             # b0 = 0

        movl    $12,%eax                # i = 12 (rounds left)

# mainloop#
#
# Salsa20 round loop, four rounds per pass. i (%eax) is lowered by 4 near
# the end of each pass; the pass is repeated while that subtraction gives
# a nonzero result without borrow (i starts at 12, so three passes).
#
# Register roles (from the qhasm allocation):
#   diag0 .. diag3    = %xmm0 .. %xmm3   (the state, four words each)
#   a0, a2, a4, a6    = %xmm4
#   a1, a3, a5, a7    = %xmm5
#   b0, b2, b4, b6    = %xmm6
#   b1, b3, b5, b7    = %xmm7
#
# Every step computes  diagT ^= (diagU + diagV) <<< r  as
#   a = diagU + diagV ; b = a ; a <<= r ; b >>= (32 - r) ;
#   diagT ^= a ; diagT ^= b
# with r = 7, 9, 13, 18 in turn. "b = a" is written as b = 0 followed by
# b += a, and "a = diagX" is pshufd $0xe4, the identity shuffle, used as
# a register copy (qhasm spells it "<<< 0").
# pshufd with $0x93, $0x4e or $0x39 rotates a whole register by one, two
# or three 32-bit lanes (written "<<<= 32", "<<<= 64", "<<<= 96").
# The steps are interleaved: the setup for the next step is issued while
# the current step is finishing, so the order of the lines matters.
._mainloop:

# ---- round 1 of 4 ----
# a0 += diag0                (a0 already holds diag1)
paddd %xmm0,%xmm4
# a1 = diag0
pshufd $0xe4,%xmm0,%xmm5
# b0 += a0                   (b0 is 0, so this copies a0)
paddd %xmm4,%xmm6
# a0 <<= 7
pslld $7,%xmm4
# b1 = 0
pxor   %xmm7,%xmm7
# b0 >>= 25
psrld $25,%xmm6
# diag3 ^= a0
pxor  %xmm4,%xmm3
# diag3 ^= b0                => diag3 ^= (diag1 + diag0) <<< 7
pxor  %xmm6,%xmm3
# a1 += diag3
paddd %xmm3,%xmm5
# a2 = diag3
pshufd $0xe4,%xmm3,%xmm4
# b1 += a1
paddd %xmm5,%xmm7
# a1 <<= 9
pslld $9,%xmm5
# b2 = 0
pxor   %xmm6,%xmm6
# b1 >>= 23
psrld $23,%xmm7
# diag2 ^= a1
pxor  %xmm5,%xmm2
# diag3 <<<= 32
pshufd $0x93,%xmm3,%xmm3
# diag2 ^= b1                => diag2 ^= (diag0 + diag3) <<< 9
pxor  %xmm7,%xmm2
# a2 += diag2
paddd %xmm2,%xmm4
# a3 = diag2
pshufd $0xe4,%xmm2,%xmm5
# b2 += a2
paddd %xmm4,%xmm6
# a2 <<= 13
pslld $13,%xmm4
# b3 = 0
pxor   %xmm7,%xmm7
# b2 >>= 19
psrld $19,%xmm6
# diag1 ^= a2
pxor  %xmm4,%xmm1
# diag2 <<<= 64
pshufd $0x4e,%xmm2,%xmm2
# diag1 ^= b2                => diag1 ^= (diag3 + diag2) <<< 13
pxor  %xmm6,%xmm1
# a3 += diag1
paddd %xmm1,%xmm5
# a4 = diag3
pshufd $0xe4,%xmm3,%xmm4
# b3 += a3
paddd %xmm5,%xmm7
# a3 <<= 18
pslld $18,%xmm5
# b4 = 0
pxor   %xmm6,%xmm6
# b3 >>= 14
psrld $14,%xmm7
# diag0 ^= a3
pxor  %xmm5,%xmm0
# diag1 <<<= 96
pshufd $0x39,%xmm1,%xmm1
# diag0 ^= b3                => diag0 ^= (diag2 + diag1) <<< 18
pxor  %xmm7,%xmm0

# ---- round 2 of 4 ----
# a4 += diag0                (a4 already holds the rotated diag3)
paddd %xmm0,%xmm4
# a5 = diag0
pshufd $0xe4,%xmm0,%xmm5
# b4 += a4
paddd %xmm4,%xmm6
# a4 <<= 7
pslld $7,%xmm4
# b5 = 0
pxor   %xmm7,%xmm7
# b4 >>= 25
psrld $25,%xmm6
# diag1 ^= a4
pxor  %xmm4,%xmm1
# diag1 ^= b4                => diag1 ^= (diag3 + diag0) <<< 7
pxor  %xmm6,%xmm1
# a5 += diag1
paddd %xmm1,%xmm5
# a6 = diag1
pshufd $0xe4,%xmm1,%xmm4
# b5 += a5
paddd %xmm5,%xmm7
# a5 <<= 9
pslld $9,%xmm5
# b6 = 0
pxor   %xmm6,%xmm6
# b5 >>= 23
psrld $23,%xmm7
# diag2 ^= a5
pxor  %xmm5,%xmm2
# diag1 <<<= 32
pshufd $0x93,%xmm1,%xmm1
# diag2 ^= b5                => diag2 ^= (diag0 + diag1) <<< 9
pxor  %xmm7,%xmm2
# a6 += diag2
paddd %xmm2,%xmm4
# a7 = diag2
pshufd $0xe4,%xmm2,%xmm5
# b6 += a6
paddd %xmm4,%xmm6
# a6 <<= 13
pslld $13,%xmm4
# b7 = 0
pxor   %xmm7,%xmm7
# b6 >>= 19
psrld $19,%xmm6
# diag3 ^= a6
pxor  %xmm4,%xmm3
# diag2 <<<= 64
pshufd $0x4e,%xmm2,%xmm2
# diag3 ^= b6                => diag3 ^= (diag1 + diag2) <<< 13
pxor  %xmm6,%xmm3
# a7 += diag3
paddd %xmm3,%xmm5
# a0 = diag1                 (setup for round 3)
pshufd $0xe4,%xmm1,%xmm4
# b7 += a7
paddd %xmm5,%xmm7
# a7 <<= 18
pslld $18,%xmm5
# b0 = 0
pxor   %xmm6,%xmm6
# b7 >>= 14
psrld $14,%xmm7
# diag0 ^= a7
pxor  %xmm5,%xmm0
# diag3 <<<= 96
pshufd $0x39,%xmm3,%xmm3
# diag0 ^= b7                => diag0 ^= (diag2 + diag3) <<< 18
pxor  %xmm7,%xmm0

# ---- round 3 of 4 (same sequence as round 1) ----
# a0 += diag0
paddd %xmm0,%xmm4
# a1 = diag0
pshufd $0xe4,%xmm0,%xmm5
# b0 += a0
paddd %xmm4,%xmm6
# a0 <<= 7
pslld $7,%xmm4
# b1 = 0
pxor   %xmm7,%xmm7
# b0 >>= 25
psrld $25,%xmm6
# diag3 ^= a0
pxor  %xmm4,%xmm3
# diag3 ^= b0                => diag3 ^= (diag1 + diag0) <<< 7
pxor  %xmm6,%xmm3
# a1 += diag3
paddd %xmm3,%xmm5
# a2 = diag3
pshufd $0xe4,%xmm3,%xmm4
# b1 += a1
paddd %xmm5,%xmm7
# a1 <<= 9
pslld $9,%xmm5
# b2 = 0
pxor   %xmm6,%xmm6
# b1 >>= 23
psrld $23,%xmm7
# diag2 ^= a1
pxor  %xmm5,%xmm2
# diag3 <<<= 32
pshufd $0x93,%xmm3,%xmm3
# diag2 ^= b1                => diag2 ^= (diag0 + diag3) <<< 9
pxor  %xmm7,%xmm2
# a2 += diag2
paddd %xmm2,%xmm4
# a3 = diag2
pshufd $0xe4,%xmm2,%xmm5
# b2 += a2
paddd %xmm4,%xmm6
# a2 <<= 13
pslld $13,%xmm4
# b3 = 0
pxor   %xmm7,%xmm7
# b2 >>= 19
psrld $19,%xmm6
# diag1 ^= a2
pxor  %xmm4,%xmm1
# diag2 <<<= 64
pshufd $0x4e,%xmm2,%xmm2
# diag1 ^= b2                => diag1 ^= (diag3 + diag2) <<< 13
pxor  %xmm6,%xmm1
# a3 += diag1
paddd %xmm1,%xmm5
# a4 = diag3
pshufd $0xe4,%xmm3,%xmm4
# b3 += a3
paddd %xmm5,%xmm7
# a3 <<= 18
pslld $18,%xmm5
# b4 = 0
pxor   %xmm6,%xmm6
# b3 >>= 14
psrld $14,%xmm7
# diag0 ^= a3
pxor  %xmm5,%xmm0
# diag1 <<<= 96
pshufd $0x39,%xmm1,%xmm1
# diag0 ^= b3                => diag0 ^= (diag2 + diag1) <<< 18
pxor  %xmm7,%xmm0

# ---- round 4 of 4 (same sequence as round 2, plus the counter update) ----
# a4 += diag0
paddd %xmm0,%xmm4
# a5 = diag0
pshufd $0xe4,%xmm0,%xmm5
# b4 += a4
paddd %xmm4,%xmm6
# a4 <<= 7
pslld $7,%xmm4
# b5 = 0
pxor   %xmm7,%xmm7
# b4 >>= 25
psrld $25,%xmm6
# diag1 ^= a4
pxor  %xmm4,%xmm1
# diag1 ^= b4                => diag1 ^= (diag3 + diag0) <<< 7
pxor  %xmm6,%xmm1
# a5 += diag1
paddd %xmm1,%xmm5
# a6 = diag1
pshufd $0xe4,%xmm1,%xmm4
# b5 += a5
paddd %xmm5,%xmm7
# a5 <<= 9
pslld $9,%xmm5
# b6 = 0
pxor   %xmm6,%xmm6
# b5 >>= 23
psrld $23,%xmm7
# diag2 ^= a5
pxor  %xmm5,%xmm2
# diag1 <<<= 32
pshufd $0x93,%xmm1,%xmm1
# diag2 ^= b5                => diag2 ^= (diag0 + diag1) <<< 9
pxor  %xmm7,%xmm2
# a6 += diag2
paddd %xmm2,%xmm4
# a7 = diag2
pshufd $0xe4,%xmm2,%xmm5
# b6 += a6
paddd %xmm4,%xmm6
# a6 <<= 13
pslld $13,%xmm4
# b7 = 0
pxor   %xmm7,%xmm7
# b6 >>= 19
psrld $19,%xmm6
# diag3 ^= a6
pxor  %xmm4,%xmm3
# diag2 <<<= 64
pshufd $0x4e,%xmm2,%xmm2
# diag3 ^= b6                => diag3 ^= (diag1 + diag2) <<< 13
pxor  %xmm6,%xmm3
# i -= 4. The flags set here are the ones tested by the ja at the bottom:
# none of the SSE instructions in between writes EFLAGS.
sub  $4,%eax
# a7 += diag3
paddd %xmm3,%xmm5
# a0 = diag1                 (setup for round 1 of the next pass)
pshufd $0xe4,%xmm1,%xmm4
# b7 += a7
paddd %xmm5,%xmm7
# a7 <<= 18
pslld $18,%xmm5
# b0 = 0
pxor   %xmm6,%xmm6
# b7 >>= 14
psrld $14,%xmm7
# diag0 ^= a7
pxor  %xmm5,%xmm0
# diag3 <<<= 96
pshufd $0x39,%xmm3,%xmm3
# diag0 ^= b7                => diag0 ^= (diag2 + diag3) <<< 18
pxor  %xmm7,%xmm0

# goto mainloop if unsigned>   (i was above 4 before the subtraction)
ja ._mainloop

# Block output stage, reached when the round loop falls through.
#
# Register roles on entry: edx = x, esi = m, edi = out,
# xmm0..xmm3 = diag0..diag3 after the rounds.
#   (1) add the input state at x back into diag0..diag3;
#   (2) four times over: move lane 0 of each diag register into a
#       general register, rotate the diag register down by one lane
#       (pshufd $0x39), XOR the word with the message word at the
#       matching offset in m and store it at the same offset in out;
#   (3) reload bytes and add one to the 64-bit value held in the state
#       words at x + 32 (low half) and x + 52 (high half);
#   (4) branch on bytes: above 64 goes on to the next block, a count of
#       exactly 64 ends the call, and a smaller count first copies the
#       partial result from out (the tmp scratch block in that case)
#       to the real destination saved in ctarget.
# Each message word is read before the output word at the same offset
# is written; the instruction order of the original is kept.

        paddd   (%edx),%xmm0            # diag0 += *(int128 *) (x + 0)
        paddd   16(%edx),%xmm1          # diag1 += *(int128 *) (x + 16)
        paddd   32(%edx),%xmm2          # diag2 += *(int128 *) (x + 32)
        paddd   48(%edx),%xmm3          # diag3 += *(int128 *) (x + 48)

# Lane 0: state words 0, 12, 8, 4.
        movd    %xmm0,%eax              # in0 = diag0
        movd    %xmm1,%ebp              # in12 = diag1
        movd    %xmm2,%ebx              # in8 = diag2
        movd    %xmm3,%ecx              # in4 = diag3
        pshufd  $0x39,%xmm0,%xmm0       # diag0 <<<= 96
        pshufd  $0x39,%xmm1,%xmm1       # diag1 <<<= 96
        pshufd  $0x39,%xmm2,%xmm2       # diag2 <<<= 96
        pshufd  $0x39,%xmm3,%xmm3       # diag3 <<<= 96
        xorl    (%esi),%eax             # in0 ^= *(uint32 *) (m + 0)
        xorl    48(%esi),%ebp           # in12 ^= *(uint32 *) (m + 48)
        xorl    32(%esi),%ebx           # in8 ^= *(uint32 *) (m + 32)
        xorl    16(%esi),%ecx           # in4 ^= *(uint32 *) (m + 16)
        movl    %eax,(%edi)             # *(uint32 *) (out + 0) = in0
        movl    %ebp,48(%edi)           # *(uint32 *) (out + 48) = in12
        movl    %ebx,32(%edi)           # *(uint32 *) (out + 32) = in8
        movl    %ecx,16(%edi)           # *(uint32 *) (out + 16) = in4

# Next lane: state words 5, 1, 13, 9.
        movd    %xmm0,%ecx              # in5 = diag0
        movd    %xmm1,%eax              # in1 = diag1
        movd    %xmm2,%ebp              # in13 = diag2
        movd    %xmm3,%ebx              # in9 = diag3
        pshufd  $0x39,%xmm0,%xmm0       # diag0 <<<= 96
        pshufd  $0x39,%xmm1,%xmm1       # diag1 <<<= 96
        pshufd  $0x39,%xmm2,%xmm2       # diag2 <<<= 96
        pshufd  $0x39,%xmm3,%xmm3       # diag3 <<<= 96
        xorl    20(%esi),%ecx           # in5 ^= *(uint32 *) (m + 20)
        xorl    4(%esi),%eax            # in1 ^= *(uint32 *) (m + 4)
        xorl    52(%esi),%ebp           # in13 ^= *(uint32 *) (m + 52)
        xorl    36(%esi),%ebx           # in9 ^= *(uint32 *) (m + 36)
        movl    %ecx,20(%edi)           # *(uint32 *) (out + 20) = in5
        movl    %eax,4(%edi)            # *(uint32 *) (out + 4) = in1
        movl    %ebp,52(%edi)           # *(uint32 *) (out + 52) = in13
        movl    %ebx,36(%edi)           # *(uint32 *) (out + 36) = in9

# Next lane: state words 10, 6, 2, 14.
        movd    %xmm0,%ebx              # in10 = diag0
        movd    %xmm1,%ecx              # in6 = diag1
        movd    %xmm2,%eax              # in2 = diag2
        movd    %xmm3,%ebp              # in14 = diag3
        pshufd  $0x39,%xmm0,%xmm0       # diag0 <<<= 96
        pshufd  $0x39,%xmm1,%xmm1       # diag1 <<<= 96
        pshufd  $0x39,%xmm2,%xmm2       # diag2 <<<= 96
        pshufd  $0x39,%xmm3,%xmm3       # diag3 <<<= 96
        xorl    40(%esi),%ebx           # in10 ^= *(uint32 *) (m + 40)
        xorl    24(%esi),%ecx           # in6 ^= *(uint32 *) (m + 24)
        xorl    8(%esi),%eax            # in2 ^= *(uint32 *) (m + 8)
        xorl    56(%esi),%ebp           # in14 ^= *(uint32 *) (m + 56)
        movl    %ebx,40(%edi)           # *(uint32 *) (out + 40) = in10
        movl    %ecx,24(%edi)           # *(uint32 *) (out + 24) = in6
        movl    %eax,8(%edi)            # *(uint32 *) (out + 8) = in2
        movl    %ebp,56(%edi)           # *(uint32 *) (out + 56) = in14

# Last lane: state words 15, 11, 7, 3 (no further rotation needed).
        movd    %xmm0,%ebp              # in15 = diag0
        movd    %xmm1,%ebx              # in11 = diag1
        movd    %xmm2,%ecx              # in7 = diag2
        movd    %xmm3,%eax              # in3 = diag3
        xorl    60(%esi),%ebp           # in15 ^= *(uint32 *) (m + 60)
        xorl    44(%esi),%ebx           # in11 ^= *(uint32 *) (m + 44)
        xorl    28(%esi),%ecx           # in7 ^= *(uint32 *) (m + 28)
        xorl    12(%esi),%eax           # in3 ^= *(uint32 *) (m + 12)
        movl    %ebp,60(%edi)           # *(uint32 *) (out + 60) = in15
        movl    %ebx,44(%edi)           # *(uint32 *) (out + 44) = in11
        movl    %ecx,28(%edi)           # *(uint32 *) (out + 28) = in7
        movl    %eax,12(%edi)           # *(uint32 *) (out + 12) = in3

# Recover the byte count and advance the 64-bit counter kept in x.
        movl    64(%esp),%ebx           # bytes = bytes_backup
        movl    32(%edx),%eax           # in8 = *(uint32 *) (x + 32)
        movl    52(%edx),%ecx           # in9 = *(uint32 *) (x + 52)
        addl    $1,%eax                 # in8 += 1
        adcl    $0,%ecx                 # in9 += 0 + carry
        movl    %eax,32(%edx)           # *(uint32 *) (x + 32) = in8
        movl    %ecx,52(%edx)           # *(uint32 *) (x + 52) = in9

        cmpl    $64,%ebx
        ja      ._bytesatleast65        # more than one block was left
        je      ._bytesatleast64        # exactly one block was left

# Partial block: out points at tmp, so copy the wanted bytes to ctarget.
        movl    %edi,%esi               # m = out
        movl    88(%esp),%edi           # out = ctarget
        movl    %ebx,%ecx               # i = bytes
        rep movsb                       # while (i) { *out++ = *m++; --i }

#     bytesatleast64#
._bytesatleast64:

#     done#
._done:

#     eax = eax_stack
# movl <eax_stack=stack32#2,>eax=int32#1
# movl <eax_stack=68(%esp),>eax=%eax
movl 68(%esp),%eax

#     ebx = ebx_stack
# movl <ebx_stack=stack32#3,>ebx=int32#4
# movl <ebx_stack=72(%esp),>ebx=%ebx
movl 72(%esp),%ebx

#     esi = esi_stack
# movl <esi_stack=stack32#4,>esi=int32#5
# movl <esi_stack=76(%esp),>esi=%esi
movl 76(%esp),%esi

#     edi = edi_stack
# movl <edi_stack=stack32#5,>edi=int32#6
# movl <edi_stack=80(%esp),>edi=%edi
movl 80(%esp),%edi

#     ebp = ebp_stack
# movl <ebp_stack=stack32#6,>ebp=int32#7
# movl <ebp_stack=84(%esp),>ebp=%ebp
movl 84(%esp),%ebp

#     leave
add %eax,%esp
ret

#   bytesatleast65#
._bytesatleast65:

#   bytes -= 64
# sub  $64,<bytes=int32#4
# sub  $64,<bytes=%ebx
sub  $64,%ebx

#   out += 64
# add  $64,<out=int32#6
# add  $64,<out=%edi
add  $64,%edi

#   m += 64
# add  $64,<m=int32#5
# add  $64,<m=%esi
add  $64,%esi

# goto bytesatleast1
jmp ._bytesatleast1

# enter ECRYPT_init
#
# Reads no stack arguments and touches no memory.  The routine only
# carves out a scratch frame below %esp and hands it straight back.
# Registers changed: %eax (left holding the frame size) and the flags.
.text
.p2align 5
.globl _ECRYPT_init
.globl ECRYPT_init
_ECRYPT_init:
ECRYPT_init:
    movl    %esp,%eax               # start from the incoming stack pointer
    andl    $31,%eax                # eax = esp mod 32
    addl    $96,%eax                # frame size = (esp mod 32) + 96
    subl    %eax,%esp               # esp is now a multiple of 32

# leave
    addl    %eax,%esp               # release the frame again
    ret

# enter ECRYPT_keysetup
#
# Stack arguments on entry (32-bit each, just above the return address):
#   arg1 = x      base address of the block of 32-bit words filled in here
#   arg2 = k      key bytes; 16 are read when kbits < 256, otherwise 32
#   arg3 = kbits  key length in bits; it is only compared against 256
#
# Words written, given as byte offsets from x:
#   x+20, x+40, x+60, x+48   key words from k+0, k+4, k+8, k+12
#   x+28, x+16, x+36, x+56   key words from k+16 .. k+28 (kbits >= 256),
#                            or from k+0 .. k+12 again (kbits < 256)
#   x+0, x+4, x+8, x+12      constants in0, in5, in10, in15
# Offsets 24, 32, 44 and 52 are left alone by this routine.
#
# %ebx, %esi, %edi and %ebp are spilled into the frame and restored
# before returning.  %eax, %ecx, %edx and the flags are overwritten.
.text
.p2align 5
.globl _ECRYPT_keysetup
.globl ECRYPT_keysetup
_ECRYPT_keysetup:
ECRYPT_keysetup:
# Scratch frame: size (esp mod 32) + 96 is kept in %eax, and the
# subtraction leaves %esp on a 32-byte boundary.
    movl    %esp,%eax
    andl    $31,%eax
    addl    $96,%eax
    subl    %eax,%esp

# Spill the frame size and every register that is reused below.
    movl    %ebp,80(%esp)           # ebp_stack
    movl    %edi,76(%esp)           # edi_stack
    movl    %esi,72(%esp)           # esi_stack
    movl    %ebx,68(%esp)           # ebx_stack
    movl    %eax,64(%esp)           # eax_stack (frame size, needed to leave)

# The caller's stack starts %eax bytes above the new %esp.
    movl    12(%esp,%eax),%edx      # edx = kbits
    movl    8(%esp,%eax),%ecx       # ecx = k
    movl    4(%esp,%eax),%eax       # eax = x; read last, it replaces the index

# First 16 key bytes.  All four loads are issued before any store.
    movl    12(%ecx),%ebx           # in4
    movl    8(%ecx),%ebp            # in3
    movl    4(%ecx),%edi            # in2
    movl    0(%ecx),%esi            # in1
    movl    %esi,20(%eax)           # x+20 = in1
    movl    %edi,40(%eax)           # x+40 = in2
    movl    %ebp,60(%eax)           # x+60 = in3
    movl    %ebx,48(%eax)           # x+48 = in4

# Unsigned compare: anything below 256 bits takes the short-key path.
    cmpl    $256,%edx
    jb      ._kbits128

#   kbits256#
._kbits256:

# Long key: the second 16 key bytes supply in11 .. in14.
    movl    16(%ecx),%ebx           # in11
    movl    20(%ecx),%esi           # in12
    movl    24(%ecx),%edi           # in13
    movl    28(%ecx),%ecx           # in14; overwrites k, so it goes last
    movl    %ecx,56(%eax)           # x+56 = in14
    movl    %edi,36(%eax)           # x+36 = in13
    movl    %esi,16(%eax)           # x+16 = in12
    movl    %ebx,28(%eax)           # x+28 = in11

# Constants for the long key.  Stored little-endian at x+0 .. x+15 the
# bytes read "expand 32-byte k".
    movl    $0x61707865,%ecx        # in0  = 1634760805
    movl    $0x3320646e,%edx        # in5  = 857760878
    movl    $0x79622d32,%esi        # in10 = 2036477234
    movl    $0x6b206574,%edi        # in15 = 1797285236
    movl    %edi,12(%eax)           # x+12 = in15
    movl    %esi,8(%eax)            # x+8  = in10
    movl    %edx,4(%eax)            # x+4  = in5
    movl    %ecx,0(%eax)            # x+0  = in0

#   goto keysetupdone
    jmp     ._keysetupdone

#   kbits128#
._kbits128:

# Short key: the first 16 key bytes are read a second time for in11 .. in14.
    movl    0(%ecx),%ebx            # in11
    movl    4(%ecx),%esi            # in12
    movl    8(%ecx),%edi            # in13
    movl    12(%ecx),%ecx           # in14; overwrites k, so it goes last
    movl    %ecx,56(%eax)           # x+56 = in14
    movl    %edi,36(%eax)           # x+36 = in13
    movl    %esi,16(%eax)           # x+16 = in12
    movl    %ebx,28(%eax)           # x+28 = in11

# Constants for the short key.  Stored little-endian at x+0 .. x+15 the
# bytes read "expand 16-byte k".
    movl    $0x61707865,%ecx        # in0  = 1634760805
    movl    $0x3120646e,%edx        # in5  = 824206446
    movl    $0x79622d36,%esi        # in10 = 2036477238
    movl    $0x6b206574,%edi        # in15 = 1797285236
    movl    %edi,12(%eax)           # x+12 = in15
    movl    %esi,8(%eax)            # x+8  = in10
    movl    %edx,4(%eax)            # x+4  = in5
    movl    %ecx,0(%eax)            # x+0  = in0

#   keysetupdone#
._keysetupdone:

# Reload the spilled registers; %eax gets the frame size back first
# because the final stack adjustment depends on it.
    movl    64(%esp),%eax
    movl    80(%esp),%ebp
    movl    76(%esp),%edi
    movl    72(%esp),%esi
    movl    68(%esp),%ebx

# leave
    addl    %eax,%esp
    ret

# enter ECRYPT_ivsetup
#
# Stack arguments on entry (32-bit each, just above the return address):
#   arg1 = x    base address of the block of 32-bit words being updated
#   arg2 = iv   eight bytes, read as two 32-bit words
#
# Effect on the block, given as byte offsets from x:
#   x+24 = word at iv+0 (in6)
#   x+44 = word at iv+4 (in7)
#   x+32 = 0 (in8)
#   x+52 = 0 (in9)
# No other word of the block is written.
#
# %ebx, %esi, %edi and %ebp are spilled into the frame and restored
# before returning.  %eax, %ecx, %edx and the flags are overwritten.
.text
.p2align 5
.globl _ECRYPT_ivsetup
.globl ECRYPT_ivsetup
_ECRYPT_ivsetup:
ECRYPT_ivsetup:
# Scratch frame: size (esp mod 32) + 96 is kept in %eax, and the
# subtraction leaves %esp on a 32-byte boundary.
    movl    %esp,%eax
    andl    $31,%eax
    addl    $96,%eax
    subl    %eax,%esp

# Spill the frame size together with the saved registers.
    movl    %ebp,80(%esp)           # ebp_stack
    movl    %edi,76(%esp)           # edi_stack
    movl    %esi,72(%esp)           # esi_stack
    movl    %ebx,68(%esp)           # ebx_stack
    movl    %eax,64(%esp)           # eax_stack (frame size, needed to leave)

# The caller's stack starts %eax bytes above the new %esp.
    movl    8(%esp,%eax),%ecx       # ecx = iv
    movl    4(%esp,%eax),%eax       # eax = x; read last, it replaces the index

# Both iv words are loaded before anything is stored through x.
    movl    0(%ecx),%edx            # in6
    movl    4(%ecx),%ecx            # in7; overwrites iv, so it goes last

# Zero words for in8 and in9.  The flags set here are replaced by the
# final stack adjustment before the routine returns.
    xorl    %ebx,%ebx               # in8 = 0
    xorl    %esi,%esi               # in9 = 0

    movl    %esi,52(%eax)           # x+52 = in9
    movl    %ebx,32(%eax)           # x+32 = in8
    movl    %ecx,44(%eax)           # x+44 = in7
    movl    %edx,24(%eax)           # x+24 = in6

# Reload the spilled registers; %eax gets the frame size back first
# because the final stack adjustment depends on it.
    movl    64(%esp),%eax
    movl    80(%esp),%ebp
    movl    76(%esp),%edi
    movl    72(%esp),%esi
    movl    68(%esp),%ebx

# leave
    addl    %eax,%esp
    ret

eSTREAM Project

Powered by ViewCVS 1.0-dev
(Powered by Apache)

ViewCVS and CVS Help