[svn] / ecrypt / trunk / submissions / salsa20 / full / amd64-xmm2 / salsa20.q  

svn: ecrypt/trunk/submissions/salsa20/full/amd64-xmm2/salsa20.q

File: [svn] / ecrypt / trunk / submissions / salsa20 / full / amd64-xmm2 / salsa20.q (download)
Revision: 211, Thu Feb 14 05:39:03 2008 UTC (5 years, 4 months ago) by cdecanni
File size: 8684 byte(s)
* imported new salsa20 implementations
# Value written by the fill loop in ECRYPT_keystream_bytes (set to 0 there).
int64 a

# Incoming arguments of whichever entry point was called.
int64 arg1
int64 arg2
int64 arg3
int64 arg4
input arg1
input arg2
input arg3
input arg4

# ECRYPT_keysetup: key pointer and key size in bits.
# ECRYPT_ivsetup: IV pointer.
int64 k
int64 kbits
int64 iv

# General-purpose counter: byte count for the copy/fill loops and the
# round counter of the main loop.
int64 i

# x: pointer to the 64-byte state (accessed at offsets 0..63).
# m: input pointer, out: output pointer, bytes: bytes still to process.
int64 x
int64 m
int64 out
int64 bytes

# The four rows of the working state; each is loaded from 16 bytes of x.
int6464 diag0
int6464 diag1
int6464 diag2
int6464 diag3
# situation at beginning of first round:
# diag0: x0 x5 x10 x15
# diag1: x12 x1 x6 x11
# diag2: x8 x13 x2 x7
# diag3: x4 x9 x14 x3
# situation at beginning of second round:
# diag0: x0 x5 x10 x15
# diag1: x1 x6 x11 x12
# diag2: x2 x7 x8 x13
# diag3: x3 x4 x9 x14

# Round temporaries.  aN receives a sum of two rows and is shifted left;
# bN is a copy of that sum shifted right.  Both are XORed into the target
# row, which together forms a 32-bit rotate of the sum.
int6464 a0
int6464 a1
int6464 a2
int6464 a3
int6464 a4
int6464 a5
int6464 a6
int6464 a7
int6464 b0
int6464 b1
int6464 b2
int6464 b3
int6464 b4
int6464 b5
int6464 b6
int6464 b7

# Integer temporaries used to move individual 32-bit words between the
# rows / registers and memory.  In the output code inN carries state word
# xN; in8/in9 also carry the two halves of the block counter.
int64 in0
int64 in1
int64 in2
int64 in3
int64 in4
int64 in5
int64 in6
int64 in7
int64 in8
int64 in9
int64 in10
int64 in11
int64 in12
int64 in13
int64 in14
int64 in15

# Stack buffer that stands in for both m and out when fewer than 64 bytes
# remain (512 bits, i.e. one full block).
stack512 tmp

# ctarget: real output pointer, remembered while out points at tmp.
# bytes_backup: stack copy of bytes, saved before the block is computed and
# reloaded afterwards.
int64 ctarget
stack64 bytes_backup


# ECRYPT_keystream_bytes(arg1 = state, arg2 = output buffer, arg3 = length)
#
# Produces arg3 bytes of keystream.  The output buffer is first filled with
# zeros and then run through the shared encryption path in place (m and out
# both point at it), so the XOR with the input leaves the raw keystream.
enter ECRYPT_keystream_bytes

x = arg1
m = arg2
out = m
bytes = arg3

# Nothing to do for a length of zero.
              unsigned>? bytes - 0
goto done if !unsigned>

# Store a (= 0) through out, bytes times, then rewind out to the start of
# the buffer.
a = 0
i = bytes
while (i) { *out++ = a; --i }
out -= bytes

goto start


# ECRYPT_decrypt_bytes(arg1 = state, arg2 = input, arg3 = output, arg4 = length)
#
# Sets up the same four variables as ECRYPT_encrypt_bytes and jumps to the
# shared body at start; the two entry points run exactly the same code.
enter ECRYPT_decrypt_bytes

x = arg1
m = arg2
out = arg3
bytes = arg4

# Nothing to do for a length of zero.
              unsigned>? bytes - 0
goto done if !unsigned>

goto start


# ECRYPT_encrypt_bytes(arg1 = state, arg2 = input, arg3 = output, arg4 = length)
#
# XORs arg4 bytes of input with keystream and writes the result to output.
# Execution falls through into the shared body at start.
enter ECRYPT_encrypt_bytes

x = arg1
m = arg2
out = arg3
bytes = arg4

# Nothing to do for a length of zero.
              unsigned>? bytes - 0
goto done if !unsigned>


# Shared body of the keystream / decrypt / encrypt entry points.
# On arrival: x = state, m = input, out = output, bytes > 0.
start:


# Top of the per-block loop; re-entered once for every further 64-byte block.
bytesatleast1:

# Fewer than 64 bytes left: copy the remaining input into tmp and point both
# m and out at tmp, so the full-block code below can read and write 64 bytes.
# ctarget keeps the real destination for the copy-back after the block.
                  unsigned<? bytes - 64
  goto nocopy if !unsigned<

    ctarget = out

    out = &tmp
    i = bytes
    while (i) { *out++ = *m++; --i }
    out = &tmp
    m = &tmp

  nocopy:

# Save bytes on the stack; it is reloaded after the block has been written.
bytes_backup = bytes


# Load the four 16-byte rows of the state.
diag0 = *(int128 *) (x + 0)
diag1 = *(int128 *) (x + 16)
diag2 = *(int128 *) (x + 32)
diag3 = *(int128 *) (x + 48)


# a0 is preloaded with diag1 for the first addition in the loop; the loop
# itself refreshes a0 at the end of every second round.
# i counts the remaining rounds (20 in total).
            	a0 = diag1
i = 20

# Main round loop.  One pass performs four rounds and subtracts 4 from i, so
# with i starting at 20 the body runs five times.
#
# Every step has the form  target ^= rotate_left(p + q, r)  on 32-bit lanes,
# with r = 7, 9, 13, 18 in turn.  The rotate is built from two shifts:
#   aN = p + q;  bN = aN;  aN <<= r;  bN >>= 32 - r;  target ^= aN;  target ^= bN
# The "<<<=" lines rotate a whole row by a multiple of 32 bits to re-align
# the words for the following round.  Statements belonging to neighbouring
# steps are interleaved, so the order of the lines matters.
mainloop:

# ---- round 1 of 4 (layout: "situation at beginning of first round") ----
# diag3 ^= rotl(diag1 + diag0, 7); a0 already holds diag1 on entry.
# a1 captures diag0 for the next step.
uint32323232	a0 += diag0
				a1 = diag0
            	b0 = a0
uint32323232	a0 <<= 7
uint32323232	b0 >>= 25
                diag3 ^= a0

                diag3 ^= b0

# diag2 ^= rotl(diag0 + diag3, 9); a2 captures diag3 before it is rotated.
uint32323232			a1 += diag3
						a2 = diag3
            			b1 = a1
uint32323232			a1 <<= 9
uint32323232			b1 >>= 23
				diag2 ^= a1
		diag3 <<<= 32
				diag2 ^= b1

# diag1 ^= rotl(diag3 + diag2, 13); a3 captures diag2 before it is rotated.
uint32323232					a2 += diag2
								a3 = diag2
            					b2 = a2
uint32323232					a2 <<= 13
uint32323232					b2 >>= 19
						diag1 ^= a2
				diag2 <<<= 64
						diag1 ^= b2

# diag0 ^= rotl(diag2 + diag1, 18); a4 takes the already rotated diag3 as
# the first operand of round 2.
uint32323232							a3 += diag1
		a4 = diag3
            							b3 = a3
uint32323232							a3 <<= 18
uint32323232							b3 >>= 14
								diag0 ^= a3
						diag1 <<<= 96
								diag0 ^= b3

# ---- round 2 of 4 (layout: "situation at beginning of second round") ----
# Same four steps with the roles of diag1 and diag3 exchanged.
# diag1 ^= rotl(diag3 + diag0, 7)
uint32323232	a4 += diag0
				a5 = diag0
            	b4 = a4
uint32323232	a4 <<= 7
uint32323232	b4 >>= 25
                diag1 ^= a4

                diag1 ^= b4

# diag2 ^= rotl(diag0 + diag1, 9)
uint32323232			a5 += diag1
						a6 = diag1
            			b5 = a5
uint32323232			a5 <<= 9
uint32323232			b5 >>= 23
				diag2 ^= a5
		diag1 <<<= 32
				diag2 ^= b5

# diag3 ^= rotl(diag1 + diag2, 13)
uint32323232					a6 += diag2
								a7 = diag2
            					b6 = a6
uint32323232					a6 <<= 13
uint32323232					b6 >>= 19
						diag3 ^= a6
				diag2 <<<= 64
						diag3 ^= b6

# diag0 ^= rotl(diag2 + diag3, 18); a0 is reloaded with the rotated diag1
# for round 3.  After the rotations of rounds 1 and 2 every row has been
# rotated by a total of 128 bits, i.e. the first-round layout is restored.
uint32323232							a7 += diag3
		a0 = diag1
            							b7 = a7
uint32323232							a7 <<= 18
uint32323232							b7 >>= 14
								diag0 ^= a7
						diag3 <<<= 96
								diag0 ^= b7


# ---- round 3 of 4: identical to round 1 ----
uint32323232	a0 += diag0
				a1 = diag0
            	b0 = a0
uint32323232	a0 <<= 7
uint32323232	b0 >>= 25
                diag3 ^= a0

                diag3 ^= b0

uint32323232			a1 += diag3
						a2 = diag3
            			b1 = a1
uint32323232			a1 <<= 9
uint32323232			b1 >>= 23
				diag2 ^= a1
		diag3 <<<= 32
				diag2 ^= b1

uint32323232					a2 += diag2
								a3 = diag2
            					b2 = a2
uint32323232					a2 <<= 13
uint32323232					b2 >>= 19
						diag1 ^= a2
				diag2 <<<= 64
						diag1 ^= b2

uint32323232							a3 += diag1
		a4 = diag3
            							b3 = a3
uint32323232							a3 <<= 18
uint32323232							b3 >>= 14
								diag0 ^= a3
						diag1 <<<= 96
								diag0 ^= b3

# ---- round 4 of 4: identical to round 2, plus the loop counter update ----
uint32323232	a4 += diag0
				a5 = diag0
            	b4 = a4
uint32323232	a4 <<= 7
uint32323232	b4 >>= 25
                diag1 ^= a4

                diag1 ^= b4

uint32323232			a5 += diag1
						a6 = diag1
            			b5 = a5
uint32323232			a5 <<= 9
uint32323232			b5 >>= 23
				diag2 ^= a5
		diag1 <<<= 32
				diag2 ^= b5

uint32323232					a6 += diag2
								a7 = diag2
            					b6 = a6
uint32323232					a6 <<= 13
uint32323232					b6 >>= 19
						diag3 ^= a6
				diag2 <<<= 64
						diag3 ^= b6
# Four rounds done: decrement the round counter here.  The comparison result
# is consumed by the goto at the end of the loop, so the statements in
# between are presumably ones that leave the flags untouched.
                 unsigned>? i -= 4
uint32323232							a7 += diag3
		a0 = diag1
            							b7 = a7
uint32323232							a7 <<= 18
uint32323232							b7 >>= 14
								diag0 ^= a7
						diag3 <<<= 96
								diag0 ^= b7
# Repeat while rounds remain (i > 0 after the subtraction above).
goto mainloop if unsigned>


# Feed-forward: add the original state rows from x to the round output
# (as four 32-bit lanes per row, going by the uint32323232 prefix).
uint32323232 diag0 += *(int128 *) (x + 0)
uint32323232 diag1 += *(int128 *) (x + 16)
uint32323232 diag2 += *(int128 *) (x + 32)
uint32323232 diag3 += *(int128 *) (x + 48)


# Write out the 64-byte block, one word of each row per pass.
# In every pass the low word of each row is copied to an integer temporary,
# XORed with the 32-bit input word at m + 4*N and stored as 32 bits at
# out + 4*N, where N is the index of the state word xN held in that position.
# "<<<= 96" then rotates each row so that its next word becomes the low word.
#
# Pass 1: low words are x0, x12, x8, x4.
in0 = diag0
in12 = diag1
in8 = diag2
in4 = diag3
diag0 <<<= 96
diag1 <<<= 96
diag2 <<<= 96
diag3 <<<= 96
(uint32) in0 ^= *(uint32 *) (m + 0)
(uint32) in12 ^= *(uint32 *) (m + 48)
(uint32) in8 ^= *(uint32 *) (m + 32)
(uint32) in4 ^= *(uint32 *) (m + 16)
*(uint32 *) (out + 0) = in0
*(uint32 *) (out + 48) = in12
*(uint32 *) (out + 32) = in8
*(uint32 *) (out + 16) = in4

# Pass 2: low words are x5, x1, x13, x9.
in5 = diag0
in1 = diag1
in13 = diag2
in9 = diag3
diag0 <<<= 96
diag1 <<<= 96
diag2 <<<= 96
diag3 <<<= 96
(uint32) in5 ^= *(uint32 *) (m + 20)
(uint32) in1 ^= *(uint32 *) (m + 4)
(uint32) in13 ^= *(uint32 *) (m + 52)
(uint32) in9 ^= *(uint32 *) (m + 36)
*(uint32 *) (out + 20) = in5
*(uint32 *) (out + 4) = in1
*(uint32 *) (out + 52) = in13
*(uint32 *) (out + 36) = in9

# Pass 3: low words are x10, x6, x2, x14.
in10 = diag0
in6 = diag1
in2 = diag2
in14 = diag3
diag0 <<<= 96
diag1 <<<= 96
diag2 <<<= 96
diag3 <<<= 96
(uint32) in10 ^= *(uint32 *) (m + 40)
(uint32) in6 ^= *(uint32 *) (m + 24)
(uint32) in2 ^= *(uint32 *) (m + 8)
(uint32) in14 ^= *(uint32 *) (m + 56)
*(uint32 *) (out + 40) = in10
*(uint32 *) (out + 24) = in6
*(uint32 *) (out + 8) = in2
*(uint32 *) (out + 56) = in14

# Pass 4: low words are x15, x11, x7, x3.  No further rotation is needed
# because the rows are not used again for this block.
in15 = diag0
in11 = diag1
in7 = diag2
in3 = diag3
(uint32) in15 ^= *(uint32 *) (m + 60)
(uint32) in11 ^= *(uint32 *) (m + 44)
(uint32) in7 ^= *(uint32 *) (m + 28)
(uint32) in3 ^= *(uint32 *) (m + 12)
*(uint32 *) (out + 60) = in15
*(uint32 *) (out + 44) = in11
*(uint32 *) (out + 28) = in7
*(uint32 *) (out + 12) = in3


# Reload the byte count that was saved before the block was computed.
bytes = bytes_backup

# Advance the 64-bit block counter kept in the state: low half at x + 32,
# high half at x + 52.  The two halves are combined into one 64-bit value
# (after adding 1 to the low half), then written back as two 32-bit words.
  in8 = *(uint32 *) (x + 32)
  in9 = *(uint32 *) (x + 52)
  in8 += 1
  in9 <<= 32
  in8 += in9
  *(uint32 *) (x + 32) = in8
  (uint64) in8 >>= 32
  *(uint32 *) (x + 52) = in8


# One comparison of bytes against 64 feeds both branches below:
#   bytes > 64  -> more blocks to do (bytesatleast65)
#   bytes == 64 -> finished, the block went straight to out
#   bytes < 64  -> finished, but the result is in tmp and must be copied
                         unsigned>? unsigned<? bytes - 64
  goto bytesatleast65 if unsigned>

    goto bytesatleast64 if !unsigned<
# Short final block: out still points at tmp here, so copy bytes bytes from
# there to the real destination saved in ctarget.
      m = out
      out = ctarget
      i = bytes
      while (i) { *out++ = *m++; --i }
    bytesatleast64:
# done is also the target of the zero-length checks at the three entry points.
    done:

    leave

  bytesatleast65:

# Step past the block just processed and go around again.
  bytes -= 64
  out += 64
  m += 64
goto bytesatleast1


# ECRYPT_init: no work is done here; the function returns immediately.
enter ECRYPT_init
leave


# ECRYPT_keysetup(arg1 = state, arg2 = key, arg3 = key size in bits)
#
# Writes the key words (x1..x4, x11..x14) and the four constant words
# (x0, x5, x10, x15) into the state, using the row layout shown below.
# The counter and IV words are not touched here.
# Any kbits value below 256 takes the 128-bit path; everything else takes
# the 256-bit path.
enter ECRYPT_keysetup

  k = arg2
  kbits = arg3
  x = arg1

# situation at beginning of first round:
# diag0: x0 x5 x10 x15
# diag1: x12 x1 x6 x11
# diag2: x8 x13 x2 x7
# diag3: x4 x9 x14 x3

# First 16 key bytes -> x1, x2, x3, x4 (common to both key sizes).
  in1 = *(uint32 *) (k + 0)
  in2 = *(uint32 *) (k + 4)
  in3 = *(uint32 *) (k + 8)
  in4 = *(uint32 *) (k + 12)
  *(uint32 *) (x + 20) = in1
  *(uint32 *) (x + 40) = in2
  *(uint32 *) (x + 60) = in3
  *(uint32 *) (x + 48) = in4

                   unsigned<? kbits - 256
  goto kbits128 if unsigned<

  kbits256:

# Second 16 key bytes -> x11, x12, x13, x14.
    in11 = *(uint32 *) (k + 16)
    in12 = *(uint32 *) (k + 20)
    in13 = *(uint32 *) (k + 24)
    in14 = *(uint32 *) (k + 28)
    *(uint32 *) (x + 28) = in11
    *(uint32 *) (x + 16) = in12
    *(uint32 *) (x + 36) = in13
    *(uint32 *) (x + 56) = in14

# Constants: the ASCII string "expand 32-byte k" as four little-endian
# 32-bit words (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574).
    in0 = 1634760805
    in5 = 857760878
    in10 = 2036477234
    in15 = 1797285236
    *(uint32 *) (x + 0) = in0
    *(uint32 *) (x + 4) = in5
    *(uint32 *) (x + 8) = in10
    *(uint32 *) (x + 12) = in15

  goto keysetupdone

  kbits128:

# 128-bit key: the same 16 key bytes are used again for x11, x12, x13, x14.
    in11 = *(uint32 *) (k + 0)
    in12 = *(uint32 *) (k + 4)
    in13 = *(uint32 *) (k + 8)
    in14 = *(uint32 *) (k + 12)
    *(uint32 *) (x + 28) = in11
    *(uint32 *) (x + 16) = in12
    *(uint32 *) (x + 36) = in13
    *(uint32 *) (x + 56) = in14

# Constants: the ASCII string "expand 16-byte k" as four little-endian
# 32-bit words (0x61707865, 0x3120646e, 0x79622d36, 0x6b206574).
    in0 = 1634760805
    in5 = 824206446
    in10 = 2036477238
    in15 = 1797285236
    *(uint32 *) (x + 0) = in0
    *(uint32 *) (x + 4) = in5
    *(uint32 *) (x + 8) = in10
    *(uint32 *) (x + 12) = in15

  keysetupdone:

leave


# ECRYPT_ivsetup(arg1 = state, arg2 = iv)
#
# Copies the 8-byte IV into state words x6 and x7 and resets the two block
# counter words x8 and x9 to zero.  Those are the words at x + 32 and
# x + 52 that the encryption code increments after every block.
enter ECRYPT_ivsetup

  iv = arg2
  x = arg1

# situation at beginning of first round:
# diag0: x0 x5 x10 x15
# diag1: x12 x1 x6 x11
# diag2: x8 x13 x2 x7
# diag3: x4 x9 x14 x3

  in6 = *(uint32 *) (iv + 0)
  in7 = *(uint32 *) (iv + 4)
  in8 = 0
  in9 = 0
  *(uint32 *) (x + 24) = in6
  *(uint32 *) (x + 44) = in7
  *(uint32 *) (x + 32) = in8
  *(uint32 *) (x + 52) = in9

leave

eSTREAM Project

Powered by ViewCVS 1.0-dev
(Powered by Apache)

ViewCVS and CVS Help