| /* $Id: nlsfast.c 388 2005-04-28 21:04:09Z mwp $ */ |
/* $Id: nlsfast.c 444 2006-05-17 07:41:23Z mwp $ */ |
| /* nlsfast: NLS stream cipher and Mundja MAC -- fast implementation */ |
/* nlsfast: NLS stream cipher and Mundja MAC -- fast implementation */ |
| |
/* This is "tweaked" to support non-word-multiple keys and |
| |
* variable "Konst" to address Joe Cho's attack. |
| |
*/ |
| |
|
| /* |
/* |
| THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED |
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED |
| */ |
*/ |
| #define OFF(zero, i) (((zero)+(i)) % N) |
#define OFF(zero, i) (((zero)+(i)) % N) |
| |
|
| |
/* Return a non-linear function of some parts of the register. |
| |
*/ |
| |
#define NLFUNC(c,z) \ |
| |
(c->R[OFF(z,0)] + c->R[OFF(z,16)]) \ |
| |
^ (c->R[OFF(z,1)] + c->R[OFF(z,13)]) \ |
| |
^ (c->R[OFF(z,6)] + c->konst) |
| |
|
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| /* Increment counter and mix into register every so often */ |
/* Increment counter and mix into register every so often */ |
| #define FIXCTR(c,z) \ |
#define FIXCTR(c,z) \ |
| if (++c->CtrModF16 == F16) { \ |
if (++c->CtrModF16 == F16) { \ |
| c->CtrMod232 += c->CtrModF16; \ |
c->CtrMod232 += c->CtrModF16; \ |
| c->R[OFF(z,CTRP)] += c->CtrMod232; \ |
c->R[OFF(z,CTRP)] += c->CtrMod232; \ |
| c->CtrMod232 = 0; \ |
c->CtrModF16 = 0; /* account for konst generation */ \ |
| |
c->konst = NLFUNC(c,z); \ |
| |
cycle(c); \ |
| } \ |
} \ |
| } |
} |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| |
|
| |
static WORD |
| |
nltap(nls_ctx *c) |
| |
{ |
| |
return NLFUNC(c, 0); |
| |
} |
| |
|
| /* step the shift register */ |
/* step the shift register */ |
| /* After stepping, "zero" moves right one place */ |
/* After stepping, "zero" moves right one place */ |
| #define STEP(c,z) \ |
#define STEP(c,z) \ |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| } |
} |
| |
|
| /* Return a non-linear function of some parts of the register. |
|
| */ |
|
| #define NLFUNC(c,z) \ |
|
| (c->R[OFF(z,0)] + c->R[OFF(z,16)]) \ |
|
| ^ (c->R[OFF(z,1)] + c->konst) \ |
|
| ^ (c->R[OFF(z,6)] + c->R[OFF(z,13)]) |
|
| |
|
| static WORD |
|
| nltap(nls_ctx *c) |
|
| { |
|
| return NLFUNC(c, 0); |
|
| } |
|
| |
|
| /* The Mundja MAC function is modelled after the round function of SHA-256. |
/* The Mundja MAC function is modelled after the round function of SHA-256. |
| * The following lines establish aliases for the MAC accumulator, just |
* The following lines establish aliases for the MAC accumulator, just |
| * so that the definition of that function looks more like FIPS-180-2. |
* so that the definition of that function looks more like FIPS-180-2. |
| } |
} |
| |
|
| /* Initialise "konst" |
/* Initialise "konst" |
| |
* Tweak -- since Konst now changes regularly, there is no reason |
| |
* to avoid a zero high byte. |
| */ |
*/ |
| static void |
static void |
| nls_genkonst(nls_ctx *c) |
nls_genkonst(nls_ctx *c) |
| { |
{ |
| WORD newkonst; |
|
| |
|
| do { |
|
| cycle(c); |
cycle(c); |
| newkonst = nltap(c); |
c->konst = nltap(c); |
| } while ((newkonst & 0xFF000000) == 0); |
|
| c->konst = newkonst; |
|
| } |
} |
| |
|
| /* Load key material into the register |
/* Load key material into the register |
| } |
} |
| |
|
| /* common actions for loading key material */ |
/* common actions for loading key material */ |
| |
/* Tweak: allow non-word-multiple key and nonce material |
| |
*/ |
| static void |
static void |
| nls_loadkey(nls_ctx *c, UCHAR key[], int keylen) |
nls_loadkey(nls_ctx *c, UCHAR key[], int keylen) |
| { |
{ |
| int i; |
int i, j; |
| WORD k; |
WORD k; |
| |
UCHAR xtra[4]; |
| |
|
| /* start folding in key, reject odd sized keys */ |
/* start folding in key */ |
| if ((keylen & 3) != 0) |
for (i = 0; i < (keylen & ~0x3); i += 4) |
| abort(); |
|
| for (i = 0; i < keylen; i += 4) |
|
| { |
{ |
| k = BYTE2WORD(&key[i]); |
k = BYTE2WORD(&key[i]); |
| ADDKEY(k); |
ADDKEY(k); |
| XORNL(nltap(c)); |
XORNL(nltap(c)); |
| } |
} |
| |
|
| |
/* if there were any extra key bytes, zero pad to a word */ |
| |
if (i < keylen) { |
| |
for (j = 0 /* i unchanged */; i < keylen; ++i) |
| |
xtra[j++] = key[i]; |
| |
for (/* j unchanged */; j < 4; ++j) |
| |
xtra[j] = 0; |
| |
k = BYTE2WORD(xtra); |
| |
ADDKEY(k); |
| |
cycle(c); |
| |
XORNL(nltap(c)); |
| |
} |
| |
|
| /* also fold in the length of the key */ |
/* also fold in the length of the key */ |
| ADDKEY(keylen); |
ADDKEY(keylen); |
| |
|
| nls_nonce(nls_ctx *c, UCHAR nonce[], int noncelen) |
nls_nonce(nls_ctx *c, UCHAR nonce[], int noncelen) |
| { |
{ |
| nls_reloadstate(c); |
nls_reloadstate(c); |
| |
c->konst = INITKONST; /* TWEAK */ |
| nls_loadkey(c, nonce, noncelen); |
nls_loadkey(c, nonce, noncelen); |
| |
nls_genkonst(c); /* TWEAK */ |
| nls_macinit(c); |
nls_macinit(c); |
| c->nbuf = 0; |
c->nbuf = 0; |
| ZEROCOUNTER(c); |
ZEROCOUNTER(c); |
| while (nbytes >= N*4) |
while (nbytes >= N*4) |
| { |
{ |
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| if (c->CtrModF16 < (F16-17)) { |
if (c->CtrModF16 < (F16-18)) { |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| SROUND(0); |
SROUND(0); |
| SROUND(1); |
SROUND(1); |
| SROUND(14); |
SROUND(14); |
| SROUND(15); |
SROUND(15); |
| SROUND(16); |
SROUND(16); |
| |
buf += 4*N; |
| |
nbytes -= N*4; |
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| c->CtrModF16 += 17; |
c->CtrModF16 += 17; |
| } |
} |
| else { |
else { |
| SROUND(0); FIXCTR(c,1); |
do { |
| SROUND(1); FIXCTR(c,2); |
cycle(c); |
| SROUND(2); FIXCTR(c,3); |
t = nltap(c); |
| SROUND(3); FIXCTR(c,4); |
XORWORD(t, buf); |
| SROUND(4); FIXCTR(c,5); |
buf += 4; |
| SROUND(5); FIXCTR(c,6); |
nbytes -= 4; |
| SROUND(6); FIXCTR(c,7); |
} while (F16-18 < c->CtrModF16); |
| SROUND(7); FIXCTR(c,8); |
|
| SROUND(8); FIXCTR(c,9); |
|
| SROUND(9); FIXCTR(c,10); |
|
| SROUND(10); FIXCTR(c,11); |
|
| SROUND(11); FIXCTR(c,12); |
|
| SROUND(12); FIXCTR(c,13); |
|
| SROUND(13); FIXCTR(c,14); |
|
| SROUND(14); FIXCTR(c,15); |
|
| SROUND(15); FIXCTR(c,16); |
|
| SROUND(16); FIXCTR(c,0); |
|
| } |
} |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| buf += 4*N; |
|
| nbytes -= N*4; |
|
| } |
} |
| |
|
| /* do small or odd size buffers the slow way */ |
/* do small or odd size buffers the slow way */ |
| while (4*N <= nbytes) |
while (4*N <= nbytes) |
| { |
{ |
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| if (c->CtrModF16 < (F16-17)) { |
if (c->CtrModF16 < (F16-18)) { |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| MROUND( 0,A,B,C,D,E,F,G,H); |
MROUND( 0,A,B,C,D,E,F,G,H); |
| MROUND( 1,H,A,B,C,D,E,F,G); |
MROUND( 1,H,A,B,C,D,E,F,G); |
| MROUND(14,C,D,E,F,G,H,A,B); |
MROUND(14,C,D,E,F,G,H,A,B); |
| MROUND(15,B,C,D,E,F,G,H,A); |
MROUND(15,B,C,D,E,F,G,H,A); |
| MROUND(16,A,B,C,D,E,F,G,H); |
MROUND(16,A,B,C,D,E,F,G,H); |
| #if NLS_LONG_OUTPUT |
|
| c->CtrModF16 += 17; |
|
| } |
|
| else { |
|
| MROUND( 0,A,B,C,D,E,F,G,H); FIXCTR(c,1); |
|
| MROUND( 1,H,A,B,C,D,E,F,G); FIXCTR(c,2); |
|
| MROUND( 2,G,H,A,B,C,D,E,F); FIXCTR(c,3); |
|
| MROUND( 3,F,G,H,A,B,C,D,E); FIXCTR(c,4); |
|
| MROUND( 4,E,F,G,H,A,B,C,D); FIXCTR(c,5); |
|
| MROUND( 5,D,E,F,G,H,A,B,C); FIXCTR(c,6); |
|
| MROUND( 6,C,D,E,F,G,H,A,B); FIXCTR(c,7); |
|
| MROUND( 7,B,C,D,E,F,G,H,A); FIXCTR(c,8); |
|
| MROUND( 8,A,B,C,D,E,F,G,H); FIXCTR(c,9); |
|
| MROUND( 9,H,A,B,C,D,E,F,G); FIXCTR(c,10); |
|
| MROUND(10,G,H,A,B,C,D,E,F); FIXCTR(c,11); |
|
| MROUND(11,F,G,H,A,B,C,D,E); FIXCTR(c,12); |
|
| MROUND(12,E,F,G,H,A,B,C,D); FIXCTR(c,13); |
|
| MROUND(13,D,E,F,G,H,A,B,C); FIXCTR(c,14); |
|
| MROUND(14,C,D,E,F,G,H,A,B); FIXCTR(c,15); |
|
| MROUND(15,B,C,D,E,F,G,H,A); FIXCTR(c,16); |
|
| MROUND(16,A,B,C,D,E,F,G,H); FIXCTR(c,0); |
|
| } |
|
| #endif /*NLS_LONG_OUTPUT*/ |
|
| buf += 4*N; |
buf += 4*N; |
| nbytes -= 4*N; |
nbytes -= 4*N; |
| /* fix alignment of MAC buffer */ |
/* fix alignment of MAC buffer */ |
| for (i = 1; i < NMAC; ++i) |
for (i = 1; i < NMAC; ++i) |
| c->CRC[i-1] = c->CRC[i]; |
c->CRC[i-1] = c->CRC[i]; |
| c->CRC[NMAC-1] = t; |
c->CRC[NMAC-1] = t; |
| |
#if NLS_LONG_OUTPUT |
| |
c->CtrModF16 += 17; |
| |
} |
| |
else { |
| |
do { |
| |
cycle(c); |
| |
macfunc(c, BYTE2WORD(buf)); |
| |
buf += 4; |
| |
nbytes -= 4; |
| |
} while (F16-18 < c->CtrModF16); |
| |
} |
| |
#endif /*NLS_LONG_OUTPUT*/ |
| } |
} |
| |
|
| /* do small or odd size buffers the slow way */ |
/* do small or odd size buffers the slow way */ |
| while (4*N <= nbytes) |
while (4*N <= nbytes) |
| { |
{ |
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| if (c->CtrModF16 < (F16-17)) { |
if (c->CtrModF16 < (F16-18)) { |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| EROUND( 0,A,B,C,D,E,F,G,H); |
EROUND( 0,A,B,C,D,E,F,G,H); |
| EROUND( 1,H,A,B,C,D,E,F,G); |
EROUND( 1,H,A,B,C,D,E,F,G); |
| EROUND(14,C,D,E,F,G,H,A,B); |
EROUND(14,C,D,E,F,G,H,A,B); |
| EROUND(15,B,C,D,E,F,G,H,A); |
EROUND(15,B,C,D,E,F,G,H,A); |
| EROUND(16,A,B,C,D,E,F,G,H); |
EROUND(16,A,B,C,D,E,F,G,H); |
| #if NLS_LONG_OUTPUT |
|
| c->CtrModF16 += 17; |
|
| } |
|
| else { |
|
| EROUND( 0,A,B,C,D,E,F,G,H); FIXCTR(c,1); |
|
| EROUND( 1,H,A,B,C,D,E,F,G); FIXCTR(c,2); |
|
| EROUND( 2,G,H,A,B,C,D,E,F); FIXCTR(c,3); |
|
| EROUND( 3,F,G,H,A,B,C,D,E); FIXCTR(c,4); |
|
| EROUND( 4,E,F,G,H,A,B,C,D); FIXCTR(c,5); |
|
| EROUND( 5,D,E,F,G,H,A,B,C); FIXCTR(c,6); |
|
| EROUND( 6,C,D,E,F,G,H,A,B); FIXCTR(c,7); |
|
| EROUND( 7,B,C,D,E,F,G,H,A); FIXCTR(c,8); |
|
| EROUND( 8,A,B,C,D,E,F,G,H); FIXCTR(c,9); |
|
| EROUND( 9,H,A,B,C,D,E,F,G); FIXCTR(c,10); |
|
| EROUND(10,G,H,A,B,C,D,E,F); FIXCTR(c,11); |
|
| EROUND(11,F,G,H,A,B,C,D,E); FIXCTR(c,12); |
|
| EROUND(12,E,F,G,H,A,B,C,D); FIXCTR(c,13); |
|
| EROUND(13,D,E,F,G,H,A,B,C); FIXCTR(c,14); |
|
| EROUND(14,C,D,E,F,G,H,A,B); FIXCTR(c,15); |
|
| EROUND(15,B,C,D,E,F,G,H,A); FIXCTR(c,16); |
|
| EROUND(16,A,B,C,D,E,F,G,H); FIXCTR(c,0); |
|
| } |
|
| #endif /*NLS_LONG_OUTPUT*/ |
|
| buf += 4*N; |
buf += 4*N; |
| nbytes -= 4*N; |
nbytes -= 4*N; |
| /* fix alignment of MAC buffer */ |
/* fix alignment of MAC buffer */ |
| for (i = 1; i < NMAC; ++i) |
for (i = 1; i < NMAC; ++i) |
| c->CRC[i-1] = c->CRC[i]; |
c->CRC[i-1] = c->CRC[i]; |
| c->CRC[NMAC-1] = t; |
c->CRC[NMAC-1] = t; |
| |
#if NLS_LONG_OUTPUT |
| |
c->CtrModF16 += 17; |
| |
} |
| |
else { |
| |
do { |
| |
cycle(c); |
| |
t = BYTE2WORD(buf); |
| |
macfunc(c, t); |
| |
t ^= nltap(c); |
| |
WORD2BYTE(t, buf); |
| |
buf += 4; |
| |
nbytes -= 4; |
| |
} while (F16-18 < c->CtrModF16); |
| |
} |
| |
#endif /*NLS_LONG_OUTPUT*/ |
| } |
} |
| |
|
| /* do small or odd size buffers the slow way */ |
/* do small or odd size buffers the slow way */ |
| while (4*N <= nbytes) |
while (4*N <= nbytes) |
| { |
{ |
| #if NLS_LONG_OUTPUT |
#if NLS_LONG_OUTPUT |
| if (c->CtrModF16 < (F16-17)) { |
if (c->CtrModF16 < (F16-18)) { |
| #endif /*NLS_LONG_OUTPUT*/ |
#endif /*NLS_LONG_OUTPUT*/ |
| DROUND( 0,A,B,C,D,E,F,G,H); |
DROUND( 0,A,B,C,D,E,F,G,H); |
| DROUND( 1,H,A,B,C,D,E,F,G); |
DROUND( 1,H,A,B,C,D,E,F,G); |
| DROUND(14,C,D,E,F,G,H,A,B); |
DROUND(14,C,D,E,F,G,H,A,B); |
| DROUND(15,B,C,D,E,F,G,H,A); |
DROUND(15,B,C,D,E,F,G,H,A); |
| DROUND(16,A,B,C,D,E,F,G,H); |
DROUND(16,A,B,C,D,E,F,G,H); |
| #if NLS_LONG_OUTPUT |
|
| c->CtrModF16 += 17; |
|
| } |
|
| else { |
|
| DROUND( 0,A,B,C,D,E,F,G,H); FIXCTR(c,1); |
|
| DROUND( 1,H,A,B,C,D,E,F,G); FIXCTR(c,2); |
|
| DROUND( 2,G,H,A,B,C,D,E,F); FIXCTR(c,3); |
|
| DROUND( 3,F,G,H,A,B,C,D,E); FIXCTR(c,4); |
|
| DROUND( 4,E,F,G,H,A,B,C,D); FIXCTR(c,5); |
|
| DROUND( 5,D,E,F,G,H,A,B,C); FIXCTR(c,6); |
|
| DROUND( 6,C,D,E,F,G,H,A,B); FIXCTR(c,7); |
|
| DROUND( 7,B,C,D,E,F,G,H,A); FIXCTR(c,8); |
|
| DROUND( 8,A,B,C,D,E,F,G,H); FIXCTR(c,9); |
|
| DROUND( 9,H,A,B,C,D,E,F,G); FIXCTR(c,10); |
|
| DROUND(10,G,H,A,B,C,D,E,F); FIXCTR(c,11); |
|
| DROUND(11,F,G,H,A,B,C,D,E); FIXCTR(c,12); |
|
| DROUND(12,E,F,G,H,A,B,C,D); FIXCTR(c,13); |
|
| DROUND(13,D,E,F,G,H,A,B,C); FIXCTR(c,14); |
|
| DROUND(14,C,D,E,F,G,H,A,B); FIXCTR(c,15); |
|
| DROUND(15,B,C,D,E,F,G,H,A); FIXCTR(c,16); |
|
| DROUND(16,A,B,C,D,E,F,G,H); FIXCTR(c,0); |
|
| } |
|
| #endif /*NLS_LONG_OUTPUT*/ |
|
| buf += 4*N; |
buf += 4*N; |
| nbytes -= 4*N; |
nbytes -= 4*N; |
| /* fix alignment of MAC buffer */ |
/* fix alignment of MAC buffer */ |
| for (i = 1; i < NMAC; ++i) |
for (i = 1; i < NMAC; ++i) |
| c->CRC[i-1] = c->CRC[i]; |
c->CRC[i-1] = c->CRC[i]; |
| c->CRC[NMAC-1] = t; |
c->CRC[NMAC-1] = t; |
| |
#if NLS_LONG_OUTPUT |
| |
c->CtrModF16 += 17; |
| |
} |
| |
else { |
| |
do { |
| |
cycle(c); |
| |
t = nltap(c); |
| |
t3 = BYTE2WORD(buf); |
| |
t ^= t3; |
| |
macfunc(c, t); |
| |
WORD2BYTE(t, buf); |
| |
buf += 4; |
| |
nbytes -= 4; |
| |
} while (F16-18 < c->CtrModF16); |
| |
} |
| |
#endif /*NLS_LONG_OUTPUT*/ |
| } |
} |
| |
|
| /* do small or odd size buffers the slow way */ |
/* do small or odd size buffers the slow way */ |