mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-22 22:00:39 -06:00
Update external polarssl to 1.3.8
Upstream fixed compilation on MIPS64 on March 13th, 2014, and made SPARC64 fixes on June 25th, 2014. There are probably more changes in this release, but those are the ones I care about.
This commit is contained in:
410
Externals/polarssl/library/aesni.c
vendored
410
Externals/polarssl/library/aesni.c
vendored
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* AES-NI support functions
|
||||
*
|
||||
* Copyright (C) 2013, Brainspark B.V.
|
||||
* Copyright (C) 2006-2014, Brainspark B.V.
|
||||
*
|
||||
* This file is part of PolarSSL (http://www.polarssl.org)
|
||||
* Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
|
||||
@ -28,7 +28,11 @@
|
||||
* [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
|
||||
*/
|
||||
|
||||
#if !defined(POLARSSL_CONFIG_FILE)
|
||||
#include "polarssl/config.h"
|
||||
#else
|
||||
#include POLARSSL_CONFIG_FILE
|
||||
#endif
|
||||
|
||||
#if defined(POLARSSL_AESNI_C)
|
||||
|
||||
@ -47,8 +51,8 @@ int aesni_supports( unsigned int what )
|
||||
|
||||
if( ! done )
|
||||
{
|
||||
asm( "movl $1, %%eax \n"
|
||||
"cpuid \n"
|
||||
asm( "movl $1, %%eax \n\t"
|
||||
"cpuid \n\t"
|
||||
: "=c" (c)
|
||||
:
|
||||
: "eax", "ebx", "edx" );
|
||||
@ -58,6 +62,32 @@ int aesni_supports( unsigned int what )
|
||||
return( ( c & what ) != 0 );
|
||||
}
|
||||
|
||||
/*
|
||||
* Binutils needs to be at least 2.19 to support AES-NI instructions.
|
||||
* Unfortunately, a lot of users have a lower version now (2014-04).
|
||||
* Emit bytecode directly in order to support "old" versions of gas.
|
||||
*
|
||||
* Opcodes from the Intel architecture reference manual, vol. 3.
|
||||
* We always use registers, so we don't need prefixes for memory operands.
|
||||
* Operand macros are in gas order (src, dst) as opposed to Intel order
|
||||
* (dst, src) in order to blend better into the surrounding assembly code.
|
||||
*/
|
||||
/*
 * Instruction opcodes, spelled as gas ".byte" directives.
 * Each string deliberately ends with a trailing comma: at the point of use
 * an operand macro (and, for instructions that take one, an immediate such
 * as ",0x01") is appended by string-literal concatenation to complete the
 * directive, e.g.  AESENC xmm1_xmm0 "\n\t".
 */
#define AESDEC ".byte 0x66,0x0F,0x38,0xDE,"
|
||||
#define AESDECLAST ".byte 0x66,0x0F,0x38,0xDF,"
|
||||
#define AESENC ".byte 0x66,0x0F,0x38,0xDC,"
|
||||
#define AESENCLAST ".byte 0x66,0x0F,0x38,0xDD,"
|
||||
#define AESIMC ".byte 0x66,0x0F,0x38,0xDB,"
|
||||
#define AESKEYGENA ".byte 0x66,0x0F,0x3A,0xDF,"
|
||||
#define PCLMULQDQ ".byte 0x66,0x0F,0x3A,0x44,"
|
||||
|
||||
/*
 * Register-operand bytes, named <src>_<dst>.
 * Every value has the form 0xC0 | (dst << 3) | src, i.e. both operands are
 * registers (presumably the ModRM byte in register-direct form - confirm
 * against the Intel manual before adding new pairs).
 * One macro is defined per register pair used by the assembly in this file.
 */
#define xmm0_xmm0 "0xC0"
|
||||
#define xmm0_xmm1 "0xC8"
|
||||
#define xmm0_xmm2 "0xD0"
|
||||
#define xmm0_xmm3 "0xD8"
|
||||
#define xmm0_xmm4 "0xE0"
|
||||
#define xmm1_xmm0 "0xC1"
|
||||
#define xmm1_xmm2 "0xD1"
|
||||
|
||||
/*
|
||||
* AES-NI AES-ECB block en(de)cryption
|
||||
*/
|
||||
@ -66,35 +96,35 @@ int aesni_crypt_ecb( aes_context *ctx,
|
||||
const unsigned char input[16],
|
||||
unsigned char output[16] )
|
||||
{
|
||||
asm( "movdqu (%3), %%xmm0 \n" // load input
|
||||
"movdqu (%1), %%xmm1 \n" // load round key 0
|
||||
"pxor %%xmm1, %%xmm0 \n" // round 0
|
||||
"addq $16, %1 \n" // point to next round key
|
||||
"subl $1, %0 \n" // normal rounds = nr - 1
|
||||
"test %2, %2 \n" // mode?
|
||||
"jz 2f \n" // 0 = decrypt
|
||||
asm( "movdqu (%3), %%xmm0 \n\t" // load input
|
||||
"movdqu (%1), %%xmm1 \n\t" // load round key 0
|
||||
"pxor %%xmm1, %%xmm0 \n\t" // round 0
|
||||
"addq $16, %1 \n\t" // point to next round key
|
||||
"subl $1, %0 \n\t" // normal rounds = nr - 1
|
||||
"test %2, %2 \n\t" // mode?
|
||||
"jz 2f \n\t" // 0 = decrypt
|
||||
|
||||
"1: \n" // encryption loop
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesenc %%xmm1, %%xmm0 \n" // do round
|
||||
"addq $16, %1 \n" // point to next round key
|
||||
"subl $1, %0 \n" // loop
|
||||
"jnz 1b \n"
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesenclast %%xmm1, %%xmm0 \n" // last round
|
||||
"jmp 3f \n"
|
||||
"1: \n\t" // encryption loop
|
||||
"movdqu (%1), %%xmm1 \n\t" // load round key
|
||||
AESENC xmm1_xmm0 "\n\t" // do round
|
||||
"addq $16, %1 \n\t" // point to next round key
|
||||
"subl $1, %0 \n\t" // loop
|
||||
"jnz 1b \n\t"
|
||||
"movdqu (%1), %%xmm1 \n\t" // load round key
|
||||
AESENCLAST xmm1_xmm0 "\n\t" // last round
|
||||
"jmp 3f \n\t"
|
||||
|
||||
"2: \n" // decryption loop
|
||||
"movdqu (%1), %%xmm1 \n"
|
||||
"aesdec %%xmm1, %%xmm0 \n"
|
||||
"addq $16, %1 \n"
|
||||
"subl $1, %0 \n"
|
||||
"jnz 2b \n"
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesdeclast %%xmm1, %%xmm0 \n" // last round
|
||||
"2: \n\t" // decryption loop
|
||||
"movdqu (%1), %%xmm1 \n\t"
|
||||
AESDEC xmm1_xmm0 "\n\t" // do round
|
||||
"addq $16, %1 \n\t"
|
||||
"subl $1, %0 \n\t"
|
||||
"jnz 2b \n\t"
|
||||
"movdqu (%1), %%xmm1 \n\t" // load round key
|
||||
AESDECLAST xmm1_xmm0 "\n\t" // last round
|
||||
|
||||
"3: \n"
|
||||
"movdqu %%xmm0, (%4) \n" // export output
|
||||
"3: \n\t"
|
||||
"movdqu %%xmm0, (%4) \n\t" // export output
|
||||
:
|
||||
: "r" (ctx->nr), "r" (ctx->rk), "r" (mode), "r" (input), "r" (output)
|
||||
: "memory", "cc", "xmm0", "xmm1" );
|
||||
@ -121,44 +151,44 @@ void aesni_gcm_mult( unsigned char c[16],
|
||||
bb[i] = b[15 - i];
|
||||
}
|
||||
|
||||
asm( "movdqu (%0), %%xmm0 \n" // a1:a0
|
||||
"movdqu (%1), %%xmm1 \n" // b1:b0
|
||||
asm( "movdqu (%0), %%xmm0 \n\t" // a1:a0
|
||||
"movdqu (%1), %%xmm1 \n\t" // b1:b0
|
||||
|
||||
/*
|
||||
* Carry-less multiplication xmm2:xmm1 = xmm0 * xmm1
|
||||
* using [CLMUL-WP] algorithm 1 (p. 13).
|
||||
*/
|
||||
"movdqa %%xmm1, %%xmm2 \n" // copy of b1:b0
|
||||
"movdqa %%xmm1, %%xmm3 \n" // same
|
||||
"movdqa %%xmm1, %%xmm4 \n" // same
|
||||
"pclmulqdq $0x00, %%xmm0, %%xmm1 \n" // a0*b0 = c1:c0
|
||||
"pclmulqdq $0x11, %%xmm0, %%xmm2 \n" // a1*b1 = d1:d0
|
||||
"pclmulqdq $0x10, %%xmm0, %%xmm3 \n" // a0*b1 = e1:e0
|
||||
"pclmulqdq $0x01, %%xmm0, %%xmm4 \n" // a1*b0 = f1:f0
|
||||
"pxor %%xmm3, %%xmm4 \n" // e1+f1:e0+f0
|
||||
"movdqa %%xmm4, %%xmm3 \n" // same
|
||||
"psrldq $8, %%xmm4 \n" // 0:e1+f1
|
||||
"pslldq $8, %%xmm3 \n" // e0+f0:0
|
||||
"pxor %%xmm4, %%xmm2 \n" // d1:d0+e1+f1
|
||||
"pxor %%xmm3, %%xmm1 \n" // c1+e0+f1:c0
|
||||
"movdqa %%xmm1, %%xmm2 \n\t" // copy of b1:b0
|
||||
"movdqa %%xmm1, %%xmm3 \n\t" // same
|
||||
"movdqa %%xmm1, %%xmm4 \n\t" // same
|
||||
PCLMULQDQ xmm0_xmm1 ",0x00 \n\t" // a0*b0 = c1:c0
|
||||
PCLMULQDQ xmm0_xmm2 ",0x11 \n\t" // a1*b1 = d1:d0
|
||||
PCLMULQDQ xmm0_xmm3 ",0x10 \n\t" // a0*b1 = e1:e0
|
||||
PCLMULQDQ xmm0_xmm4 ",0x01 \n\t" // a1*b0 = f1:f0
|
||||
"pxor %%xmm3, %%xmm4 \n\t" // e1+f1:e0+f0
|
||||
"movdqa %%xmm4, %%xmm3 \n\t" // same
|
||||
"psrldq $8, %%xmm4 \n\t" // 0:e1+f1
|
||||
"pslldq $8, %%xmm3 \n\t" // e0+f0:0
|
||||
"pxor %%xmm4, %%xmm2 \n\t" // d1:d0+e1+f1
|
||||
"pxor %%xmm3, %%xmm1 \n\t" // c1+e0+f1:c0
|
||||
|
||||
/*
|
||||
* Now shift the result one bit to the left,
|
||||
* taking advantage of [CLMUL-WP] eq 27 (p. 20)
|
||||
*/
|
||||
"movdqa %%xmm1, %%xmm3 \n" // r1:r0
|
||||
"movdqa %%xmm2, %%xmm4 \n" // r3:r2
|
||||
"psllq $1, %%xmm1 \n" // r1<<1:r0<<1
|
||||
"psllq $1, %%xmm2 \n" // r3<<1:r2<<1
|
||||
"psrlq $63, %%xmm3 \n" // r1>>63:r0>>63
|
||||
"psrlq $63, %%xmm4 \n" // r3>>63:r2>>63
|
||||
"movdqa %%xmm3, %%xmm5 \n" // r1>>63:r0>>63
|
||||
"pslldq $8, %%xmm3 \n" // r0>>63:0
|
||||
"pslldq $8, %%xmm4 \n" // r2>>63:0
|
||||
"psrldq $8, %%xmm5 \n" // 0:r1>>63
|
||||
"por %%xmm3, %%xmm1 \n" // r1<<1|r0>>63:r0<<1
|
||||
"por %%xmm4, %%xmm2 \n" // r3<<1|r2>>62:r2<<1
|
||||
"por %%xmm5, %%xmm2 \n" // r3<<1|r2>>62:r2<<1|r1>>63
|
||||
"movdqa %%xmm1, %%xmm3 \n\t" // r1:r0
|
||||
"movdqa %%xmm2, %%xmm4 \n\t" // r3:r2
|
||||
"psllq $1, %%xmm1 \n\t" // r1<<1:r0<<1
|
||||
"psllq $1, %%xmm2 \n\t" // r3<<1:r2<<1
|
||||
"psrlq $63, %%xmm3 \n\t" // r1>>63:r0>>63
|
||||
"psrlq $63, %%xmm4 \n\t" // r3>>63:r2>>63
|
||||
"movdqa %%xmm3, %%xmm5 \n\t" // r1>>63:r0>>63
|
||||
"pslldq $8, %%xmm3 \n\t" // r0>>63:0
|
||||
"pslldq $8, %%xmm4 \n\t" // r2>>63:0
|
||||
"psrldq $8, %%xmm5 \n\t" // 0:r1>>63
|
||||
"por %%xmm3, %%xmm1 \n\t" // r1<<1|r0>>63:r0<<1
|
||||
"por %%xmm4, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1
|
||||
"por %%xmm5, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1|r1>>63
|
||||
|
||||
/*
|
||||
* Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
|
||||
@ -166,44 +196,44 @@ void aesni_gcm_mult( unsigned char c[16],
|
||||
* Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted).
|
||||
*/
|
||||
/* Step 2 (1) */
|
||||
"movdqa %%xmm1, %%xmm3 \n" // x1:x0
|
||||
"movdqa %%xmm1, %%xmm4 \n" // same
|
||||
"movdqa %%xmm1, %%xmm5 \n" // same
|
||||
"psllq $63, %%xmm3 \n" // x1<<63:x0<<63 = stuff:a
|
||||
"psllq $62, %%xmm4 \n" // x1<<62:x0<<62 = stuff:b
|
||||
"psllq $57, %%xmm5 \n" // x1<<57:x0<<57 = stuff:c
|
||||
"movdqa %%xmm1, %%xmm3 \n\t" // x1:x0
|
||||
"movdqa %%xmm1, %%xmm4 \n\t" // same
|
||||
"movdqa %%xmm1, %%xmm5 \n\t" // same
|
||||
"psllq $63, %%xmm3 \n\t" // x1<<63:x0<<63 = stuff:a
|
||||
"psllq $62, %%xmm4 \n\t" // x1<<62:x0<<62 = stuff:b
|
||||
"psllq $57, %%xmm5 \n\t" // x1<<57:x0<<57 = stuff:c
|
||||
|
||||
/* Step 2 (2) */
|
||||
"pxor %%xmm4, %%xmm3 \n" // stuff:a+b
|
||||
"pxor %%xmm5, %%xmm3 \n" // stuff:a+b+c
|
||||
"pslldq $8, %%xmm3 \n" // a+b+c:0
|
||||
"pxor %%xmm3, %%xmm1 \n" // x1+a+b+c:x0 = d:x0
|
||||
"pxor %%xmm4, %%xmm3 \n\t" // stuff:a+b
|
||||
"pxor %%xmm5, %%xmm3 \n\t" // stuff:a+b+c
|
||||
"pslldq $8, %%xmm3 \n\t" // a+b+c:0
|
||||
"pxor %%xmm3, %%xmm1 \n\t" // x1+a+b+c:x0 = d:x0
|
||||
|
||||
/* Steps 3 and 4 */
|
||||
"movdqa %%xmm1,%%xmm0 \n" // d:x0
|
||||
"movdqa %%xmm1,%%xmm4 \n" // same
|
||||
"movdqa %%xmm1,%%xmm5 \n" // same
|
||||
"psrlq $1, %%xmm0 \n" // e1:x0>>1 = e1:e0'
|
||||
"psrlq $2, %%xmm4 \n" // f1:x0>>2 = f1:f0'
|
||||
"psrlq $7, %%xmm5 \n" // g1:x0>>7 = g1:g0'
|
||||
"pxor %%xmm4, %%xmm0 \n" // e1+f1:e0'+f0'
|
||||
"pxor %%xmm5, %%xmm0 \n" // e1+f1+g1:e0'+f0'+g0'
|
||||
// e0'+f0'+g0' is almost e0+f0+g0, except for some missing
|
||||
// bits carried from d. Now get those bits back in.
|
||||
"movdqa %%xmm1,%%xmm3 \n" // d:x0
|
||||
"movdqa %%xmm1,%%xmm4 \n" // same
|
||||
"movdqa %%xmm1,%%xmm5 \n" // same
|
||||
"psllq $63, %%xmm3 \n" // d<<63:stuff
|
||||
"psllq $62, %%xmm4 \n" // d<<62:stuff
|
||||
"psllq $57, %%xmm5 \n" // d<<57:stuff
|
||||
"pxor %%xmm4, %%xmm3 \n" // d<<63+d<<62:stuff
|
||||
"pxor %%xmm5, %%xmm3 \n" // missing bits of d:stuff
|
||||
"psrldq $8, %%xmm3 \n" // 0:missing bits of d
|
||||
"pxor %%xmm3, %%xmm0 \n" // e1+f1+g1:e0+f0+g0
|
||||
"pxor %%xmm1, %%xmm0 \n" // h1:h0
|
||||
"pxor %%xmm2, %%xmm0 \n" // x3+h1:x2+h0
|
||||
"movdqa %%xmm1,%%xmm0 \n\t" // d:x0
|
||||
"movdqa %%xmm1,%%xmm4 \n\t" // same
|
||||
"movdqa %%xmm1,%%xmm5 \n\t" // same
|
||||
"psrlq $1, %%xmm0 \n\t" // e1:x0>>1 = e1:e0'
|
||||
"psrlq $2, %%xmm4 \n\t" // f1:x0>>2 = f1:f0'
|
||||
"psrlq $7, %%xmm5 \n\t" // g1:x0>>7 = g1:g0'
|
||||
"pxor %%xmm4, %%xmm0 \n\t" // e1+f1:e0'+f0'
|
||||
"pxor %%xmm5, %%xmm0 \n\t" // e1+f1+g1:e0'+f0'+g0'
|
||||
// e0'+f0'+g0' is almost e0+f0+g0, except for some missing
|
||||
// bits carried from d. Now get those bits back in.
|
||||
"movdqa %%xmm1,%%xmm3 \n\t" // d:x0
|
||||
"movdqa %%xmm1,%%xmm4 \n\t" // same
|
||||
"movdqa %%xmm1,%%xmm5 \n\t" // same
|
||||
"psllq $63, %%xmm3 \n\t" // d<<63:stuff
|
||||
"psllq $62, %%xmm4 \n\t" // d<<62:stuff
|
||||
"psllq $57, %%xmm5 \n\t" // d<<57:stuff
|
||||
"pxor %%xmm4, %%xmm3 \n\t" // d<<63+d<<62:stuff
|
||||
"pxor %%xmm5, %%xmm3 \n\t" // missing bits of d:stuff
|
||||
"psrldq $8, %%xmm3 \n\t" // 0:missing bits of d
|
||||
"pxor %%xmm3, %%xmm0 \n\t" // e1+f1+g1:e0+f0+g0
|
||||
"pxor %%xmm1, %%xmm0 \n\t" // h1:h0
|
||||
"pxor %%xmm2, %%xmm0 \n\t" // x3+h1:x2+h0
|
||||
|
||||
"movdqu %%xmm0, (%2) \n" // done
|
||||
"movdqu %%xmm0, (%2) \n\t" // done
|
||||
:
|
||||
: "r" (aa), "r" (bb), "r" (cc)
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );
|
||||
@ -227,9 +257,9 @@ void aesni_inverse_key( unsigned char *invkey,
|
||||
memcpy( ik, fk, 16 );
|
||||
|
||||
for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
|
||||
asm( "movdqu (%0), %%xmm0 \n"
|
||||
"aesimc %%xmm0, %%xmm0 \n"
|
||||
"movdqu %%xmm0, (%1) \n"
|
||||
asm( "movdqu (%0), %%xmm0 \n\t"
|
||||
AESIMC xmm0_xmm0 "\n\t"
|
||||
"movdqu %%xmm0, (%1) \n\t"
|
||||
:
|
||||
: "r" (fk), "r" (ik)
|
||||
: "memory", "xmm0" );
|
||||
@ -243,9 +273,9 @@ void aesni_inverse_key( unsigned char *invkey,
|
||||
static void aesni_setkey_enc_128( unsigned char *rk,
|
||||
const unsigned char *key )
|
||||
{
|
||||
asm( "movdqu (%1), %%xmm0 \n" // copy the original key
|
||||
"movdqu %%xmm0, (%0) \n" // as round key 0
|
||||
"jmp 2f \n" // skip auxiliary routine
|
||||
asm( "movdqu (%1), %%xmm0 \n\t" // copy the original key
|
||||
"movdqu %%xmm0, (%0) \n\t" // as round key 0
|
||||
"jmp 2f \n\t" // skip auxiliary routine
|
||||
|
||||
/*
|
||||
* Finish generating the next round key.
|
||||
@ -257,31 +287,31 @@ static void aesni_setkey_enc_128( unsigned char *rk,
|
||||
* with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
|
||||
* and those are written to the round key buffer.
|
||||
*/
|
||||
"1: \n"
|
||||
"pshufd $0xff, %%xmm1, %%xmm1 \n" // X:X:X:X
|
||||
"pxor %%xmm0, %%xmm1 \n" // X+r3:X+r2:X+r1:r4
|
||||
"pslldq $4, %%xmm0 \n" // r2:r1:r0:0
|
||||
"pxor %%xmm0, %%xmm1 \n" // X+r3+r2:X+r2+r1:r5:r4
|
||||
"pslldq $4, %%xmm0 \n" // etc
|
||||
"pxor %%xmm0, %%xmm1 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm1, %%xmm0 \n" // update xmm0 for next time!
|
||||
"add $16, %0 \n" // point to next round key
|
||||
"movdqu %%xmm0, (%0) \n" // write it
|
||||
"ret \n"
|
||||
"1: \n\t"
|
||||
"pshufd $0xff, %%xmm1, %%xmm1 \n\t" // X:X:X:X
|
||||
"pxor %%xmm0, %%xmm1 \n\t" // X+r3:X+r2:X+r1:r4
|
||||
"pslldq $4, %%xmm0 \n\t" // r2:r1:r0:0
|
||||
"pxor %%xmm0, %%xmm1 \n\t" // X+r3+r2:X+r2+r1:r5:r4
|
||||
"pslldq $4, %%xmm0 \n\t" // etc
|
||||
"pxor %%xmm0, %%xmm1 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm1, %%xmm0 \n\t" // update xmm0 for next time!
|
||||
"add $16, %0 \n\t" // point to next round key
|
||||
"movdqu %%xmm0, (%0) \n\t" // write it
|
||||
"ret \n\t"
|
||||
|
||||
/* Main "loop" */
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x80, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x1B, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x36, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"2: \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x01 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x02 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x04 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x08 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x10 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x20 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x40 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x80 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x1B \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm0_xmm1 ",0x36 \n\tcall 1b \n\t"
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
: "memory", "cc", "0" );
|
||||
@ -293,13 +323,13 @@ static void aesni_setkey_enc_128( unsigned char *rk,
|
||||
static void aesni_setkey_enc_192( unsigned char *rk,
|
||||
const unsigned char *key )
|
||||
{
|
||||
asm( "movdqu (%1), %%xmm0 \n" // copy original round key
|
||||
"movdqu %%xmm0, (%0) \n"
|
||||
"add $16, %0 \n"
|
||||
"movq 16(%1), %%xmm1 \n"
|
||||
"movq %%xmm1, (%0) \n"
|
||||
"add $8, %0 \n"
|
||||
"jmp 2f \n" // skip auxiliary routine
|
||||
asm( "movdqu (%1), %%xmm0 \n\t" // copy original round key
|
||||
"movdqu %%xmm0, (%0) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"movq 16(%1), %%xmm1 \n\t"
|
||||
"movq %%xmm1, (%0) \n\t"
|
||||
"add $8, %0 \n\t"
|
||||
"jmp 2f \n\t" // skip auxiliary routine
|
||||
|
||||
/*
|
||||
* Finish generating the next 6 quarter-keys.
|
||||
@ -310,34 +340,34 @@ static void aesni_setkey_enc_192( unsigned char *rk,
|
||||
* On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
|
||||
* and those are written to the round key buffer.
|
||||
*/
|
||||
"1: \n"
|
||||
"pshufd $0x55, %%xmm2, %%xmm2 \n" // X:X:X:X
|
||||
"pxor %%xmm0, %%xmm2 \n" // X+r3:X+r2:X+r1:r4
|
||||
"pslldq $4, %%xmm0 \n" // etc
|
||||
"pxor %%xmm0, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm0, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm2, %%xmm0 \n" // update xmm0 = r9:r8:r7:r6
|
||||
"movdqu %%xmm0, (%0) \n"
|
||||
"add $16, %0 \n"
|
||||
"pshufd $0xff, %%xmm0, %%xmm2 \n" // r9:r9:r9:r9
|
||||
"pxor %%xmm1, %%xmm2 \n" // stuff:stuff:r9+r5:r10
|
||||
"pslldq $4, %%xmm1 \n" // r2:r1:r0:0
|
||||
"pxor %%xmm2, %%xmm1 \n" // update xmm1 = stuff:stuff:r11:r10
|
||||
"movq %%xmm1, (%0) \n"
|
||||
"add $8, %0 \n"
|
||||
"ret \n"
|
||||
"1: \n\t"
|
||||
"pshufd $0x55, %%xmm2, %%xmm2 \n\t" // X:X:X:X
|
||||
"pxor %%xmm0, %%xmm2 \n\t" // X+r3:X+r2:X+r1:r4
|
||||
"pslldq $4, %%xmm0 \n\t" // etc
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm2, %%xmm0 \n\t" // update xmm0 = r9:r8:r7:r6
|
||||
"movdqu %%xmm0, (%0) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"pshufd $0xff, %%xmm0, %%xmm2 \n\t" // r9:r9:r9:r9
|
||||
"pxor %%xmm1, %%xmm2 \n\t" // stuff:stuff:r9+r5:r10
|
||||
"pslldq $4, %%xmm1 \n\t" // r2:r1:r0:0
|
||||
"pxor %%xmm2, %%xmm1 \n\t" // xmm1 = stuff:stuff:r11:r10
|
||||
"movq %%xmm1, (%0) \n\t"
|
||||
"add $8, %0 \n\t"
|
||||
"ret \n\t"
|
||||
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x80, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"2: \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x80 \n\tcall 1b \n\t"
|
||||
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
@ -350,12 +380,12 @@ static void aesni_setkey_enc_192( unsigned char *rk,
|
||||
static void aesni_setkey_enc_256( unsigned char *rk,
|
||||
const unsigned char *key )
|
||||
{
|
||||
asm( "movdqu (%1), %%xmm0 \n"
|
||||
"movdqu %%xmm0, (%0) \n"
|
||||
"add $16, %0 \n"
|
||||
"movdqu 16(%1), %%xmm1 \n"
|
||||
"movdqu %%xmm1, (%0) \n"
|
||||
"jmp 2f \n" // skip auxiliary routine
|
||||
asm( "movdqu (%1), %%xmm0 \n\t"
|
||||
"movdqu %%xmm0, (%0) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"movdqu 16(%1), %%xmm1 \n\t"
|
||||
"movdqu %%xmm1, (%0) \n\t"
|
||||
"jmp 2f \n\t" // skip auxiliary routine
|
||||
|
||||
/*
|
||||
* Finish generating the next two round keys.
|
||||
@ -366,45 +396,45 @@ static void aesni_setkey_enc_256( unsigned char *rk,
|
||||
* On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
|
||||
* and those have been written to the output buffer.
|
||||
*/
|
||||
"1: \n"
|
||||
"pshufd $0xff, %%xmm2, %%xmm2 \n"
|
||||
"pxor %%xmm0, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm0, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm0, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm0 \n"
|
||||
"pxor %%xmm2, %%xmm0 \n"
|
||||
"add $16, %0 \n"
|
||||
"movdqu %%xmm0, (%0) \n"
|
||||
"1: \n\t"
|
||||
"pshufd $0xff, %%xmm2, %%xmm2 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm0 \n\t"
|
||||
"pxor %%xmm2, %%xmm0 \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"movdqu %%xmm0, (%0) \n\t"
|
||||
|
||||
/* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
|
||||
* and proceed to generate next round key from there */
|
||||
"aeskeygenassist $0, %%xmm0, %%xmm2\n"
|
||||
"pshufd $0xaa, %%xmm2, %%xmm2 \n"
|
||||
"pxor %%xmm1, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm1 \n"
|
||||
"pxor %%xmm1, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm1 \n"
|
||||
"pxor %%xmm1, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm1 \n"
|
||||
"pxor %%xmm2, %%xmm1 \n"
|
||||
"add $16, %0 \n"
|
||||
"movdqu %%xmm1, (%0) \n"
|
||||
"ret \n"
|
||||
AESKEYGENA xmm0_xmm2 ",0x00 \n\t"
|
||||
"pshufd $0xaa, %%xmm2, %%xmm2 \n\t"
|
||||
"pxor %%xmm1, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm1 \n\t"
|
||||
"pxor %%xmm1, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm1 \n\t"
|
||||
"pxor %%xmm1, %%xmm2 \n\t"
|
||||
"pslldq $4, %%xmm1 \n\t"
|
||||
"pxor %%xmm2, %%xmm1 \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"movdqu %%xmm1, (%0) \n\t"
|
||||
"ret \n\t"
|
||||
|
||||
/*
|
||||
* Main "loop" - Generating one more key than necessary,
|
||||
* see definition of aes_context.buf
|
||||
*/
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"2: \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t"
|
||||
AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t"
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
: "memory", "cc", "0" );
|
||||
|
Reference in New Issue
Block a user