diff --git a/src/DSi_AES.h b/src/DSi_AES.h index 9baf747e..f3aa550c 100644 --- a/src/DSi_AES.h +++ b/src/DSi_AES.h @@ -28,14 +28,14 @@ // NOTE: Yes, the compiler does *not* recognize this code pattern, so it is indeed an optimization. __attribute((always_inline)) static void Bswap128(void* Dst, void* Src) { - *(__int128*)&Dst = __builtin_bswap128(*(__int128*)&Src); + *(__int128*)Dst = __builtin_bswap128(*(__int128*)Src); } #else __attribute((always_inline)) static void Bswap128(void* Dst, void* Src) { for (int i = 0; i < 16; ++i) { - ((char*)Src)[i] = ((char*)Src)[15 - i]; + ((u8*)Dst)[i] = ((u8*)Src)[15 - i]; } } #endif diff --git a/src/DSi_NAND.cpp b/src/DSi_NAND.cpp index 21da7ed3..df9cdd2b 100644 --- a/src/DSi_NAND.cpp +++ b/src/DSi_NAND.cpp @@ -337,13 +337,16 @@ bool ESEncrypt(u8* data, u32 len) { u8 rem[16]; - Bswap128(rem, &data[coarselen]); + memset(rem, 0, 16); + for (int i = 0; i < remlen; i++) + rem[15-i] = data[coarselen+i]; for (int i = 0; i < 16; i++) mac[i] ^= rem[i]; AES_CTR_xcrypt_buffer(&ctx, rem, sizeof(rem)); AES_ECB_encrypt(&ctx, mac); - Bswap128(&data[coarselen], rem); + for (int i = 0; i < remlen; i++) + data[coarselen+i] = rem[15-i]; } ctx.Iv[13] = 0x00; @@ -424,14 +427,20 @@ bool ESDecrypt(u8* data, u32 len) iv[14] = (ivnum >> 8) & 0xFF; iv[15] = ivnum & 0xFF; - Bswap128(rem, &data[coarselen]); + memset(rem, 0, 16); + AES_ctx_set_iv(&ctx, iv); + AES_CTR_xcrypt_buffer(&ctx, rem, 16); + + for (int i = 0; i < remlen; i++) + rem[15-i] = data[coarselen+i]; AES_ctx_set_iv(&ctx, iv); AES_CTR_xcrypt_buffer(&ctx, rem, 16); for (int i = 0; i < 16; i++) mac[i] ^= rem[i]; AES_ECB_encrypt(&ctx, mac); - Bswap128(&data[coarselen], rem); + for (int i = 0; i < remlen; i++) + data[coarselen+i] = rem[15-i]; } ctx.Iv[13] = 0x00;