arm7 muls carry flag emulation.

This commit is contained in:
Jaklyy 2024-11-06 00:18:29 -05:00
parent 3c7db9b21f
commit 3d49f5f256
2 changed files with 146 additions and 9 deletions

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include "ARM.h"
#include "NDS.h"
#include "ARMInterpreter_MultiplySuperLLE.h"
namespace melonDS::ARMInterpreter
{
@ -854,7 +855,6 @@ void A_MUL(ARM* cpu)
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -866,6 +866,7 @@ void A_MUL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
else cycles = 4;
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4));
}
cpu->AddCycles_CI(cycles);
@ -886,7 +887,6 @@ void A_MLA(ARM* cpu)
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -898,6 +898,7 @@ void A_MLA(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -919,7 +920,6 @@ void A_UMULL(ARM* cpu)
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -931,6 +931,7 @@ void A_UMULL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -955,7 +956,6 @@ void A_UMLAL(ARM* cpu)
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -967,6 +967,7 @@ void A_UMLAL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -988,7 +989,6 @@ void A_SMULL(ARM* cpu)
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -1000,6 +1000,7 @@ void A_SMULL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -1024,7 +1025,6 @@ void A_SMLAL(ARM* cpu)
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -1036,6 +1036,7 @@ void A_SMLAL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -1575,18 +1576,18 @@ void T_MUL_REG(ARM* cpu)
cpu->SetNZ(res & 0x80000000,
!res);
s32 cycles = 0;
s32 cycles;
if (cpu->Num == 0)
{
cycles += 3;
cycles = 3;
}
else
{
cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3;
else cycles = 4;
cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means...
}
cpu->AddCycles_CI(cycles);
}

View File

@ -0,0 +1,136 @@
#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H
#define ARMINTERPRETER_MULTIPLYSUPERLLE_H
#include "types.h"
using namespace melonDS;
/*
Copyright (c) 2024 zaydlang
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// code taken from: (also features a few alternative implementations that could maybe be worth looking at?)
// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h
// based on research that can be found here: https://bmchtech.github.io/post/multiply/
// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI.
// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2
static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
// Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31
multiplicand |= 1;
// Optimized first iteration
u32 booth = (s32)(multiplier << 31) >> 31;
u32 carry = booth * multiplicand;
// Pre-populate accumulator for output
u32 output = accumulator;
u32 sum = output + carry;
int shift = 29;
do {
for (int i = 0; i < 4; i++, shift -= 2) {
// Get next booth factor (-2 to 2, shifted left by 30-shift)
u32 next_booth = (s32)(multiplier << shift) >> shift;
u32 factor = next_booth - booth;
booth = next_booth;
// Get scaled value of booth addend
u32 addend = multiplicand * factor;
// Combine the addend with the CSA
// Not performing any masking seems to work because the lower carries can't propagate to bit 31
output ^= carry ^ addend;
sum += addend;
carry = sum - output;
}
} while (booth != multiplier);
return carry >> 31;
}
// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended)
static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
// Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63
// This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks),
// and only the last two booth iterations are needed
// Set the low bit of the multiplicand to cause negation to invert the upper bits
multiplicand |= 1;
// Pre-populate magic bit 61 for carry
u32 carry = ~accum_hi & UINT32_C(0x20000000);
// Pre-populate magic bits 63-60 for output (with carry magic pre-added in)
u32 output = accum_hi - UINT32_C(0x08000000);
// Get factors from the top 3 booth chunks
u32 booth0 = (s32)(multiplier << 27) >> 27;
u32 booth1 = (s32)(multiplier << 29) >> 29;
u32 booth2 = (s32)(multiplier << 31) >> 31;
u32 factor0 = multiplier - booth0;
u32 factor1 = booth0 - booth1;
u32 factor2 = booth1 - booth2;
// Get scaled value of the 3rd top booth addend
u32 addend = multiplicand * factor2;
// Finalize bits 61-60 of output magic using its sign
output -= addend & UINT32_C(0x10000000);
// Get scaled value of the 2nd top booth addend
addend = multiplicand * factor1;
// Finalize bits 63-62 of output magic using its sign
output -= addend & UINT32_C(0x40000000);
// Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration
u32 sum = output + (addend & UINT32_C(0x20000000));
// Subtract out the carry magic to get the actual output magic
output -= carry;
// Get scaled value of the 1st top booth addend
addend = multiplicand * factor0;
// Add to bit 62 and propagate the carry
sum += addend & UINT32_C(0x40000000);
// Cancel out the output magic bit 63 to get the carry bit 63
return (sum ^ output) >> 31;
}
// also for MLAS and MUL (thumb ver.)
inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
{
if (lastcycle)
return (rs >> 30) == -2;
else
return booths_multiplication32_opt(rm, rs, rn);
}
// also for UMLALS
inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
{
if (lastcycle)
return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
else
return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);
}
// also for SMLALS
inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
{
if (lastcycle)
return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
else
return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);
}
#endif