mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 13:27:41 -07:00
arm7 muls carry flag emulation.
This commit is contained in:
parent
3c7db9b21f
commit
3d49f5f256
@ -19,6 +19,7 @@
|
||||
#include <stdio.h>
|
||||
#include "ARM.h"
|
||||
#include "NDS.h"
|
||||
#include "ARMInterpreter_MultiplySuperLLE.h"
|
||||
|
||||
namespace melonDS::ARMInterpreter
|
||||
{
|
||||
@ -854,7 +855,6 @@ void A_MUL(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ(res & 0x80000000,
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -866,6 +866,7 @@ void A_MUL(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
|
||||
else cycles = 4;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -886,7 +887,6 @@ void A_MLA(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ(res & 0x80000000,
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -898,6 +898,7 @@ void A_MLA(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -919,7 +920,6 @@ void A_UMULL(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ((u32)(res >> 63ULL),
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -931,6 +931,7 @@ void A_UMULL(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -955,7 +956,6 @@ void A_UMLAL(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ((u32)(res >> 63ULL),
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -967,6 +967,7 @@ void A_UMLAL(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -988,7 +989,6 @@ void A_SMULL(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ((u32)(res >> 63ULL),
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -1000,6 +1000,7 @@ void A_SMULL(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -1024,7 +1025,6 @@ void A_SMLAL(ARM* cpu)
|
||||
{
|
||||
cpu->SetNZ((u32)(res >> 63ULL),
|
||||
!res);
|
||||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
@ -1036,6 +1036,7 @@ void A_SMLAL(ARM* cpu)
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5));
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
@ -1575,18 +1576,18 @@ void T_MUL_REG(ARM* cpu)
|
||||
cpu->SetNZ(res & 0x80000000,
|
||||
!res);
|
||||
|
||||
s32 cycles = 0;
|
||||
s32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
{
|
||||
cycles += 3;
|
||||
cycles = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
|
||||
if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
|
||||
else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
|
||||
else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3;
|
||||
else cycles = 4;
|
||||
cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means...
|
||||
}
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
|
136
src/ARMInterpreter_MultiplySuperLLE.h
Normal file
136
src/ARMInterpreter_MultiplySuperLLE.h
Normal file
@ -0,0 +1,136 @@
|
||||
#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H
|
||||
#define ARMINTERPRETER_MULTIPLYSUPERLLE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
using namespace melonDS;
|
||||
|
||||
/*
|
||||
Copyright (c) 2024 zaydlang
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
|
||||
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
// code taken from: (also features a few alternative implementations that could maybe be worth looking at?)
|
||||
// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h
|
||||
// based on research that can be found here: https://bmchtech.github.io/post/multiply/
|
||||
|
||||
// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI.
|
||||
|
||||
|
||||
// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2
|
||||
static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
|
||||
// Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31
|
||||
multiplicand |= 1;
|
||||
|
||||
// Optimized first iteration
|
||||
u32 booth = (s32)(multiplier << 31) >> 31;
|
||||
u32 carry = booth * multiplicand;
|
||||
// Pre-populate accumulator for output
|
||||
u32 output = accumulator;
|
||||
|
||||
u32 sum = output + carry;
|
||||
int shift = 29;
|
||||
do {
|
||||
for (int i = 0; i < 4; i++, shift -= 2) {
|
||||
// Get next booth factor (-2 to 2, shifted left by 30-shift)
|
||||
u32 next_booth = (s32)(multiplier << shift) >> shift;
|
||||
u32 factor = next_booth - booth;
|
||||
booth = next_booth;
|
||||
// Get scaled value of booth addend
|
||||
u32 addend = multiplicand * factor;
|
||||
// Combine the addend with the CSA
|
||||
// Not performing any masking seems to work because the lower carries can't propagate to bit 31
|
||||
output ^= carry ^ addend;
|
||||
sum += addend;
|
||||
carry = sum - output;
|
||||
}
|
||||
} while (booth != multiplier);
|
||||
|
||||
return carry >> 31;
|
||||
}
|
||||
|
||||
// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended)
|
||||
static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
|
||||
// Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63
|
||||
// This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks),
|
||||
// and only the last two booth iterations are needed
|
||||
|
||||
// Set the low bit of the multiplicand to cause negation to invert the upper bits
|
||||
multiplicand |= 1;
|
||||
|
||||
// Pre-populate magic bit 61 for carry
|
||||
u32 carry = ~accum_hi & UINT32_C(0x20000000);
|
||||
// Pre-populate magic bits 63-60 for output (with carry magic pre-added in)
|
||||
u32 output = accum_hi - UINT32_C(0x08000000);
|
||||
|
||||
// Get factors from the top 3 booth chunks
|
||||
u32 booth0 = (s32)(multiplier << 27) >> 27;
|
||||
u32 booth1 = (s32)(multiplier << 29) >> 29;
|
||||
u32 booth2 = (s32)(multiplier << 31) >> 31;
|
||||
u32 factor0 = multiplier - booth0;
|
||||
u32 factor1 = booth0 - booth1;
|
||||
u32 factor2 = booth1 - booth2;
|
||||
|
||||
// Get scaled value of the 3rd top booth addend
|
||||
u32 addend = multiplicand * factor2;
|
||||
// Finalize bits 61-60 of output magic using its sign
|
||||
output -= addend & UINT32_C(0x10000000);
|
||||
// Get scaled value of the 2nd top booth addend
|
||||
addend = multiplicand * factor1;
|
||||
// Finalize bits 63-62 of output magic using its sign
|
||||
output -= addend & UINT32_C(0x40000000);
|
||||
|
||||
// Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration
|
||||
u32 sum = output + (addend & UINT32_C(0x20000000));
|
||||
// Subtract out the carry magic to get the actual output magic
|
||||
output -= carry;
|
||||
|
||||
// Get scaled value of the 1st top booth addend
|
||||
addend = multiplicand * factor0;
|
||||
// Add to bit 62 and propagate the carry
|
||||
sum += addend & UINT32_C(0x40000000);
|
||||
|
||||
// Cancel out the output magic bit 63 to get the carry bit 63
|
||||
return (sum ^ output) >> 31;
|
||||
}
|
||||
|
||||
|
||||
// also for MLAS and MUL (thumb ver.)
|
||||
inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
|
||||
{
|
||||
if (lastcycle)
|
||||
return (rs >> 30) == -2;
|
||||
else
|
||||
return booths_multiplication32_opt(rm, rs, rn);
|
||||
}
|
||||
|
||||
// also for UMLALS
|
||||
inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
|
||||
{
|
||||
if (lastcycle)
|
||||
return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
|
||||
else
|
||||
return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);
|
||||
}
|
||||
|
||||
// also for SMLALS
|
||||
inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
|
||||
{
|
||||
if (lastcycle)
|
||||
return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
|
||||
else
|
||||
return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);
|
||||
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user