ARM9 interpreter: add the DSP-extension multiplies and saturating arithmetic

Adds handlers for SMLAxy, SMLAWy, SMULxy, SMULWy, SMLALxy, QADD, QSUB, QDADD
and QDSUB and wires them into ARMInstrTable. Also returns a cache type value
from CP15 register 0x001, silently accepts 32-bit ARM9 writes in the
0x04000320..0x040006A3 range (3D GPU), and maps the 'A' and 'B' keys.

Handler notes:
  * The Q* instructions take Rn from bits 19:16 and Rm from bits 3:0 and
    write their result to Rd in bits 15:12.
  * QDADD/QDSUB saturate the doubling of Rn when bits 31 and 30 of Rn differ,
    and set the Q flag (0x08000000 in CPSR) when they do.
  * SMLAWy/SMULWy form the 32x16 product in 64 bits before taking bits 47:16.

diff --git a/ARMInterpreter_ALU.cpp b/ARMInterpreter_ALU.cpp
index 8e897b63..88b69e75 100644
--- a/ARMInterpreter_ALU.cpp
+++ b/ARMInterpreter_ALU.cpp
@@ -863,6 +863,101 @@ void A_SMLAL(ARM* cpu)
     cpu->Cycles += cycles;
 }
 
+void A_SMLAxy(ARM* cpu)
+{
+    // SMLAxy: Rd = Rn + (signed 16-bit half of Rm * signed 16-bit half of Rs).
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
+
+    if (cpu->CurInstr & (1<<5)) rm >>= 16;  // bit 5 set: top half of Rm
+    else rm &= 0xFFFF;
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;  // bit 6 set: top half of Rs
+    else rs &= 0xFFFF;
+
+    u32 res_mul = ((s16)rm * (s16)rs);
+    u32 res = res_mul + rn;
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+    if (OVERFLOW_ADD(res_mul, rn, res))
+        cpu->CPSR |= 0x08000000;  // accumulate overflowed: set Q
+}
+
+void A_SMLAWy(ARM* cpu)
+{
+    // SMLAWy: Rd = Rn + bits 47:16 of (signed Rm * signed 16-bit half of Rs).
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
+
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;  // bit 6 set: top half of Rs
+    else rs &= 0xFFFF;
+
+    u32 res_mul = (u32)(((s64)(s32)rm * (s16)rs) >> 16);  // widen first: the product needs 48 bits
+    u32 res = res_mul + rn;
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+    if (OVERFLOW_ADD(res_mul, rn, res))
+        cpu->CPSR |= 0x08000000;  // accumulate overflowed: set Q
+}
+
+void A_SMULxy(ARM* cpu)
+{
+    // SMULxy: Rd = signed 16-bit half of Rm * signed 16-bit half of Rs.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+
+    if (cpu->CurInstr & (1<<5)) rm >>= 16;  // bit 5 set: top half of Rm
+    else rm &= 0xFFFF;
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;  // bit 6 set: top half of Rs
+    else rs &= 0xFFFF;
+
+    u32 res = ((s16)rm * (s16)rs);
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+}
+
+void A_SMULWy(ARM* cpu)
+{
+    // SMULWy: Rd = bits 47:16 of (signed Rm * signed 16-bit half of Rs).
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;  // bit 6 set: top half of Rs
+    else rs &= 0xFFFF;
+
+    u32 res = (u32)(((s64)(s32)rm * (s16)rs) >> 16);  // widen first: the product needs 48 bits
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+}
+
+void A_SMLALxy(ARM* cpu)
+{
+    // SMLALxy: 64-bit RdHi:RdLo += signed 16-bit half of Rm * signed 16-bit half of Rs.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+
+    if (cpu->CurInstr & (1<<5)) rm >>= 16;  // bit 5 set: top half of Rm
+    else rm &= 0xFFFF;
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;  // bit 6 set: top half of Rs
+    else rs &= 0xFFFF;
+
+    s64 res = (s64)(s16)rm * (s64)(s16)rs;
+
+    s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL));
+    res += rd;
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
+
+    cpu->Cycles += 1;
+}
+


 void A_CLZ(ARM* cpu)
@@ -888,6 +983,90 @@ void A_CLZ(ARM* cpu)
     cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
 }
 
+void A_QADD(ARM* cpu)
+{
+    // QADD: Rd = saturate(Rm + Rn); Q is set when the sum saturates.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    u32 res = rm + rn;
+    if (OVERFLOW_ADD(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;  // wrapped sign is the inverse of the true sign
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;  // Rd is bits 15:12 (bits 19:16 hold Rn)
+}
+
+void A_QSUB(ARM* cpu)
+{
+    // QSUB: Rd = saturate(Rm - Rn); Q is set when the difference saturates.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    u32 res = rm - rn;
+    if (OVERFLOW_SUB(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;  // wrapped sign is the inverse of the true sign
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;  // Rd is bits 15:12 (bits 19:16 hold Rn)
+}
+
+void A_QDADD(ARM* cpu)
+{
+    // QDADD: Rd = saturate(Rm + saturate(Rn * 2)); Q is set if either step saturates.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    if ((rn ^ (rn << 1)) & 0x80000000)  // bits 31 and 30 differ: doubling overflows
+    {
+        rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
+        cpu->CPSR |= 0x08000000;  // the doubling step sets Q as well
+    }
+    else
+        rn <<= 1;
+
+    u32 res = rm + rn;
+    if (OVERFLOW_ADD(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;  // wrapped sign is the inverse of the true sign
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;  // Rd is bits 15:12 (bits 19:16 hold Rn)
+}
+
+void A_QDSUB(ARM* cpu)
+{
+    // QDSUB: Rd = saturate(Rm - saturate(Rn * 2)); Q is set if either step saturates.
+    // TODO: ARM9 only
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    if ((rn ^ (rn << 1)) & 0x80000000)  // bits 31 and 30 differ: doubling overflows
+    {
+        rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
+        cpu->CPSR |= 0x08000000;  // the doubling step sets Q as well
+    }
+    else
+        rn <<= 1;
+
+    u32 res = rm - rn;
+    if (OVERFLOW_SUB(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;  // wrapped sign is the inverse of the true sign
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;  // Rd is bits 15:12 (bits 19:16 hold Rn)
+}
+


 // ---- THUMB ----------------------------------
diff --git a/ARMInterpreter_ALU.h b/ARMInterpreter_ALU.h
index 15b99488..4cc3760e 100644
--- a/ARMInterpreter_ALU.h
+++ b/ARMInterpreter_ALU.h
@@ -78,8 +78,17 @@ void A_UMULL(ARM* cpu);
 void A_UMLAL(ARM* cpu);
 void A_SMULL(ARM* cpu);
 void A_SMLAL(ARM* cpu);
+void A_SMLAxy(ARM* cpu);
+void A_SMLAWy(ARM* cpu);
+void A_SMULxy(ARM* cpu);
+void A_SMULWy(ARM* cpu);
+void A_SMLALxy(ARM* cpu);

 void A_CLZ(ARM* cpu);
+void A_QADD(ARM* cpu);
+void A_QSUB(ARM* cpu);
+void A_QDADD(ARM* cpu);
+void A_QDSUB(ARM* cpu);


 void T_LSL_IMM(ARM* cpu);
diff --git a/ARM_InstrTable.h b/ARM_InstrTable.h
index 1cc3fd2f..9b7119d4 100644
--- a/ARM_InstrTable.h
+++ b/ARM_InstrTable.h
@@ -118,9 +118,9 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =

     // 0001 0000 0000
     A_MRS, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_SWP, A_UNK, A_STRH_REG,
-    A_UNK, A_LDRD_REG, A_UNK, A_STRD_REG,
+    A_UNK, A_QADD, A_UNK, A_UNK,
+    A_SMLAxy, A_SWP, A_SMLAxy, A_STRH_REG,
+    A_SMLAxy, A_LDRD_REG, A_SMLAxy, A_STRD_REG,

     // 0001 0001 0000
     A_TST_REG_LSL_IMM, A_TST_REG_LSL_REG, A_TST_REG_LSR_IMM, A_TST_REG_LSR_REG,
@@ -130,9 +130,9 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =

     // 0001 0010 0000
     A_MSR_REG, A_BX, A_UNK, A_BLX_REG,
-    A_UNK, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_UNK, A_UNK, A_STRH_REG,
-    A_UNK, A_LDRD_REG, A_UNK, A_STRD_REG,
+    A_UNK, A_QSUB, A_UNK, A_UNK,
+    A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG,
+    A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG,

     // 0001 0011 0000
     A_TEQ_REG_LSL_IMM, A_TEQ_REG_LSL_REG, A_TEQ_REG_LSR_IMM, A_TEQ_REG_LSR_REG,
@@ -142,9 +142,9 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =

     // 0001 0100 0000
     A_MRS, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_SWPB, A_UNK, A_STRH_IMM,
-    A_UNK, A_LDRD_IMM, A_UNK, A_STRD_IMM,
+    A_UNK, A_QDADD, A_UNK, A_UNK,
+    A_SMLALxy, A_SWPB, A_SMLALxy, A_STRH_IMM,
+    A_SMLALxy, A_LDRD_IMM, A_SMLALxy, A_STRD_IMM,

     // 0001 0101 0000
     A_CMP_REG_LSL_IMM, A_CMP_REG_LSL_REG, A_CMP_REG_LSR_IMM, A_CMP_REG_LSR_REG,
@@ -154,9 +154,9 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =

     // 0001 0110 0000
     A_MSR_REG, A_CLZ, A_UNK, A_UNK,
-    A_UNK, A_UNK, A_UNK, A_UNK,
-    A_UNK, A_UNK, A_UNK, A_STRH_IMM,
-    A_UNK, A_LDRD_IMM, A_UNK, A_STRD_IMM,
+    A_UNK, A_QDSUB, A_UNK, A_UNK,
+    A_SMULxy, A_UNK, A_SMULxy, A_STRH_IMM,
+    A_SMULxy, A_LDRD_IMM, A_SMULxy, A_STRD_IMM,

     // 0001 0111 0000
     A_CMN_REG_LSL_IMM, A_CMN_REG_LSL_REG, A_CMN_REG_LSR_IMM, A_CMN_REG_LSR_REG,
diff --git a/CP15.cpp b/CP15.cpp
index a9b03dcc..55bc43ff 100644
--- a/CP15.cpp
+++ b/CP15.cpp
@@ -140,6 +140,8 @@ void Write(u32 id, u32 val)

 u32 Read(u32 id)
 {
+    //printf("CP15 read op %03X %08X\n", id, NDS::ARM9->R[15]);
+
     switch (id)
     {
     case 0x000: // CPU ID
@@ -150,9 +152,8 @@ u32 Read(u32 id)
     case 0x007:
         return 0x41059461;

-    case 0x001:
-        // cache type. todo
-        return 0;
+    case 0x001: // cache type
+        return 0x0F0D2112;

     case 0x002: // TCM size
         return (6 << 6) | (5 << 18);
diff --git a/NDS.cpp b/NDS.cpp
index d98f8458..ba39f9c5 100644
--- a/NDS.cpp
+++ b/NDS.cpp
@@ -1783,6 +1783,11 @@ void ARM9IOWrite32(u32 addr, u32 val)
         GPU::GPU2D_B->Write32(addr, val);
         return;
     }
+    if (addr >= 0x04000320 && addr < 0x040006A4)
+    {
+        // 3D GPU
+        return;
+    }

     printf("unknown ARM9 IO write32 %08X %08X\n", addr, val);
 }
diff --git a/main.cpp b/main.cpp
index 262b5518..3a4f3212 100644
--- a/main.cpp
+++ b/main.cpp
@@ -60,6 +60,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam)
         case VK_LEFT: NDS::PressKey(5); break;
         case VK_RIGHT: NDS::PressKey(4); break;
         case 'P': NDS::PressKey(16+6); break;
+        case 'A': NDS::PressKey(0); break;
+        case 'B': NDS::PressKey(1); break;
         case 'D': NDS::debug(0); break;
         }
         return 0;
@@ -74,6 +76,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam)
         case VK_LEFT: NDS::ReleaseKey(5); break;
         case VK_RIGHT: NDS::ReleaseKey(4); break;
         case 'P': NDS::ReleaseKey(16+6); break;
+        case 'A': NDS::ReleaseKey(0); break;
+        case 'B': NDS::ReleaseKey(1); break;
         }
         return 0;

diff --git a/melonDS.depend b/melonDS.depend
index e32d0025..dbdda239 100644
--- a/melonDS.depend
+++ b/melonDS.depend
@@ -1,5 +1,5 @@
 # depslib dependency file v1.0
-1485834524 source:c:\documents\sources\melonds\main.cpp
+1485884302 source:c:\documents\sources\melonds\main.cpp
 	"NDS.h"

@@ -10,7 +10,7 @@

 1481161027 c:\documents\sources\melonds\types.h

-1485879135 source:c:\documents\sources\melonds\nds.cpp
+1485884262 source:c:\documents\sources\melonds\nds.cpp
 	"NDS.h"
@@ -35,7 +35,7 @@
 	"NDS.h"
 	"CP15.h"

-1484530962 c:\documents\sources\melonds\arm_instrtable.h
+1485883199 c:\documents\sources\melonds\arm_instrtable.h

 1485795549 c:\documents\sources\melonds\arminterpreter.h
 	"types.h"
@@ -57,9 +57,9 @@
 	"ARM.h"

-1485795628 c:\documents\sources\melonds\arminterpreter_alu.h
+1485883149 c:\documents\sources\melonds\arminterpreter_alu.h

-1485797076 source:c:\documents\sources\melonds\arminterpreter_alu.cpp
+1485884226 source:c:\documents\sources\melonds\arminterpreter_alu.cpp
 	"ARM.h"
@@ -71,7 +71,7 @@

 1485799621 c:\documents\sources\melonds\cp15.h

-1485833371 source:c:\documents\sources\melonds\cp15.cpp
+1485883361 source:c:\documents\sources\melonds\cp15.cpp
 	"NDS.h"
@@ -137,7 +137,7 @@
 1485878561 c:\documents\sources\melonds\ndscart.h
 	"types.h"

-1485813068 source:c:\documents\sources\melonds\ndscart.cpp
+1485880110 source:c:\documents\sources\melonds\ndscart.cpp
 	"NDS.h"