Merge pull request #10692 from Pokechu22/dsp-manual-set40-and-write-backlog

docs/DSP: Add sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register
This commit is contained in:
Mai M
2022-06-02 20:26:31 -04:00
committed by GitHub
7 changed files with 567 additions and 102 deletions

View File

@ -45,7 +45,7 @@ void Interpreter::clrl(const UDSPInstruction opc)
//----
// ANDCF $acD.m, #I
// 0000 001r 1100 0000
// 0000 001d 1100 0000
// iiii iiii iiii iiii
// Set logic zero (LZ) flag in status register $sr if result of logic AND of
// accumulator mid part $acD.m with immediate value I is equal to I.
@ -61,7 +61,7 @@ void Interpreter::andcf(const UDSPInstruction opc)
}
// ANDF $acD.m, #I
// 0000 001r 1010 0000
// 0000 001d 1010 0000
// iiii iiii iiii iiii
// Set logic zero (LZ) flag in status register $sr if result of logical AND
// operation of accumulator mid part $acD.m with immediate value I is equal
@ -81,7 +81,7 @@ void Interpreter::andf(const UDSPInstruction opc)
// TST
// 1011 r001 xxxx xxxx
// Test accumulator %acR.
// Test accumulator $acR.
//
// flags out: --xx xx00
void Interpreter::tst(const UDSPInstruction opc)
@ -143,11 +143,12 @@ void Interpreter::cmpaxh(const UDSPInstruction opc)
ZeroWriteBackLog();
}
// CMPI $amD, #I
// 0000 001r 1000 0000
// CMPI $acD, #I
// 0000 001d 1000 0000
// iiii iiii iiii iiii
// Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
// Although flags are being set regarding whole accumulator register.
// Compares accumulator with immediate. Comparison is executed
// by subtracting the immediate (16-bit sign extended) from mid accumulator
// $acD.hm and computing flags based on whole accumulator $acD.
//
// flags out: x-xx xxxx
void Interpreter::cmpi(const UDSPInstruction opc)
@ -166,8 +167,8 @@ void Interpreter::cmpi(const UDSPInstruction opc)
// CMPIS $acD, #I
// 0000 011d iiii iiii
// Compares accumulator with short immediate. Comaprison is executed
// by subtracting short immediate (8bit sign extended) from mid accumulator
// Compares accumulator with short immediate. Comparison is executed
// by subtracting the short immediate (8-bit sign extended) from mid accumulator
// $acD.hm and computing flags based on whole accumulator $acD.
//
// flags out: x-xx xxxx
@ -320,7 +321,7 @@ void Interpreter::notc(const UDSPInstruction opc)
}
// XORI $acD.m, #I
// 0000 001r 0010 0000
// 0000 001d 0010 0000
// iiii iiii iiii iiii
// Logic exclusive or (XOR) of accumulator mid part $acD.m with
// immediate value I.
@ -337,7 +338,7 @@ void Interpreter::xori(const UDSPInstruction opc)
}
// ANDI $acD.m, #I
// 0000 001r 0100 0000
// 0000 001d 0100 0000
// iiii iiii iiii iiii
// Logic AND of accumulator mid part $acD.m with immediate value I.
//
@ -354,7 +355,7 @@ void Interpreter::andi(const UDSPInstruction opc)
}
// ORI $acD.m, #I
// 0000 001r 0110 0000
// 0000 001d 0110 0000
// iiii iiii iiii iiii
// Logic OR of accumulator mid part $acD.m with immediate value I.
//
@ -489,8 +490,8 @@ void Interpreter::addaxl(const UDSPInstruction opc)
UpdateSR64Add(acc, acx, GetLongAcc(dreg));
}
// ADDI $amR, #I
// 0000 001r 0000 0000
// ADDI $acD, #I
// 0000 001d 0000 0000
// iiii iiii iiii iiii
// Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
//

View File

@ -16,8 +16,23 @@
namespace DSP::Interpreter
{
// Not needed for game ucodes (it slows down interpreter + easier to compare int VS
// dspjit64 without it)
// Correctly handle instructions such as `INC'L $ac0 : $ac0.l, @$ar0` (encoded as 0x7660) where both
// the main opcode and the extension opcode modify the same register. See the "Extended opcodes"
// section in the manual for more details. No official uCode writes to the same register twice like
// this, so we don't emulate it by default (and also don't support it in the recompiler).
//
// Dolphin only supports this behavior in the interpreter when PRECISE_BACKLOG is defined.
// In ExecuteInstruction, if an extended opcode is in use, the extended opcode's behavior is
// executed first, followed by the main opcode's behavior. The extended opcode does not directly
// write to registers, but instead records the writes into a backlog (WriteToBackLog). The main
// opcode calls ZeroWriteBackLog after it is done reading the register values; this directly
// writes zero to all registers that have pending writes in the backlog. The main opcode then is
// free to write directly to registers it changes. Afterwards, ApplyWriteBackLog bitwise-ors the
// value of the register and the value in the backlog; if the main opcode didn't write to the
// register then ZeroWriteBackLog means that the pending value is being or'd with zero, so it's
// used without changes. When PRECISE_BACKLOG is not defined, ZeroWriteBackLog does nothing and
// ApplyWriteBackLog overwrites the register value with the value from the backlog (so writes from
// extended opcodes "win" over the main opcode).
//#define PRECISE_BACKLOG
Interpreter::Interpreter(DSPCore& dsp) : m_dsp_core{dsp}
@ -809,7 +824,7 @@ void Interpreter::ConditionalExtendAccum(int reg)
void Interpreter::ApplyWriteBackLog()
{
// Always make sure to have an extra entry at the end w/ -1 to avoid
// infinitive loops
// infinite loops
for (int i = 0; m_write_back_log_idx[i] != -1; i++)
{
u16 value = m_write_back_log[i];
@ -823,6 +838,11 @@ void Interpreter::ApplyWriteBackLog()
}
}
// The ext ops are calculated in parallel with the actual op. That means that
// both the main op and the ext op see the same register state as input. The
// output is simple as long as the main and ext ops don't change the same
// register. If they do the output is the bitwise OR of the result of both the
// main and ext ops.
void Interpreter::WriteToBackLog(int i, int idx, u16 value)
{
m_write_back_log[i] = value;
@ -840,7 +860,7 @@ void Interpreter::ZeroWriteBackLog()
{
#ifdef PRECISE_BACKLOG
// always make sure to have an extra entry at the end w/ -1 to avoid
// infinitive loops
// infinite loops
for (int i = 0; m_write_back_log_idx[i] != -1; i++)
{
OpWriteRegister(m_write_back_log_idx[i], 0);

View File

@ -235,11 +235,6 @@ private:
void ConditionalExtendAccum(int reg);
// The ext ops are calculated in parallel with the actual op. That means that
// both the main op and the ext op see the same register state as input. The
// output is simple as long as the main and ext ops don't change the same
// register. If they do the output is the bitwise OR of the result of both the
// main and ext ops.
void WriteToBackLog(int i, int idx, u16 value);
void ZeroWriteBackLog();
void ZeroWriteBackLogPreserveAcc(u8 acc);

View File

@ -53,7 +53,7 @@ void DSPEmitter::clrl(const UDSPInstruction opc)
//----
// ANDCF $acD.m, #I
// 0000 001r 1100 0000
// 0000 001d 1100 0000
// iiii iiii iiii iiii
// Set logic zero (LZ) flag in status register $sr if result of logic AND of
// accumulator mid part $acD.m with immediate value I is equal to I.
@ -88,7 +88,7 @@ void DSPEmitter::andcf(const UDSPInstruction opc)
}
// ANDF $acD.m, #I
// 0000 001r 1010 0000
// 0000 001d 1010 0000
// iiii iiii iiii iiii
// Set logic zero (LZ) flag in status register $sr if result of logical AND
// operation of accumulator mid part $acD.m with immediate value I is equal
@ -126,7 +126,7 @@ void DSPEmitter::andf(const UDSPInstruction opc)
// TST
// 1011 r001 xxxx xxxx
// Test accumulator %acR.
// Test accumulator $acR.
//
// flags out: --xx xx00
void DSPEmitter::tst(const UDSPInstruction opc)
@ -220,11 +220,12 @@ void DSPEmitter::cmpaxh(const UDSPInstruction opc)
}
}
// CMPI $amD, #I
// 0000 001r 1000 0000
// CMPI $acD, #I
// 0000 001d 1000 0000
// iiii iiii iiii iiii
// Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
// Although flags are being set regarding whole accumulator register.
// Compares accumulator with immediate. Comparison is executed
// by subtracting the immediate (16-bit sign extended) from mid accumulator
// $acD.hm and computing flags based on whole accumulator $acD.
//
// flags out: x-xx xxxx
void DSPEmitter::cmpi(const UDSPInstruction opc)
@ -257,7 +258,7 @@ void DSPEmitter::cmpi(const UDSPInstruction opc)
// CMPIS $acD, #I
// 0000 011d iiii iiii
// Compares accumulator with short immediate. Comparison is executed
// by subtracting short immediate (8bit sign extended) from mid accumulator
// by subtracting the short immediate (8-bit sign extended) from mid accumulator
// $acD.hm and computing flags based on whole accumulator $acD.
//
// flags out: x-xx xxxx
@ -472,7 +473,7 @@ void DSPEmitter::notc(const UDSPInstruction opc)
}
// XORI $acD.m, #I
// 0000 001r 0010 0000
// 0000 001d 0010 0000
// iiii iiii iiii iiii
// Logic exclusive or (XOR) of accumulator mid part $acD.m with
// immediate value I.
@ -498,7 +499,7 @@ void DSPEmitter::xori(const UDSPInstruction opc)
}
// ANDI $acD.m, #I
// 0000 001r 0100 0000
// 0000 001d 0100 0000
// iiii iiii iiii iiii
// Logic AND of accumulator mid part $acD.m with immediate value I.
//
@ -523,7 +524,7 @@ void DSPEmitter::andi(const UDSPInstruction opc)
}
// ORI $acD.m, #I
// 0000 001r 0110 0000
// 0000 001d 0110 0000
// iiii iiii iiii iiii
// Logic OR of accumulator mid part $acD.m with immediate value I.
//
@ -706,8 +707,8 @@ void DSPEmitter::addaxl(const UDSPInstruction opc)
}
}
// ADDI $amR, #I
// 0000 001r 0000 0000
// ADDI $acD, #I
// 0000 001d 0000 0000
// iiii iiii iiii iiii
// Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
//

View File

@ -0,0 +1,202 @@
; This test covers the behavior of 40-bit mode with various instructions.
incdir "tests"
include "dsp_base.inc"
positive_value: EQU #0x1234
negative_value: EQU #0x9876
negative_imem_value_addr:
CW negative_value
; DSPSpy doesn't pre-populate DMEM currently, so instead use these addresses to store values.
positive_dmem_value_addr: EQU #0x100
negative_dmem_value_addr: EQU #0x101
readback_dmem_addr: EQU #0x102
test_main:
; Point the address registers at the locations used by the indirect loads/stores below.
LRI $ar0, #positive_dmem_value_addr
LRI $ar1, #negative_dmem_value_addr
LRI $ar2, #negative_imem_value_addr
LRI $ar3, #readback_dmem_addr
; Zero the index registers; the 'N-variant instructions used below rely on this so that they
; do not move the address registers.
LRI $ix0, #0
LRI $ix1, #0
LRI $ix2, #0
LRI $ix3, #0
; Keep the two test values in $ax0.h/$ax1.h and also copy them into DMEM.
LRI $ax0.h, #positive_value
LRI $ax1.h, #negative_value
SR @positive_dmem_value_addr, $ax0.h
SR @negative_dmem_value_addr, $ax1.h
; LRS/SRS below are only given the low 8 bits of the address; presumably $cr supplies the
; upper bits - NOTE(review): confirm against the DSP manual.
LRI $cr, #(positive_dmem_value_addr / 256)
; Everything from here until the explicit SET16 further down runs in 40-bit mode.
SET40
; Instructions that perform sign-extension
; $acc0 should alternate between being positive and negative here
; (though none of these instructions update $sr)
; The bracketed numbers count the send_back calls; the trailing +/- on each load is the sign
; that $acc0 is expected to have afterwards.
; [1] ILRR (also ILRRD/ILRRI/ILRRN, not covered)
ILRR $ac0.m, @$ar2 ; -
CALL send_back
; [2] LR
LR $ac0.m, @positive_dmem_value_addr ; +
CALL send_back
; [3] LRI
LRI $ac0.m, #negative_value ; -
CALL send_back
; [4] LRIS
LRIS $ac0.m, #42 ; +
CALL send_back
; [5] LRR (also LRRD/LRRI/LRRN)
LRR $ac0.m, @$ar1 ; -
CALL send_back
; [6] LRS
LRS $ac0.m, @(positive_dmem_value_addr & 0xff) ; +
CALL send_back
; [7] MRR
MRR $ac0.m, $ax1.h ; -
CALL send_back
; [8] 'LN (and 'L, but 'LN lets us set $ix0 to not increment $ar0)
NX'LN : $ac0.m, @$ar0 ; +
CALL send_back
; Instructions that experience saturation
; $ax1.l should alternate between 0x8000 and 0x7fff.
; $acc0 is given a positive high part and $acc1 a negative one; per the +/- markers below,
; reads of $ac1.m are the 0x8000 cases and reads of $ac0.m are the 0x7fff cases.
LRI $ac0.m, #0x4231
LRI $ac0.h, #0x12 ; positive
LRI $ac1.m, #0x2816
LRI $ac1.h, #0x99 ; negative
; [9] MRR (again)
MRR $ax1.l, $ac1.m ; -
CALL send_back
; The store-based cases below write $acN.m to readback_dmem_addr and then reload that word
; into $ax1.l so that the stored value shows up in the reported registers.
; [10] SR
SR @readback_dmem_addr, $ac0.m
LR $ax1.l, @readback_dmem_addr ; +
CALL send_back
; [11] SRRN (also SRR/SRRD/SRRI)
SRRN @$ar3, $ac1.m
LR $ax1.l, @readback_dmem_addr ; -
CALL send_back
; [12] SRS
SRS @(readback_dmem_addr & 0xff), $ac0.m
LR $ax1.l, @readback_dmem_addr ; +
CALL send_back
; [13] 'LSNM (also 'LS/'LSM/'LSN) - the $ax0.l read is not relevant
NX'LSNM : $ax0.l, $ac1.m
LR $ax1.l, @readback_dmem_addr ; -
CALL send_back
; [14] 'MV
NX'MV : $ax1.l, $ac0.m ; +
CALL send_back
; [15] 'SLNM (also 'SL/'SLM/'SLN) - the $ax0.l read is not relevant
; Note that 'SL stores to @$ar0, while 'LS stores to @$ar3
; $ar0 is therefore pointed at the readback slot for this case only...
LRI $ar0, #readback_dmem_addr
NX'SLNM : $ac1.m, $ax0.l
LR $ax1.l, @readback_dmem_addr ; -
CALL send_back
; ...and restored to its original target afterwards.
LRI $ar0, #positive_dmem_value_addr
; [16] 'SN (also 'S)
NX'SN : @$ar3, $ac0.m
LR $ax1.l, @readback_dmem_addr ; +
CALL send_back
; Instructions that are not affected
; Each of the following operates on $acN.m and is followed by a send_back call so that the
; resulting state can be compared between runs.
; [17] ADDI
ADDI $ac0.m, #8
CALL send_back
; [18] ADDIS
ADDIS $ac0.m, #-8
CALL send_back
; [19] ANDC
ANDC $ac1.m, $ac0.m
CALL send_back
; [20] ANDI
ANDI $ac0.m, #0x6666
CALL send_back
; [21] ANDR
ANDR $ac0.m, $ax0.h
CALL send_back
; [22] ORC
ORC $ac0.m, $ac1.m
CALL send_back
; [23] ORI
ORI $ac0.m, #0xfeed
CALL send_back
; [24] ORR
ORR $ac1.m, $ax0.h
CALL send_back
; [25] NOT
NOT $ac1.m
CALL send_back
; [26] XORC
XORC $ac0.m, $ac1.m
CALL send_back
; [27] XORI
XORI $ac0.m, #0x5555
CALL send_back
; [28] XORR
XORR $ac1.m, $ax1.h
CALL send_back
; [29] MOVR always sign extends...
MOVR $acc1, $ax0.h
CALL send_back
; [30] ... even in SET16 mode
SET16
MOVR $acc1, $ax1.h
CALL send_back
; Switch back to 40-bit mode for the tests that follow.
SET40
; Shift instructions - do these see saturated $ac1.m?
; $ac1.m holds the shift amount (2) while $ac1.h is set to a nonzero value.
; NOTE(review): presumably a saturated read of $ac1.m would give 0x7fff rather than 2, which
; is what makes the two outcomes distinguishable - confirm against hardware results.
LRI $ac0.m, #positive_value
LRI $ac1.m, #2
LRI $ac1.h, #1
; [31] - for diffs only
; (baseline report taken before the shifts, so [32] and [33] can be compared against it)
CALL send_back
; [32]
LSRNR $acc0, $ac1.m
CALL send_back
; [33] Shifts $acc0 by $ac1.m (in the other direction)
LSRN
CALL send_back
; Does LOOP experience saturation?
; $acc0 is cleared and then incremented once per iteration, so it ends up recording how many
; times each loop body ran. The loop count is read from $ac1.m while $ac1.h is nonzero.
CLR $acc0
LRI $ac1.m, #0x1234
LRI $ac1.h, #1
; [34] - for diffs only
CALL send_back
; [35] LOOP
LOOP $ac1.m
INC $acc0
CALL send_back
; Change $ac1.h to the negative high part used earlier before the BLOOP case.
LRI $ac1.h, #0x99
; [36] BLOOP
BLOOP $ac1.m, bloop_last_ins
INCM $ac0.m
bloop_last_ins:
NOP
CALL send_back
; For the sake of clarity, the same LOOP/BLOOP calls in SET16 mode don't have saturation:
; (this repeats the sequence of cases [34]-[36] with the same register values, only the mode
; differs)
SET16
CLR $acc0
LRI $ac1.m, #0x1234
LRI $ac1.h, #1
; [37] - for diffs only
CALL send_back
; [38] LOOP
LOOP $ac1.m
INC $acc0
CALL send_back
LRI $ac1.h, #0x99
; [39] BLOOP
BLOOP $ac1.m, bloop2_last_ins
INCM $ac0.m
bloop2_last_ins:
NOP
CALL send_back
; We're done, DO NOT DELETE THIS LINE
JMP end_of_test

View File

@ -0,0 +1,164 @@
; This test covers the behavior of 40-bit mode for a variety of values.
; It takes a while to run completely (~5 minutes), but progress is indicated via mail shown at the
; top of the screen in DSPSpy. The value will go from 80000000 to 8041ffff.
incdir "tests"
include "dsp_base.inc"
test_main:
; Clear the address and index registers. $ar0 counts completed test_saturation calls (it is
; reported as the high word of the progress mail); $ar1-$ar3 and $ix1-$ix3 are used as failure
; counters/flags and are described in the report comment below.
LRI $ar0, #0
LRI $ar1, #0
LRI $ar2, #0
LRI $ar3, #0
LRI $ix0, #0
LRI $ix1, #0
LRI $ix2, #0
LRI $ix3, #0
; Test with $ac0.l from 0xfff0 to 0x0010
; (0x21 iterations; the INC at the end of each iteration advances $ac0.l by one, and
; test_saturation reloads $ac0.m and $ac0.h itself on entry)
LRI $ac0.l, #0xfff0
BLOOPI #0x21, first_loop_last_ins
CALL test_saturation
IAR $ar0
first_loop_last_ins:
INC $acc0
; Test with $ac0.l from 0x7ff0 to 0x8010
; This range crosses the 0x7fff -> 0x8000 boundary of $ac0.l. The start value must be 0x7ff0
; here; loading 0xfff0 again would only repeat the first loop's range.
LRI $ac0.l, #0x7ff0
BLOOPI #0x21, second_loop_last_ins
CALL test_saturation
IAR $ar0
second_loop_last_ins:
INC $acc0
; We're done. Report the test results.
; $ix1 should be 0, or else saturation occurred on $ac0.l or $ac0.h.
; $ix2 should be 0, or else sign-extension occurred on $ac0.l or $ac0.h.
; $ix3 should be 0, or else we incorrectly predicted saturation on $ac0.m.
; $ar1/$ar2/$ar3 record the number of times each of these happened.
CALL send_back
; We're done, DO NOT DELETE THIS LINE
JMP end_of_test
test_saturation:
; Subroutine: sweeps $ac0.h:$ac0.m over its whole range for the $ac0.l value supplied by the
; caller, and checks 40-bit-mode reads and writes of each accumulator part on every step.
; We start with $ac0.h at -0x80 since we can use the overflow flag to check when wrapping around
; occurs; starting at 0 and ending when it wraps back to 0 doesn't work since we can't check the
; zero flag since $ac0.l may be nonzero ($ac0.l is used as an input to this subroutine)
LRI $ac0.m, #0
LRI $ac0.h, #-0x80
loop_start:
; Compare the value of $ac0.m when in SET16 mode and in SET40 mode
SET40
; Reading $ac0.m in SET40 mode results in saturation if $ac0.h doesn't match the sign-extension
; of $ac0.m. Also, storing to $ac1.m in SET40 mode clears $ac1.l and sets $ac1.h to the
; sign-extension of $ac1.m.
MRR $ac1.m, $ac0.m
SET16
; Attempt to compute the saturated value of $ac1.m in $ax1.h,
; using what we know of $acc0.
; The 'MV extension copies the unsaturated $ac0.m (we are in SET16 mode here) into $ax1.h
; as the starting point; it is replaced below if saturation is predicted.
TST'MV $acc0 : $ax1.h, $ac0.m
JL negative_acc0
; $acc0 is nonnegative.
JMPx8 check_saturated_ax1h ; If the above s32 bit is not set, we don't need to saturate
; If the above s32 bit _is_ set, then saturate $ax1.h.
LRI $ax1.h, #0x7fff
JMP check_saturated_ax1h
negative_acc0:
; $acc0 is negative; same check as above, but saturating to the most negative 16-bit value.
JMPx8 check_saturated_ax1h ; If the above s32 bit is not set, we don't need to saturate
LRI $ax1.h, #0x8000
; Fall through to check_saturated_ax1h
check_saturated_ax1h:
; $acc1 has the value of $ac0.m in SET40 mode.
; And, $ax1.h has what we computed that value should be, and CMPAXH always sign-extends $ax1.h
; (and ignores $ax1.l), so we can compare using it directly.
CMPAXH $acc1, $ax1.h
JZ check_read_low
; Our prediction was wrong (shouldn't happen)
; Flag the failure in $ix3, count it in $ar3, and report the current state.
LRI $ix3, #1
IAR $ar3
TST $acc0
CALL send_back
; Fall through to check_read_low
check_read_low:
; Read $ac0.l once in SET40 mode (into $ac1.m) and once in SET16 mode (into $ax1.h); the two
; results are expected to compare equal.
SET40
MRR $ac1.m, $ac0.l
SET16
MRR $ax1.h, $ac0.l
CMPAXH $acc1, $ax1.h
JZ check_read_high
; Reading $ac0.l gave different results in SET40 and SET16 modes (shouldn't happen)
LRI $ix1, #1
IAR $ar1
TST $acc0
CALL send_back
; Fall through to check_read_high
check_read_high:
; Same comparison as check_read_low, but for $ac0.h.
SET40
MRR $ac1.m, $ac0.h
SET16
MRR $ax1.h, $ac0.h
CMPAXH $acc1, $ax1.h
JZ check_write_low
; Reading $ac0.h gave different results in SET40 and SET16 modes (shouldn't happen)
; This shares the $ix1 flag and $ar1 counter with the $ac0.l read check above.
LRI $ix1, #1
IAR $ar1
TST $acc0
CALL send_back
; Fall through to check_write_low
check_write_low:
; Copy $acc0 to $acc1, then rewrite $ac1.l with the same value while in SET40 mode. If that
; write has no side effects on the rest of $acc1, the two accumulators still match.
MOV $acc1, $acc0
SET40
MRR $ac1.l, $ac0.l
SET16
CMP
JZ check_write_high
; Writing to $ac1.l caused $acc1 to not match $acc0 (shouldn't happen)
LRI $ix2, #1
IAR $ar2
CALL send_back
; Fall through to check_write_high
check_write_high:
; Same check as check_write_low, but rewriting $ac1.h instead.
MOV $acc1, $acc0
SET40
MRR $ac1.h, $ac0.h
SET16
CMP
JZ increment_loop
; Writing to $ac1.h caused $acc1 to not match $acc0 (shouldn't happen)
; This shares the $ix2 flag and $ar2 counter with the $ac1.l write check above.
LRI $ix2, #1
IAR $ar2
CALL send_back
; Fall through to increment_loop
increment_loop:
; Advance to the next $ac0.h:$ac0.m value.
INCM $ac0.m
; If incrementing results in overflowing, then we're done.
RETO
; If ($ac0.m & 0x00ff) != 0, continue the loop without sending mail.
ANDF $ac0.m, #0x00ff
JLNZ loop_start
; Otherwise, send mail to report the progress. (This shows at the top of the screen in DSPSpy,
; but otherwise isn't handled in any meaningful way.)
MOV $acc1, $acc0
LSR $acc1, #-8
; Compensate for starting at INT_MIN (0x80'0000'0000) and ending at INT_MAX (0x7f'0000'0000)
; instead of going from 0 (0x00'0000'0000) to -1 (0xff'ffff'ffff)
XORI $ac1.m, #0x8000
; High mail word: $ar0 (number of completed test_saturation calls). Low mail word: the
; progress value computed above.
SR @DMBH, $ar0
SR @DMBL, $ac1.m
SI @DIRQ, #0x0001
; We don't wait for the mail to be read, because we don't care about the response.
JMP loop_start