mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 13:27:41 -07:00
Merge vram dirty tracking
Squashed commit of the following: commit b463a05d4b909372f0cd1ad91caa0c77a25e5901 Author: RSDuck <rsduck@users.noreply.github.com> Date: Mon Nov 30 01:55:35 2020 +0100 minor fix commit ce73cebbdf5da243d7ebade82d8799ded9cd6b28 Author: RSDuck <rsduck@users.noreply.github.com> Date: Mon Nov 30 00:43:08 2020 +0100 fix dirty flags of BG/OBJ mappings not being reset commit fc5d73a6178e3adc444398bdd23de8314b5ca8f8 Author: RSDuck <rsduck@users.noreply.github.com> Date: Mon Nov 30 00:11:13 2020 +0100 use flat vram for gpu2d everywhere commit 34ee9fe2bf04fcfa2a5a1c8d78d70007e606f1a2 Author: RSDuck <rsduck@users.noreply.github.com> Date: Sat Nov 28 19:10:34 2020 +0100 mark VRAM dirty for display capture commit e8778fa2f429c6df0eece19d6a5ee83ae23a0cf4 Author: RSDuck <rsduck@users.noreply.github.com> Date: Sat Nov 28 18:59:31 2020 +0100 use flat VRAM for textures and texpals also skip rendering if nothing changed and a bunch of fixes commit 53f2041e2e1a28b35702a2ed51de885c36689f71 Author: RSDuck <rsduck@users.noreply.github.com> Date: Fri Nov 27 18:29:56 2020 +0100 use vram dirty tracking for extpals also preparations to take this further commit 4cdfa329e95aed26d3b21319c8fd86a04abf20f7 Author: RSDuck <rsduck@users.noreply.github.com> Date: Mon Nov 16 23:32:22 2020 +0100 VRAM dirty tracking
This commit is contained in:
parent
acb272ed78
commit
6e8bac3909
338
src/GPU.cpp
338
src/GPU.cpp
@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
|
||||
u8 VRAM_G[ 16*1024];
|
||||
u8 VRAM_H[ 32*1024];
|
||||
u8 VRAM_I[ 16*1024];
|
||||
u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
|
||||
u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
|
||||
u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
|
||||
u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
|
||||
|
||||
u8 VRAMCNT[9];
|
||||
u8 VRAMSTAT;
|
||||
@ -85,6 +85,62 @@ bool Accelerated;
|
||||
GPU2D* GPU2D_A;
|
||||
GPU2D* GPU2D_B;
|
||||
|
||||
/*
|
||||
VRAM invalidation tracking
|
||||
|
||||
- we want to know when a VRAM region used for graphics changed
|
||||
- for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
|
||||
we don't want to completely invalidate them every time they're unmapped and remapped
|
||||
|
||||
For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
|
||||
with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
|
||||
like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
|
||||
|
||||
This is more or less a description of VRAMTrackingSet::DeriveState
|
||||
Each time before the memory is read two things could have happened
|
||||
to each 16kb piece (16kb is the smallest unit in which mappings can
|
||||
be made thus also the size VRAMMap_* use):
|
||||
- this piece was remapped compared to last time we checked,
|
||||
which means this location in memory is invalid.
|
||||
- this piece wasn't remapped, which means we need to check whether
|
||||
it was changed. This can be achieved by checking VRAMDirty.
|
||||
VRAMDirty needs to be reset for the respective VRAM bank.
|
||||
*/
|
||||
|
||||
VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
|
||||
VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
|
||||
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
|
||||
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
|
||||
|
||||
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
|
||||
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
|
||||
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
|
||||
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
|
||||
|
||||
VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
|
||||
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
|
||||
|
||||
|
||||
NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
|
||||
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
|
||||
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
|
||||
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
|
||||
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
|
||||
|
||||
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
|
||||
|
||||
u8 VRAMFlat_ABG[512*1024];
|
||||
u8 VRAMFlat_BBG[128*1024];
|
||||
u8 VRAMFlat_AOBJ[256*1024];
|
||||
u8 VRAMFlat_BOBJ[128*1024];
|
||||
|
||||
u8 VRAMFlat_ABGExtPal[32*1024];
|
||||
u8 VRAMFlat_BBGExtPal[32*1024];
|
||||
u8 VRAMFlat_AOBJExtPal[8*1024];
|
||||
u8 VRAMFlat_BOBJExtPal[8*1024];
|
||||
|
||||
u8 VRAMFlat_Texture[512*1024];
|
||||
u8 VRAMFlat_TexPal[128*1024];
|
||||
|
||||
bool Init()
|
||||
{
|
||||
@ -113,6 +169,30 @@ void DeInit()
|
||||
if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
|
||||
}
|
||||
|
||||
void ResetVRAMCache()
|
||||
{
|
||||
for (int i = 0; i < 9; i++)
|
||||
VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>();
|
||||
|
||||
VRAMDirty_ABG.Reset();
|
||||
VRAMDirty_BBG.Reset();
|
||||
VRAMDirty_AOBJ.Reset();
|
||||
VRAMDirty_BOBJ.Reset();
|
||||
VRAMDirty_ABGExtPal.Reset();
|
||||
VRAMDirty_BBGExtPal.Reset();
|
||||
VRAMDirty_AOBJExtPal.Reset();
|
||||
VRAMDirty_BOBJExtPal.Reset();
|
||||
|
||||
memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG));
|
||||
memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
|
||||
memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
|
||||
memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
|
||||
memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
|
||||
memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
|
||||
memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
|
||||
memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
VCount = 0;
|
||||
@ -186,6 +266,8 @@ void Reset()
|
||||
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
|
||||
|
||||
ResetRenderer();
|
||||
|
||||
ResetVRAMCache();
|
||||
}
|
||||
|
||||
void Stop()
|
||||
@ -261,6 +343,8 @@ void DoSavestate(Savestate* file)
|
||||
GPU2D_A->DoSavestate(file);
|
||||
GPU2D_B->DoSavestate(file);
|
||||
GPU3D::DoSavestate(file);
|
||||
|
||||
ResetVRAMCache();
|
||||
}
|
||||
|
||||
void AssignFramebuffers()
|
||||
@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
|
||||
|
||||
u8* GetUniqueBankPtr(u32 mask, u32 offset)
|
||||
{
|
||||
if (!mask) return NULL;
|
||||
|
||||
int num = 0;
|
||||
if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
|
||||
else
|
||||
{
|
||||
if (!(mask & 0xF)) { mask >>= 4; num += 4; }
|
||||
if (!(mask & 0x3)) { mask >>= 2; num += 2; }
|
||||
if (!(mask & 0x1)) { mask >>= 1; num += 1; }
|
||||
}
|
||||
if (mask != 1) return NULL;
|
||||
|
||||
if (!mask || (mask & (mask - 1)) != 0) return NULL;
|
||||
int num = __builtin_ctz(mask);
|
||||
return &VRAM[num][offset & VRAMMask[num]];
|
||||
}
|
||||
|
||||
@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
|
||||
|
||||
case 4: // ABG ext palette
|
||||
UNMAP_RANGE(ABGExtPal, 0, 4);
|
||||
GPU2D_A->BGExtPalDirty(0);
|
||||
GPU2D_A->BGExtPalDirty(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
|
||||
|
||||
case 4: // ABG ext palette
|
||||
MAP_RANGE(ABGExtPal, 0, 4);
|
||||
GPU2D_A->BGExtPalDirty(0);
|
||||
GPU2D_A->BGExtPalDirty(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
|
||||
case 4: // ABG ext palette
|
||||
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
|
||||
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
|
||||
GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
|
||||
break;
|
||||
|
||||
case 5: // AOBJ ext palette
|
||||
VRAMMap_AOBJExtPal &= ~bankmask;
|
||||
GPU2D_A->OBJExtPalDirty();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
|
||||
case 4: // ABG ext palette
|
||||
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
|
||||
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
|
||||
GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
|
||||
break;
|
||||
|
||||
case 5: // AOBJ ext palette
|
||||
VRAMMap_AOBJExtPal |= bankmask;
|
||||
GPU2D_A->OBJExtPalDirty();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
|
||||
|
||||
case 2: // BBG ext palette
|
||||
UNMAP_RANGE(BBGExtPal, 0, 4);
|
||||
GPU2D_B->BGExtPalDirty(0);
|
||||
GPU2D_B->BGExtPalDirty(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
|
||||
|
||||
case 2: // BBG ext palette
|
||||
MAP_RANGE(BBGExtPal, 0, 4);
|
||||
GPU2D_B->BGExtPalDirty(0);
|
||||
GPU2D_B->BGExtPalDirty(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
|
||||
|
||||
case 3: // BOBJ ext palette
|
||||
VRAMMap_BOBJExtPal &= ~bankmask;
|
||||
GPU2D_B->OBJExtPalDirty();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
|
||||
|
||||
case 3: // BOBJ ext palette
|
||||
VRAMMap_BOBJExtPal |= bankmask;
|
||||
GPU2D_B->OBJExtPalDirty();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -937,6 +997,8 @@ void StartHBlank(u32 line)
|
||||
DispStat[0] |= (1<<1);
|
||||
DispStat[1] |= (1<<1);
|
||||
|
||||
SyncDirtyFlags();
|
||||
|
||||
if (VCount < 192)
|
||||
{
|
||||
// draw
|
||||
@ -1096,4 +1158,224 @@ void SetVCount(u16 val)
|
||||
NextVCount = val;
|
||||
}
|
||||
|
||||
template <u32 Size, u32 MappingGranularity>
|
||||
NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings)
|
||||
{
|
||||
NonStupidBitField<Size/VRAMDirtyGranularity> result;
|
||||
u16 banksToBeZeroed = 0;
|
||||
for (u32 i = 0; i < Size / MappingGranularity; i++)
|
||||
{
|
||||
if (currentMappings[i] != Mapping[i])
|
||||
{
|
||||
result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
|
||||
banksToBeZeroed |= currentMappings[i];
|
||||
Mapping[i] = currentMappings[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 mapping = Mapping[i];
|
||||
|
||||
banksToBeZeroed |= mapping;
|
||||
|
||||
while (mapping != 0)
|
||||
{
|
||||
u32 num = __builtin_ctz(mapping);
|
||||
mapping &= ~(1 << num);
|
||||
|
||||
// hack for **speed**
|
||||
// this could probably be done less ugly but then we would rely
|
||||
// on the compiler for vectorisation
|
||||
static_assert(VRAMDirtyGranularity == 512);
|
||||
if (MappingGranularity == 16*1024)
|
||||
{
|
||||
u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
|
||||
((u32*)result.Data)[i] |= dirty;
|
||||
}
|
||||
else if (MappingGranularity == 8*1024)
|
||||
{
|
||||
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
|
||||
((u16*)result.Data)[i] |= dirty;
|
||||
}
|
||||
else if (MappingGranularity == 128*1024)
|
||||
{
|
||||
((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
|
||||
((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
|
||||
((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
|
||||
((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
// welp
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (banksToBeZeroed != 0)
|
||||
{
|
||||
u32 num = __builtin_ctz(banksToBeZeroed);
|
||||
banksToBeZeroed &= ~(1 << num);
|
||||
memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*);
|
||||
template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
|
||||
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
|
||||
template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
|
||||
template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
|
||||
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
|
||||
|
||||
template <u32 Size>
|
||||
void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
|
||||
{
|
||||
const u32 VRAMWrittenBitsPer16KB = 16*1024/VRAMDirtyGranularity;
|
||||
|
||||
for (typename NonStupidBitField<Size>::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++)
|
||||
{
|
||||
u32 mapping = mappings[*it / VRAMWrittenBitsPer16KB];
|
||||
while (mapping != 0)
|
||||
{
|
||||
u32 num = __builtin_ctz(mapping);
|
||||
|
||||
VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true;
|
||||
|
||||
mapping &= ~(1 << num);
|
||||
}
|
||||
}
|
||||
memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
|
||||
}
|
||||
|
||||
void SyncDirtyFlags()
|
||||
{
|
||||
SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
|
||||
SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
|
||||
SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
|
||||
SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
|
||||
SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7);
|
||||
}
|
||||
|
||||
template <u32 MappingGranularity, u32 Size>
|
||||
inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
|
||||
{
|
||||
const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
|
||||
|
||||
bool change = false;
|
||||
|
||||
typename NonStupidBitField<Size>::Iterator it = dirty.Begin();
|
||||
while (it != dirty.End())
|
||||
{
|
||||
u32 offset = *it * VRAMDirtyGranularity;
|
||||
u8* dst = flat + offset;
|
||||
u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset);
|
||||
if (fastAccess)
|
||||
{
|
||||
memcpy(dst, fastAccess, VRAMDirtyGranularity);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (u32 i = 0; i < VRAMDirtyGranularity; i += 8)
|
||||
*(u64*)&dst[i] = slowAccess(offset + i);
|
||||
}
|
||||
change = true;
|
||||
it++;
|
||||
}
|
||||
return change;
|
||||
}
|
||||
|
||||
// Refreshes the pieces of VRAMFlat_Texture flagged in dirty, using the
// texture mappings (128KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>);
    return refreshed;
}
|
||||
// Refreshes the pieces of VRAMFlat_TexPal flagged in dirty, using the
// texture palette mappings (16KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>);
    return refreshed;
}
|
||||
|
||||
// Refreshes the pieces of VRAMFlat_ABG flagged in dirty, using the ABG
// mappings (16KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>);
    return refreshed;
}
|
||||
// Refreshes the pieces of VRAMFlat_BBG flagged in dirty, using the BBG
// mappings (16KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>);
    return refreshed;
}
|
||||
|
||||
// Refreshes the pieces of VRAMFlat_AOBJ flagged in dirty, using the AOBJ
// mappings (16KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>);
    return refreshed;
}
|
||||
// Refreshes the pieces of VRAMFlat_BOBJ flagged in dirty, using the BOBJ
// mappings (16KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>);
    return refreshed;
}
|
||||
|
||||
template<typename T>
|
||||
T ReadVRAM_ABGExtPal(u32 addr)
|
||||
{
|
||||
u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3];
|
||||
|
||||
T ret = 0;
|
||||
if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF];
|
||||
if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF];
|
||||
if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF];
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T ReadVRAM_BBGExtPal(u32 addr)
|
||||
{
|
||||
u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3];
|
||||
|
||||
T ret = 0;
|
||||
if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF];
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T ReadVRAM_AOBJExtPal(u32 addr)
|
||||
{
|
||||
u32 mask = VRAMMap_AOBJExtPal;
|
||||
|
||||
T ret = 0;
|
||||
if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
|
||||
if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T ReadVRAM_BOBJExtPal(u32 addr)
|
||||
{
|
||||
u32 mask = VRAMMap_BOBJExtPal;
|
||||
|
||||
T ret = 0;
|
||||
if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF];
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Refreshes the pieces of VRAMFlat_ABGExtPal flagged in dirty, using the ABG
// extended palette mappings (8KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>);
    return refreshed;
}
|
||||
// Refreshes the pieces of VRAMFlat_BBGExtPal flagged in dirty, using the BBG
// extended palette mappings (8KB slots). Returns true if anything was refreshed.
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
    const bool refreshed = CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>);
    return refreshed;
}
|
||||
|
||||
// Refreshes the pieces of VRAMFlat_AOBJExtPal flagged in dirty.
// The region consists of a single 8KB slot, so the address of the scalar
// VRAMMap_AOBJExtPal serves as a one-entry mapping table.
// Returns true if anything was refreshed.
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
    u32* const singleSlot = &VRAMMap_AOBJExtPal;
    return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, singleSlot, dirty, ReadVRAM_AOBJExtPal<u64>);
}
|
||||
// Refreshes the pieces of VRAMFlat_BOBJExtPal flagged in dirty.
// The region consists of a single 8KB slot, so the address of the scalar
// VRAMMap_BOBJExtPal serves as a one-entry mapping table.
// Returns true if anything was refreshed.
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
    u32* const singleSlot = &VRAMMap_BOBJExtPal;
    return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, singleSlot, dirty, ReadVRAM_BOBJExtPal<u64>);
}
|
||||
|
||||
}
|
87
src/GPU.h
87
src/GPU.h
@ -20,6 +20,7 @@
|
||||
#define GPU_H
|
||||
|
||||
#include "GPU2D.h"
|
||||
#include "NonStupidBitfield.h"
|
||||
|
||||
namespace GPU
|
||||
{
|
||||
@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
|
||||
extern u8 VRAM_H[ 32*1024];
|
||||
extern u8 VRAM_I[ 16*1024];
|
||||
|
||||
extern u8* VRAM[9];
|
||||
extern u8* const VRAM[9];
|
||||
|
||||
extern u32 VRAMMap_LCDC;
|
||||
extern u32 VRAMMap_ABG[0x20];
|
||||
@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B;
|
||||
|
||||
extern int Renderer;
|
||||
|
||||
const u32 VRAMDirtyGranularity = 512;
|
||||
|
||||
extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
|
||||
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
|
||||
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
|
||||
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
|
||||
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
|
||||
|
||||
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
|
||||
|
||||
template <u32 Size, u32 MappingGranularity>
|
||||
struct VRAMTrackingSet
|
||||
{
|
||||
u16 Mapping[Size / MappingGranularity];
|
||||
|
||||
const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
|
||||
|
||||
void Reset()
|
||||
{
|
||||
memset(Mapping, 0, sizeof(Mapping));
|
||||
}
|
||||
NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
|
||||
};
|
||||
|
||||
extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
|
||||
extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
|
||||
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
|
||||
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
|
||||
|
||||
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
|
||||
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
|
||||
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
|
||||
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
|
||||
|
||||
extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
|
||||
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
|
||||
|
||||
extern u8 VRAMFlat_ABG[512*1024];
|
||||
extern u8 VRAMFlat_BBG[128*1024];
|
||||
extern u8 VRAMFlat_AOBJ[256*1024];
|
||||
extern u8 VRAMFlat_BOBJ[128*1024];
|
||||
|
||||
extern u8 VRAMFlat_ABGExtPal[32*1024];
|
||||
extern u8 VRAMFlat_BBGExtPal[32*1024];
|
||||
|
||||
extern u8 VRAMFlat_AOBJExtPal[8*1024];
|
||||
extern u8 VRAMFlat_BOBJExtPal[8*1024];
|
||||
|
||||
extern u8 VRAMFlat_Texture[512*1024];
|
||||
extern u8 VRAMFlat_TexPal[128*1024];
|
||||
|
||||
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
|
||||
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
|
||||
|
||||
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
|
||||
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
|
||||
|
||||
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
|
||||
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
|
||||
|
||||
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
|
||||
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
|
||||
|
||||
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
|
||||
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
|
||||
|
||||
void SyncDirtyFlags();
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
|
||||
default: return;
|
||||
}
|
||||
|
||||
if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val;
|
||||
if (VRAMMap_LCDC & (1<<bank))
|
||||
{
|
||||
*(T*)&VRAM[bank][addr] = val;
|
||||
VRAMDirty[bank][addr / VRAMDirtyGranularity] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -262,6 +334,8 @@ void WriteVRAM_ABG(u32 addr, T val)
|
||||
{
|
||||
u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F];
|
||||
|
||||
VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
|
||||
|
||||
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
|
||||
@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
|
||||
{
|
||||
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
|
||||
|
||||
VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
|
||||
|
||||
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
|
||||
@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val)
|
||||
{
|
||||
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
|
||||
|
||||
VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
|
||||
|
||||
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
|
||||
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
|
||||
@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
|
||||
{
|
||||
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
|
||||
|
||||
VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
|
||||
|
||||
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
T ReadVRAM_ARM7(u32 addr)
|
||||
{
|
||||
@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
|
||||
{
|
||||
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
|
||||
|
||||
VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
|
||||
|
||||
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
|
||||
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
|
||||
}
|
||||
|
294
src/GPU2D.cpp
294
src/GPU2D.cpp
@ -148,12 +148,6 @@ void GPU2D::Reset()
|
||||
CaptureCnt = 0;
|
||||
|
||||
MasterBrightness = 0;
|
||||
|
||||
BGExtPalStatus[0] = 0;
|
||||
BGExtPalStatus[1] = 0;
|
||||
BGExtPalStatus[2] = 0;
|
||||
BGExtPalStatus[3] = 0;
|
||||
OBJExtPalStatus = 0;
|
||||
}
|
||||
|
||||
void GPU2D::DoSavestate(Savestate* file)
|
||||
@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file)
|
||||
|
||||
if (!file->Saving)
|
||||
{
|
||||
// refresh those
|
||||
BGExtPalStatus[0] = 0;
|
||||
BGExtPalStatus[1] = 0;
|
||||
BGExtPalStatus[2] = 0;
|
||||
BGExtPalStatus[3] = 0;
|
||||
OBJExtPalStatus = 0;
|
||||
|
||||
CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
|
||||
CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
|
||||
}
|
||||
@ -758,6 +745,25 @@ void GPU2D::DrawScanline(u32 line)
|
||||
int n3dline = line;
|
||||
line = GPU::VCount;
|
||||
|
||||
if (Num == 0)
|
||||
{
|
||||
auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG);
|
||||
GPU::MakeVRAMFlat_ABGCoherent(bgDirty);
|
||||
auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal);
|
||||
GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty);
|
||||
auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal);
|
||||
GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG);
|
||||
GPU::MakeVRAMFlat_BBGCoherent(bgDirty);
|
||||
auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal);
|
||||
GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty);
|
||||
auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal);
|
||||
GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty);
|
||||
}
|
||||
|
||||
bool forceblank = false;
|
||||
|
||||
// scanlines that end up outside of the GPU drawing range
|
||||
@ -970,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width)
|
||||
u16* dst = (u16*)GPU::VRAM[dstvram];
|
||||
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
|
||||
|
||||
static_assert(GPU::VRAMDirtyGranularity == 512);
|
||||
GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true;
|
||||
|
||||
// TODO: handle 3D in accelerated mode!!
|
||||
|
||||
u32* srcA;
|
||||
@ -1188,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GPU2D::BGExtPalDirty(u32 base)
|
||||
{
|
||||
BGExtPalStatus[base] = 0;
|
||||
BGExtPalStatus[base+1] = 0;
|
||||
}
|
||||
|
||||
void GPU2D::OBJExtPalDirty()
|
||||
{
|
||||
OBJExtPalStatus = 0;
|
||||
}
|
||||
|
||||
|
||||
u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
|
||||
{
|
||||
u16* dst = &BGExtPalCache[slot][pal << 8];
|
||||
|
||||
if (!(BGExtPalStatus[slot] & (1<<pal)))
|
||||
{
|
||||
if (Num)
|
||||
{
|
||||
if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7))
|
||||
memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2);
|
||||
else
|
||||
memset(dst, 0, 256*2);
|
||||
}
|
||||
else
|
||||
{
|
||||
memset(dst, 0, 256*2);
|
||||
|
||||
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4))
|
||||
for (int i = 0; i < 256; i+=2)
|
||||
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)];
|
||||
|
||||
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5))
|
||||
for (int i = 0; i < 256; i+=2)
|
||||
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)];
|
||||
|
||||
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6))
|
||||
for (int i = 0; i < 256; i+=2)
|
||||
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)];
|
||||
}
|
||||
|
||||
BGExtPalStatus[slot] |= (1<<pal);
|
||||
}
|
||||
|
||||
return dst;
|
||||
const u32 PaletteSize = 256 * 2;
|
||||
const u32 SlotSize = PaletteSize * 16;
|
||||
return (u16*)&(Num == 0
|
||||
? GPU::VRAMFlat_ABGExtPal
|
||||
: GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize];
|
||||
}
|
||||
|
||||
u16* GPU2D::GetOBJExtPal()
|
||||
{
|
||||
u16* dst = OBJExtPalCache;
|
||||
|
||||
if (!OBJExtPalStatus)
|
||||
{
|
||||
if (Num)
|
||||
{
|
||||
if (GPU::VRAMMap_BOBJExtPal & (1<<8))
|
||||
memcpy(dst, &GPU::VRAM_I[0], 16*256*2);
|
||||
else
|
||||
memset(dst, 0, 16*256*2);
|
||||
}
|
||||
else
|
||||
{
|
||||
memset(dst, 0, 16*256*2);
|
||||
|
||||
if (GPU::VRAMMap_AOBJExtPal & (1<<5))
|
||||
for (int i = 0; i < 16*256; i+=2)
|
||||
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[i << 1];
|
||||
|
||||
if (GPU::VRAMMap_AOBJExtPal & (1<<6))
|
||||
for (int i = 0; i < 16*256; i+=2)
|
||||
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[i << 1];
|
||||
}
|
||||
|
||||
OBJExtPalStatus = 1;
|
||||
}
|
||||
|
||||
return dst;
|
||||
return Num == 0
|
||||
? (u16*)GPU::VRAMFlat_AOBJExtPal
|
||||
: (u16*)GPU::VRAMFlat_BOBJExtPal;
|
||||
}
|
||||
|
||||
|
||||
@ -1697,6 +1641,20 @@ void GPU2D::DrawBG_3D()
|
||||
}
|
||||
}
|
||||
|
||||
// Selects the flat BG VRAM copy for a 2D engine.
// num:  0 picks GPU::VRAMFlat_ABG, any other value GPU::VRAMFlat_BBG
// data: receives the start of the chosen buffer
// mask: receives the offset mask of that buffer (its size minus one)
void GetBGVRAM(u32 num, u8*& data, u32& mask)
{
    if (num != 0)
    {
        // 128KB of BBG
        data = GPU::VRAMFlat_BBG;
        mask = 0x1FFFF;
        return;
    }

    // 512KB of ABG
    data = GPU::VRAMFlat_ABG;
    mask = 0x7FFFF;
}
|
||||
|
||||
template<bool mosaic, GPU2D::DrawPixel drawPixel>
|
||||
void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
{
|
||||
@ -1720,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
extpal = (DispCnt & 0x40000000);
|
||||
if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
|
||||
|
||||
u8* bgvram;
|
||||
u32 bgvrammask;
|
||||
GetBGVRAM(Num, bgvram, bgvrammask);
|
||||
if (Num)
|
||||
{
|
||||
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0x400];
|
||||
}
|
||||
else
|
||||
{
|
||||
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0];
|
||||
}
|
||||
@ -1758,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
// preload shit as needed
|
||||
if ((xoff & 0x7) || mosaic)
|
||||
{
|
||||
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
|
||||
curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask];
|
||||
|
||||
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
|
||||
else curpal = pal;
|
||||
@ -1779,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
|
||||
{
|
||||
// load a new tile
|
||||
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
|
||||
curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
|
||||
|
||||
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
|
||||
else curpal = pal;
|
||||
@ -1794,7 +1755,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
if (WindowMask[i] & (1<<bgnum))
|
||||
{
|
||||
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
|
||||
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff);
|
||||
color = bgvram[(pixelsaddr + tilexoff) & bgvrammask];
|
||||
|
||||
if (color)
|
||||
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
|
||||
@ -1810,7 +1771,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
// preload shit as needed
|
||||
if ((xoff & 0x7) || mosaic)
|
||||
{
|
||||
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
|
||||
curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask];
|
||||
curpal = pal + ((curtile & 0xF000) >> 8);
|
||||
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
|
||||
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
|
||||
@ -1828,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
|
||||
{
|
||||
// load a new tile
|
||||
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
|
||||
curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
|
||||
curpal = pal + ((curtile & 0xF000) >> 8);
|
||||
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
|
||||
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
|
||||
@ -1842,11 +1803,11 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
|
||||
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
|
||||
if (tilexoff & 0x1)
|
||||
{
|
||||
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4;
|
||||
color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F;
|
||||
color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F;
|
||||
}
|
||||
|
||||
if (color)
|
||||
@ -1895,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
|
||||
rotY -= (BGMosaicY * rotD);
|
||||
}
|
||||
|
||||
u8* bgvram;
|
||||
u32 bgvrammask;
|
||||
|
||||
if (Num)
|
||||
{
|
||||
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0x400];
|
||||
}
|
||||
else
|
||||
{
|
||||
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0];
|
||||
}
|
||||
@ -1934,13 +1898,13 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
|
||||
|
||||
if ((!((finalX|finalY) & overflowmask)))
|
||||
{
|
||||
curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)));
|
||||
curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask];
|
||||
|
||||
// draw pixel
|
||||
u32 tilexoff = (finalX >> 8) & 0x7;
|
||||
u32 tileyoff = (finalY >> 8) & 0x7;
|
||||
|
||||
color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff);
|
||||
color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
|
||||
|
||||
if (color)
|
||||
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
|
||||
@ -1964,6 +1928,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
u16* pal;
|
||||
u32 extpal;
|
||||
|
||||
u8* bgvram;
|
||||
u32 bgvrammask;
|
||||
GetBGVRAM(Num, bgvram, bgvrammask);
|
||||
|
||||
extpal = (DispCnt & 0x40000000);
|
||||
|
||||
s16 rotA = BGRotA[bgnum-2];
|
||||
@ -2007,8 +1975,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
ofymask = ~ymask;
|
||||
}
|
||||
|
||||
if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6);
|
||||
else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6);
|
||||
if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6);
|
||||
else tilemapaddr = ((bgcnt & 0x1F00) << 6);
|
||||
|
||||
if (bgcnt & 0x0004)
|
||||
{
|
||||
@ -2035,7 +2003,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
|
||||
if (!(finalX & ofxmask) && !(finalY & ofymask))
|
||||
{
|
||||
color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1));
|
||||
color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
|
||||
|
||||
if (color & 0x8000)
|
||||
drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
|
||||
@ -2074,7 +2042,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
|
||||
if (!(finalX & ofxmask) && !(finalY & ofymask))
|
||||
{
|
||||
color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
|
||||
color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
|
||||
|
||||
if (color)
|
||||
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
|
||||
@ -2106,15 +2074,15 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
|
||||
if (Num)
|
||||
{
|
||||
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0x400];
|
||||
}
|
||||
else
|
||||
{
|
||||
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
|
||||
tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
|
||||
|
||||
pal = (u16*)&GPU::Palette[0];
|
||||
}
|
||||
@ -2144,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
|
||||
if ((!((finalX|finalY) & overflowmask)))
|
||||
{
|
||||
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1));
|
||||
curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask];
|
||||
|
||||
if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
|
||||
else curpal = pal;
|
||||
@ -2156,7 +2124,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
|
||||
if (curtile & 0x0400) tilexoff = 7-tilexoff;
|
||||
if (curtile & 0x0800) tileyoff = 7-tileyoff;
|
||||
|
||||
color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff);
|
||||
color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
|
||||
|
||||
if (color)
|
||||
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
|
||||
@ -2222,8 +2190,9 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
|
||||
rotY -= (BGMosaicY * rotD);
|
||||
}
|
||||
|
||||
if (Num) tilemapaddr = 0x06200000;
|
||||
else tilemapaddr = 0x06000000;
|
||||
u8* bgvram;
|
||||
u32 bgvrammask;
|
||||
GetBGVRAM(Num, bgvram, bgvrammask);
|
||||
|
||||
// 256-color bitmap
|
||||
|
||||
@ -2251,7 +2220,7 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
|
||||
|
||||
if (!(finalX & ofxmask) && !(finalY & ofymask))
|
||||
{
|
||||
color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
|
||||
color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
|
||||
|
||||
if (color)
|
||||
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
|
||||
@ -2346,6 +2315,20 @@ void GPU2D::InterleaveSprites(u32 prio)
|
||||
}
|
||||
}
|
||||
|
||||
// Hands back the flat OBJ VRAM buffer to read sprite data from, together with
// the mask that wraps a byte offset into that buffer.
//
//   num  - selector: 0 picks GPU::VRAMFlat_AOBJ, any other value picks
//          GPU::VRAMFlat_BOBJ
//   data - receives the base pointer of the selected buffer
//   mask - receives the offset mask (0x3FFFF for AOBJ, 0x1FFFF for BOBJ)
void GetOBJVRAM(u32 num, u8*& data, u32& mask)
{
    if (num != 0)
    {
        data = GPU::VRAMFlat_BOBJ;
        mask = 0x1FFFF;
        return;
    }

    data = GPU::VRAMFlat_AOBJ;
    mask = 0x3FFFF;
}
|
||||
|
||||
#define DoDrawSprite(type, ...) \
|
||||
if (iswin) \
|
||||
{ \
|
||||
@ -2370,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line)
|
||||
OBJMosaicYCount = 0;
|
||||
}
|
||||
|
||||
if (Num == 0)
|
||||
{
|
||||
auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ);
|
||||
GPU::MakeVRAMFlat_AOBJCoherent(objDirty);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ);
|
||||
GPU::MakeVRAMFlat_BOBJCoherent(objDirty);
|
||||
}
|
||||
|
||||
NumSprites = 0;
|
||||
memset(OBJLine, 0, 256*4);
|
||||
memset(OBJWindow, 0, 256);
|
||||
@ -2482,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
|
||||
u32 ytilefactor;
|
||||
|
||||
u8* objvram;
|
||||
u32 objvrammask;
|
||||
GetOBJVRAM(Num, objvram, objvrammask);
|
||||
|
||||
s32 centerX = boundwidth >> 1;
|
||||
s32 centerY = boundheight >> 1;
|
||||
|
||||
@ -2525,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
|
||||
pixelattr |= (0xC0000000 | (alpha << 24));
|
||||
|
||||
u32 pixelsaddr;
|
||||
if (DispCnt & 0x40)
|
||||
{
|
||||
if (DispCnt & 0x20)
|
||||
@ -2536,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
}
|
||||
else
|
||||
{
|
||||
tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
|
||||
pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1));
|
||||
ytilefactor = ((width >> 8) * 2);
|
||||
}
|
||||
}
|
||||
@ -2544,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
{
|
||||
if (DispCnt & 0x20)
|
||||
{
|
||||
tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
|
||||
pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
|
||||
ytilefactor = (256 * 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
|
||||
pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
|
||||
ytilefactor = (128 * 2);
|
||||
}
|
||||
}
|
||||
|
||||
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
|
||||
|
||||
for (; xoff < boundwidth;)
|
||||
{
|
||||
if ((u32)rotX < width && (u32)rotY < height)
|
||||
{
|
||||
color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
|
||||
color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask];
|
||||
|
||||
if (color & 0x8000)
|
||||
{
|
||||
@ -2585,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 pixelsaddr = tilenum;
|
||||
if (DispCnt & 0x10)
|
||||
{
|
||||
tilenum <<= ((DispCnt >> 20) & 0x3);
|
||||
pixelsaddr <<= ((DispCnt >> 20) & 0x3);
|
||||
ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
|
||||
}
|
||||
else
|
||||
@ -2601,9 +2599,8 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
if (attrib[0] & 0x2000)
|
||||
{
|
||||
// 256-color
|
||||
tilenum <<= 5;
|
||||
ytilefactor <<= 5;
|
||||
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
|
||||
pixelsaddr <<= 5;
|
||||
|
||||
if (!window)
|
||||
{
|
||||
@ -2617,7 +2614,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
{
|
||||
if ((u32)rotX < width && (u32)rotY < height)
|
||||
{
|
||||
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
|
||||
color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask];
|
||||
|
||||
if (color)
|
||||
{
|
||||
@ -2657,7 +2654,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
|
||||
{
|
||||
if ((u32)rotX < width && (u32)rotY < height)
|
||||
{
|
||||
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
|
||||
color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask];
|
||||
if (rotX & 0x100)
|
||||
color >>= 4;
|
||||
else
|
||||
@ -2705,6 +2702,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
pixelattr |= 0x100000;
|
||||
}
|
||||
|
||||
u8* objvram;
|
||||
u32 objvrammask;
|
||||
GetOBJVRAM(Num, objvram, objvrammask);
|
||||
|
||||
// yflip
|
||||
if (attrib[1] & 0x2000)
|
||||
ypos = height-1 - ypos;
|
||||
@ -2735,6 +2736,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
|
||||
pixelattr |= (0xC0000000 | (alpha << 24));
|
||||
|
||||
u32 pixelsaddr = tilenum;
|
||||
if (DispCnt & 0x40)
|
||||
{
|
||||
if (DispCnt & 0x20)
|
||||
@ -2746,25 +2748,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
}
|
||||
else
|
||||
{
|
||||
tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
|
||||
tilenum += (ypos * width * 2);
|
||||
pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1));
|
||||
pixelsaddr += (ypos * width * 2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (DispCnt & 0x20)
|
||||
{
|
||||
tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
|
||||
tilenum += (ypos * 256 * 2);
|
||||
pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
|
||||
pixelsaddr += (ypos * 256 * 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
|
||||
tilenum += (ypos * 128 * 2);
|
||||
pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
|
||||
pixelsaddr += (ypos * 128 * 2);
|
||||
}
|
||||
}
|
||||
|
||||
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
|
||||
s32 pixelstride;
|
||||
|
||||
if (attrib[1] & 0x1000) // xflip
|
||||
@ -2781,7 +2782,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
|
||||
for (; xoff < xend;)
|
||||
{
|
||||
color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
|
||||
color = *(u16*)&objvram[pixelsaddr & objvrammask];
|
||||
|
||||
pixelsaddr += pixelstride;
|
||||
|
||||
@ -2805,14 +2806,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 pixelsaddr = tilenum;
|
||||
if (DispCnt & 0x10)
|
||||
{
|
||||
tilenum <<= ((DispCnt >> 20) & 0x3);
|
||||
tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
|
||||
pixelsaddr <<= ((DispCnt >> 20) & 0x3);
|
||||
pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
|
||||
}
|
||||
else
|
||||
{
|
||||
tilenum += ((ypos >> 3) * 0x20);
|
||||
pixelsaddr += ((ypos >> 3) * 0x20);
|
||||
}
|
||||
|
||||
if (spritemode == 1) pixelattr |= 0x80000000;
|
||||
@ -2821,8 +2823,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
if (attrib[0] & 0x2000)
|
||||
{
|
||||
// 256-color
|
||||
tilenum <<= 5;
|
||||
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
|
||||
pixelsaddr <<= 5;
|
||||
pixelsaddr += ((ypos & 0x7) << 3);
|
||||
s32 pixelstride;
|
||||
|
||||
@ -2851,7 +2852,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
|
||||
for (; xoff < xend;)
|
||||
{
|
||||
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr);
|
||||
color = objvram[pixelsaddr];
|
||||
|
||||
pixelsaddr += pixelstride;
|
||||
|
||||
@ -2877,8 +2878,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
else
|
||||
{
|
||||
// 16-color
|
||||
tilenum <<= 5;
|
||||
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
|
||||
pixelsaddr <<= 5;
|
||||
pixelsaddr += ((ypos & 0x7) << 2);
|
||||
s32 pixelstride;
|
||||
|
||||
@ -2910,13 +2910,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
|
||||
{
|
||||
if (attrib[1] & 0x1000)
|
||||
{
|
||||
if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; pixelsaddr--; }
|
||||
else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4;
|
||||
if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; }
|
||||
else color = objvram[pixelsaddr & objvrammask] >> 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; pixelsaddr++; }
|
||||
else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F;
|
||||
if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; }
|
||||
else color = objvram[pixelsaddr & objvrammask] & 0x0F;
|
||||
}
|
||||
|
||||
if (color)
|
||||
|
@ -59,9 +59,6 @@ public:
|
||||
|
||||
void CheckWindows(u32 line);
|
||||
|
||||
void BGExtPalDirty(u32 base);
|
||||
void OBJExtPalDirty();
|
||||
|
||||
u16* GetBGExtPal(u32 slot, u32 pal);
|
||||
u16* GetOBJExtPal();
|
||||
|
||||
@ -128,9 +125,6 @@ private:
|
||||
u16 MasterBrightness;
|
||||
|
||||
u16 BGExtPalCache[4][16*256];
|
||||
u16 OBJExtPalCache[16*256];
|
||||
u32 BGExtPalStatus[4];
|
||||
u32 OBJExtPalStatus;
|
||||
|
||||
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
|
||||
u32 ColorBlend5(u32 val1, u32 val2);
|
||||
|
@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34];
|
||||
|
||||
u32 RenderClearAttr1, RenderClearAttr2;
|
||||
|
||||
bool RenderFrameIdentical;
|
||||
|
||||
u32 ZeroDotWLimit;
|
||||
|
||||
u32 GXStat;
|
||||
@ -2491,6 +2493,19 @@ void VBlank()
|
||||
}
|
||||
|
||||
RenderNumPolygons = NumPolygons;
|
||||
RenderFrameIdentical = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
RenderFrameIdentical = RenderDispCnt == DispCnt
|
||||
&& RenderAlphaRef == AlphaRef
|
||||
&& RenderClearAttr1 == ClearAttr1
|
||||
&& RenderClearAttr2 == ClearAttr2
|
||||
&& RenderFogColor == FogColor
|
||||
&& RenderFogOffset == FogOffset * 0x200
|
||||
&& memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
|
||||
&& memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
|
||||
&& memcmp(RenderToonTable, ToonTable, 32*2) == 0;
|
||||
}
|
||||
|
||||
RenderDispCnt = DispCnt;
|
||||
|
@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34];
|
||||
|
||||
extern u32 RenderClearAttr1, RenderClearAttr2;
|
||||
|
||||
extern bool RenderFrameIdentical;
|
||||
|
||||
extern std::array<Polygon*,2048> RenderPolygonRAM;
|
||||
extern u32 RenderNumPolygons;
|
||||
|
||||
|
@ -58,6 +58,8 @@ bool PrevIsShadowMask;
|
||||
|
||||
bool Enabled;
|
||||
|
||||
bool FrameIdentical;
|
||||
|
||||
// threading
|
||||
|
||||
bool Threaded;
|
||||
@ -550,6 +552,16 @@ typedef struct
|
||||
|
||||
RendererPolygon PolygonList[2048];
|
||||
|
||||
template <typename T>
|
||||
inline T ReadVRAM_Texture(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
|
||||
}
|
||||
template <typename T>
|
||||
inline T ReadVRAM_TexPal(u32 addr)
|
||||
{
|
||||
return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
|
||||
}
|
||||
|
||||
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
|
||||
{
|
||||
@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 1: // A3I5
|
||||
{
|
||||
vramaddr += ((t * width) + s);
|
||||
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
|
||||
|
||||
texpal <<= 4;
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
|
||||
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
|
||||
}
|
||||
break;
|
||||
@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 2: // 4-color
|
||||
{
|
||||
vramaddr += (((t * width) + s) >> 2);
|
||||
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
|
||||
pixel >>= ((s & 0x3) << 1);
|
||||
pixel &= 0x3;
|
||||
|
||||
texpal <<= 3;
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*alpha = (pixel==0) ? alpha0 : 31;
|
||||
}
|
||||
break;
|
||||
@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 3: // 16-color
|
||||
{
|
||||
vramaddr += (((t * width) + s) >> 1);
|
||||
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
|
||||
if (s & 0x1) pixel >>= 4;
|
||||
else pixel &= 0xF;
|
||||
|
||||
texpal <<= 4;
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*alpha = (pixel==0) ? alpha0 : 31;
|
||||
}
|
||||
break;
|
||||
@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 4: // 256-color
|
||||
{
|
||||
vramaddr += ((t * width) + s);
|
||||
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
|
||||
|
||||
texpal <<= 4;
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
|
||||
*alpha = (pixel==0) ? alpha0 : 31;
|
||||
}
|
||||
break;
|
||||
@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
if (vramaddr >= 0x40000)
|
||||
slot1addr += 0x10000;
|
||||
|
||||
u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 val = ReadVRAM_Texture<u8>(vramaddr);
|
||||
val >>= (2 * (s & 0x3));
|
||||
|
||||
u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr);
|
||||
u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
|
||||
u32 paloffset = (palinfo & 0x3FFF) << 2;
|
||||
texpal <<= 4;
|
||||
|
||||
switch (val & 0x3)
|
||||
{
|
||||
case 0:
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
*alpha = 31;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
*alpha = 31;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
if ((palinfo >> 14) == 1)
|
||||
{
|
||||
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
}
|
||||
else if ((palinfo >> 14) == 3)
|
||||
{
|
||||
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
*color = r | g | b;
|
||||
}
|
||||
else
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
|
||||
*alpha = 31;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
if ((palinfo >> 14) == 2)
|
||||
{
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
|
||||
*alpha = 31;
|
||||
}
|
||||
else if ((palinfo >> 14) == 3)
|
||||
{
|
||||
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
|
||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
|
||||
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 6: // A5I3
|
||||
{
|
||||
vramaddr += ((t * width) + s);
|
||||
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
|
||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
|
||||
|
||||
texpal <<= 4;
|
||||
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
|
||||
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
|
||||
*alpha = (pixel >> 3);
|
||||
}
|
||||
break;
|
||||
@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
|
||||
case 7: // direct color
|
||||
{
|
||||
vramaddr += (((t * width) + s) << 1);
|
||||
*color = GPU::ReadVRAM_Texture<u16>(vramaddr);
|
||||
*color = ReadVRAM_Texture<u16>(vramaddr);
|
||||
*alpha = (*color & 0x8000) ? 31 : 0;
|
||||
}
|
||||
break;
|
||||
@ -2007,8 +2019,8 @@ void ClearBuffers()
|
||||
{
|
||||
for (int x = 0; x < 256; x++)
|
||||
{
|
||||
u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
|
||||
u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
|
||||
u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
|
||||
u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
|
||||
|
||||
// TODO: confirm color conversion
|
||||
u32 r = (val2 << 1) & 0x3E; if (r) r++;
|
||||
@ -2088,11 +2100,19 @@ void VCount144()
|
||||
|
||||
void RenderFrame()
|
||||
{
|
||||
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
|
||||
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
|
||||
|
||||
bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
|
||||
bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
|
||||
|
||||
FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
|
||||
|
||||
if (RenderThreadRunning)
|
||||
{
|
||||
Platform::Semaphore_Post(Sema_RenderStart);
|
||||
}
|
||||
else
|
||||
else if (!FrameIdentical)
|
||||
{
|
||||
ClearBuffers();
|
||||
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
|
||||
@ -2107,8 +2127,15 @@ void RenderThreadFunc()
|
||||
if (!RenderThreadRunning) return;
|
||||
|
||||
RenderThreadRendering = true;
|
||||
ClearBuffers();
|
||||
RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
|
||||
if (FrameIdentical)
|
||||
{
|
||||
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
|
||||
}
|
||||
else
|
||||
{
|
||||
ClearBuffers();
|
||||
RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
|
||||
}
|
||||
|
||||
Platform::Semaphore_Post(Sema_RenderDone);
|
||||
RenderThreadRendering = false;
|
||||
|
149
src/NonStupidBitfield.h
Normal file
149
src/NonStupidBitfield.h
Normal file
@ -0,0 +1,149 @@
|
||||
#ifndef NONSTUPIDBITFIELD_H
|
||||
#define NONSTUPIDBITFIELD_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include <memory.h>
|
||||
|
||||
#include <initializer_list>
|
||||
#include <algorithm>
|
||||
|
||||
// like std::bitset but less stupid and optimised for
|
||||
// our use case (keeping track of memory invalidations)
|
||||
|
||||
template <u32 Size>
|
||||
struct NonStupidBitField
|
||||
{
|
||||
static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
|
||||
static const u32 DataLength = Size / 8;
|
||||
u8 Data[DataLength];
|
||||
|
||||
struct Ref
|
||||
{
|
||||
NonStupidBitField<Size>& BitField;
|
||||
u32 Idx;
|
||||
|
||||
operator bool()
|
||||
{
|
||||
return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7));
|
||||
}
|
||||
|
||||
Ref& operator=(bool set)
|
||||
{
|
||||
BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
|
||||
BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
struct Iterator
|
||||
{
|
||||
NonStupidBitField<Size>& BitField;
|
||||
u32 DataIdx;
|
||||
u32 BitIdx;
|
||||
u64 RemainingBits;
|
||||
|
||||
u32 operator*() { return DataIdx * 8 + BitIdx; }
|
||||
|
||||
bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; }
|
||||
bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; }
|
||||
|
||||
template <typename T>
|
||||
void Next()
|
||||
{
|
||||
while (RemainingBits == 0 && DataIdx < DataLength)
|
||||
{
|
||||
DataIdx += sizeof(T);
|
||||
RemainingBits = *(T*)&BitField.Data[DataIdx];
|
||||
}
|
||||
|
||||
BitIdx = __builtin_ctzll(RemainingBits);
|
||||
RemainingBits &= ~(1ULL << BitIdx);
|
||||
}
|
||||
|
||||
Iterator operator++(int)
|
||||
{
|
||||
Iterator prev(*this);
|
||||
++*this;
|
||||
return prev;
|
||||
}
|
||||
|
||||
Iterator& operator++()
|
||||
{
|
||||
if ((DataLength % 8) == 0)
|
||||
Next<u64>();
|
||||
else if ((DataLength % 4) == 0)
|
||||
Next<u32>();
|
||||
else if ((DataLength % 2) == 0)
|
||||
Next<u16>();
|
||||
else
|
||||
Next<u8>();
|
||||
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
NonStupidBitField(u32 start, u32 size)
|
||||
{
|
||||
memset(Data, 0, sizeof(Data));
|
||||
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
u32 roundedStartBit = (start + 7) & ~7;
|
||||
u32 roundedEndBit = (start + size) & ~7;
|
||||
if (roundedStartBit != roundedEndBit)
|
||||
memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8);
|
||||
|
||||
if (start & 0x7)
|
||||
Data[start >> 3] = 0xFF << (start & 0x7);
|
||||
if ((start + size) & 0x7)
|
||||
Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7);
|
||||
}
|
||||
|
||||
NonStupidBitField()
|
||||
{
|
||||
memset(Data, 0, sizeof(Data));
|
||||
}
|
||||
|
||||
Iterator End()
|
||||
{
|
||||
return Iterator{*this, DataLength, 0, 0};
|
||||
}
|
||||
Iterator Begin()
|
||||
{
|
||||
if ((DataLength % 8) == 0)
|
||||
return ++Iterator{*this, 0, 0, *(u64*)Data};
|
||||
else if ((DataLength % 4) == 0)
|
||||
return ++Iterator{*this, 0, 0, *(u32*)Data};
|
||||
else if ((DataLength % 2) == 0)
|
||||
return ++Iterator{*this, 0, 0, *(u16*)Data};
|
||||
else
|
||||
return ++Iterator{*this, 0, 0, *Data};
|
||||
}
|
||||
|
||||
Ref operator[](u32 idx)
|
||||
{
|
||||
return Ref{*this, idx};
|
||||
}
|
||||
|
||||
NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
{
|
||||
Data[i] |= other.Data[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
{
|
||||
Data[i] &= other.Data[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -77,7 +77,7 @@ Semaphore* Semaphore_Create();
|
||||
void Semaphore_Free(Semaphore* sema);
|
||||
void Semaphore_Reset(Semaphore* sema);
|
||||
void Semaphore_Wait(Semaphore* sema);
|
||||
void Semaphore_Post(Semaphore* sema);
|
||||
void Semaphore_Post(Semaphore* sema, int count = 1);
|
||||
|
||||
struct Mutex;
|
||||
Mutex* Mutex_Create();
|
||||
|
@ -230,9 +230,9 @@ void Semaphore_Wait(Semaphore* sema)
|
||||
((QSemaphore*) sema)->acquire();
|
||||
}
|
||||
|
||||
void Semaphore_Post(Semaphore* sema)
|
||||
void Semaphore_Post(Semaphore* sema, int count)
|
||||
{
|
||||
((QSemaphore*) sema)->release();
|
||||
((QSemaphore*) sema)->release(count);
|
||||
}
|
||||
|
||||
Mutex* Mutex_Create()
|
||||
|
Loading…
Reference in New Issue
Block a user