mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-07-25 23:29:55 -06:00
Compute shader renderer (#2041)
* nothing works yet * don't double buffer 3D framebuffers for the GL Renderer looks like leftovers from when 3D+2D composition was done in the frontend * oops * it works! * implement display capture for compute renderer it's actually just all stolen from the regular OpenGL renderer * fix bad indirect call * handle cleanup properly * add hires rendering to the compute shader renderer * fix UB also misc changes to use more unsigned multiplication also fix framebuffer resize * correct edge filling behaviour when AA is disabled * fix full color textures * fix edge marking (polygon id is 6-bit not 5) also make the code a bit nicer * take all edge cases into account for XMin/XMax calculation * use hires coordinate again * stop using fixed size buffers based on scale factor in shaders this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits * apparently & is not defined on bvec4 why does this even compile on Intel and Nvidia? * put the texture cache into it's own file * add compute shader renderer properly to the GUI also add option to toggle using high resolution vertex coordinates * unbind sampler object in compute shader renderer * fix GetRangedBitMask for 64 bit aligned 64 bits pretty embarassing * convert NonStupidBitfield.h back to LF only new lines * actually adapt to latest changes * fix stupid merge * actually make compute shader renderer work with newest changes * show progress on shader compilation * remove merge leftover
This commit is contained in:
@ -26,11 +26,38 @@
|
||||
#include <initializer_list>
|
||||
#include <algorithm>
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
/// Builds the mask of one 64-bit word for a range of bits.
///
/// The range covers the absolute bit positions [startBit, startBit + bitsCount)
/// of a bitfield stored as consecutive 64-bit words. The returned value has a bit
/// set for every position of word `idx` that lies inside that range.
///
/// @param idx       index of the 64-bit word the mask is built for
/// @param startBit  absolute index of the first bit of the range
/// @param bitsCount number of bits in the range
/// @return the mask for word `idx`; 0 if that word does not intersect the range
inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount)
{
    // Widen before adding so that startBit + bitsCount cannot wrap around in 32 bits.
    const u64 endBit = static_cast<u64>(startBit) + bitsCount;
    const u64 firstEntry = startBit >> 6;
    const u64 endEntry = (endBit + 0x3F) >> 6; // one past the last word touched by the range

    // Words outside of the range never contribute any bit (this also covers empty ranges).
    if (idx < firstEntry || idx >= endEntry)
        return 0;

    u64 mask = 0xFFFFFFFFFFFFFFFF;

    // First word: drop the bits below the start of the range.
    if (idx == firstEntry)
        mask &= 0xFFFFFFFFFFFFFFFF << (startBit & 0x3F);

    // Last word: drop the bits at and above the end of the range. When the range
    // ends exactly on a word boundary the last word is fully covered, so nothing
    // is dropped (and a shift by 64, which would be undefined, is avoided).
    if (idx == endEntry - 1 && (endBit & 0x3F))
        mask &= ~(0xFFFFFFFFFFFFFFFF << (endBit & 0x3F));

    return mask;
}
|
||||
|
||||
// like std::bitset but less stupid and optimised for
|
||||
// our use case (keeping track of memory invalidations)
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
template <u32 Size>
|
||||
struct NonStupidBitField
|
||||
{
|
||||
@ -166,6 +193,11 @@ struct NonStupidBitField
|
||||
return Ref{*this, idx};
|
||||
}
|
||||
|
||||
// Read-only lookup: true when bit number idx is set.
bool operator[](u32 idx) const
{
    // idx >> 6 selects the 64-bit word, the low six bits select the bit inside it
    const u64 bitInWord = 1ULL << (idx & 0x3F);
    return (Data[idx >> 6] & bitInWord) != 0;
}
|
||||
|
||||
void SetRange(u32 startBit, u32 bitsCount)
|
||||
{
|
||||
u32 startEntry = startBit >> 6;
|
||||
@ -187,6 +219,26 @@ struct NonStupidBitField
|
||||
}
|
||||
}
|
||||
|
||||
int Min() const
|
||||
{
|
||||
for (int i = 0; i < DataLength; i++)
|
||||
{
|
||||
if (Data[i])
|
||||
return i * 64 + __builtin_ctzll(Data[i]);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int Max() const
|
||||
{
|
||||
for (int i = DataLength - 1; i >= 0; i--)
|
||||
{
|
||||
if (Data[i])
|
||||
return i * 64 + (63 - __builtin_clzll(Data[i]));
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
@ -195,6 +247,7 @@ struct NonStupidBitField
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
@ -203,6 +256,20 @@ struct NonStupidBitField
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator bool() const
|
||||
{
|
||||
for (int i = 0; i < DataLength - 1; i++)
|
||||
{
|
||||
if (Data[i])
|
||||
return true;
|
||||
}
|
||||
if (Data[DataLength-1] & ((Size&0x3F) ? ~(0xFFFFFFFFFFFFFFFF << (Size&0x3F)) : 0xFFFFFFFFFFFFFFFF))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user