mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 13:57:57 -07:00
Merge pull request #108 from degasus/GLSLUtilShader
GLSL utility shader optimization
This commit is contained in:
commit
1f5b3c928f
@ -150,7 +150,7 @@ float MathFloatVectorSum(const std::vector<float>&);
|
||||
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
|
||||
|
||||
// Rounds down. 0 -> undefined
|
||||
inline u64 Log2(u64 val)
|
||||
inline int Log2(u64 val)
|
||||
{
|
||||
#if defined(__GNUC__)
|
||||
return 63 - __builtin_clzll(val);
|
||||
@ -161,7 +161,7 @@ inline u64 Log2(u64 val)
|
||||
return result;
|
||||
|
||||
#else
|
||||
u64 result = -1;
|
||||
int result = -1;
|
||||
while (val != 0)
|
||||
{
|
||||
val >>= 1;
|
||||
|
@ -413,12 +413,12 @@ wxString NiceSizeFormat(u64 _size)
|
||||
{
|
||||
const char* const unit_symbols[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
|
||||
|
||||
auto const unit = Log2(std::max<u64>(_size, 1)) / 10;
|
||||
auto const unit_size = (1 << (unit * 10));
|
||||
const u64 unit = Log2(std::max<u64>(_size, 1)) / 10;
|
||||
const u64 unit_size = (1 << (unit * 10));
|
||||
|
||||
// ugly rounding integer math
|
||||
auto const value = (_size + unit_size / 2) / unit_size;
|
||||
auto const frac = (_size % unit_size * 10 + unit_size / 2) / unit_size % 10;
|
||||
const u64 value = (_size + unit_size / 2) / unit_size;
|
||||
const u64 frac = (_size % unit_size * 10 + unit_size / 2) / unit_size % 10;
|
||||
|
||||
return StrToWxStr(StringFromFormat("%" PRIu64 ".%" PRIu64 " %s", value, frac, unit_symbols[unit]));
|
||||
}
|
||||
|
@ -452,12 +452,6 @@ void ProgramShaderCache::CreateHeader ( void )
|
||||
// Precision defines for GLSLES3
|
||||
"%s\n"
|
||||
|
||||
"\n"// A few required defines and ones that will make our lives a lot easier
|
||||
"#define ATTRIN in\n"
|
||||
"#define ATTROUT out\n"
|
||||
"#define VARYIN %s\n"
|
||||
"#define VARYOUT %s\n"
|
||||
|
||||
// Silly differences
|
||||
"#define float2 vec2\n"
|
||||
"#define float3 vec3\n"
|
||||
@ -470,8 +464,9 @@ void ProgramShaderCache::CreateHeader ( void )
|
||||
"#define frac fract\n"
|
||||
"#define lerp mix\n"
|
||||
|
||||
// Terrible hack, look at DriverDetails.h
|
||||
"%s\n"
|
||||
// Terrible hacks, look at DriverDetails.h
|
||||
"%s\n" // replace textureSize as constant
|
||||
"%s\n" // wipe out all centroid usages
|
||||
|
||||
, v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
|
||||
, v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
|
||||
@ -480,9 +475,8 @@ void ProgramShaderCache::CreateHeader ( void )
|
||||
|
||||
, v==GLSLES3 ? "precision highp float;" : ""
|
||||
|
||||
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : "centroid in"
|
||||
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : "centroid out"
|
||||
, DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : ""
|
||||
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "#define centroid" : ""
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -115,9 +115,9 @@ const u8 rasters[char_count][char_height] = {
|
||||
|
||||
static const char *s_vertexShaderSrc =
|
||||
"uniform vec2 charSize;\n"
|
||||
"ATTRIN vec2 rawpos;\n"
|
||||
"ATTRIN vec2 tex0;\n"
|
||||
"VARYOUT vec2 uv0;\n"
|
||||
"in vec2 rawpos;\n"
|
||||
"in vec2 tex0;\n"
|
||||
"out vec2 uv0;\n"
|
||||
"void main(void) {\n"
|
||||
" gl_Position = vec4(rawpos,0,1);\n"
|
||||
" uv0 = tex0 * charSize;\n"
|
||||
@ -126,7 +126,7 @@ static const char *s_vertexShaderSrc =
|
||||
static const char *s_fragmentShaderSrc =
|
||||
"uniform sampler2D samp8;\n"
|
||||
"uniform vec4 color;\n"
|
||||
"VARYIN vec2 uv0;\n"
|
||||
"in vec2 uv0;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"void main(void) {\n"
|
||||
" ocol0 = texture(samp8,uv0) * color;\n"
|
||||
|
@ -652,14 +652,14 @@ void Renderer::Init()
|
||||
s_pfont = new RasterFont();
|
||||
|
||||
ProgramShaderCache::CompileShader(s_ShowEFBCopyRegions,
|
||||
"ATTRIN vec2 rawpos;\n"
|
||||
"ATTRIN vec3 color0;\n"
|
||||
"VARYOUT vec4 c;\n"
|
||||
"in vec2 rawpos;\n"
|
||||
"in vec3 color0;\n"
|
||||
"out vec4 c;\n"
|
||||
"void main(void) {\n"
|
||||
" gl_Position = vec4(rawpos, 0.0, 1.0);\n"
|
||||
" c = vec4(color0, 1.0);\n"
|
||||
"}\n",
|
||||
"VARYIN vec4 c;\n"
|
||||
"in vec4 c;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"void main(void) {\n"
|
||||
" ocol0 = c;\n"
|
||||
|
@ -346,7 +346,7 @@ TextureCache::TextureCache()
|
||||
const char *pColorMatrixProg =
|
||||
"uniform sampler2D samp9;\n"
|
||||
"uniform vec4 colmat[7];\n"
|
||||
"VARYIN vec2 uv0;\n"
|
||||
"in vec2 uv0;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"\n"
|
||||
"void main(){\n"
|
||||
@ -358,7 +358,7 @@ TextureCache::TextureCache()
|
||||
const char *pDepthMatrixProg =
|
||||
"uniform sampler2D samp9;\n"
|
||||
"uniform vec4 colmat[5];\n"
|
||||
"VARYIN vec2 uv0;\n"
|
||||
"in vec2 uv0;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"\n"
|
||||
"void main(){\n"
|
||||
@ -369,7 +369,7 @@ TextureCache::TextureCache()
|
||||
"}\n";
|
||||
|
||||
const char *VProgram =
|
||||
"VARYOUT vec2 uv0;\n"
|
||||
"out vec2 uv0;\n"
|
||||
"uniform sampler2D samp9;\n"
|
||||
"uniform vec4 copy_position;\n" // left, top, right, bottom
|
||||
"void main()\n"
|
||||
|
@ -68,7 +68,7 @@ void CreatePrograms()
|
||||
*/
|
||||
// Output is BGRA because that is slightly faster than RGBA.
|
||||
const char *VProgramRgbToYuyv =
|
||||
"VARYOUT vec2 uv0;\n"
|
||||
"out vec2 uv0;\n"
|
||||
"uniform vec4 copy_position;\n" // left, top, right, bottom
|
||||
"uniform sampler2D samp9;\n"
|
||||
"void main()\n"
|
||||
@ -79,7 +79,7 @@ void CreatePrograms()
|
||||
"}\n";
|
||||
const char *FProgramRgbToYuyv =
|
||||
"uniform sampler2D samp9;\n"
|
||||
"VARYIN vec2 uv0;\n"
|
||||
"in vec2 uv0;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
@ -110,14 +110,14 @@ void CreatePrograms()
|
||||
"}\n";
|
||||
const char *FProgramYuyvToRgb =
|
||||
"uniform sampler2D samp9;\n"
|
||||
"VARYIN vec2 uv0;\n"
|
||||
"in vec2 uv0;\n"
|
||||
"out vec4 ocol0;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" ivec2 uv = ivec2(gl_FragCoord.xy);\n"
|
||||
// We switch top/bottom here. TODO: move this to screen blit.
|
||||
" ivec2 ts = textureSize(samp9, 0);\n"
|
||||
" vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1), 0);\n"
|
||||
" vec4 c0 = texelFetch(samp9, ivec2(uv.x>>1, ts.y-uv.y-1), 0);\n"
|
||||
" float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n"
|
||||
" float yComp = 1.164 * (y - 0.0625);\n"
|
||||
" float uComp = c0.g - 0.5;\n"
|
||||
|
@ -311,19 +311,19 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
if (per_pixel_depth)
|
||||
out.Write("#define depth gl_FragDepth\n");
|
||||
|
||||
out.Write("VARYIN float4 colors_02;\n");
|
||||
out.Write("VARYIN float4 colors_12;\n");
|
||||
out.Write("centroid in float4 colors_02;\n");
|
||||
out.Write("centroid in float4 colors_12;\n");
|
||||
|
||||
// compute window position if needed because binding semantic WPOS is not widely supported
|
||||
// Let's set up attributes
|
||||
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
|
||||
{
|
||||
out.Write("VARYIN float3 uv%d_2;\n", i);
|
||||
out.Write("centroid in float3 uv%d_2;\n", i);
|
||||
}
|
||||
out.Write("VARYIN float4 clipPos_2;\n");
|
||||
out.Write("centroid in float4 clipPos_2;\n");
|
||||
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
|
||||
{
|
||||
out.Write("VARYIN float4 Normal_2;\n");
|
||||
out.Write("centroid in float4 Normal_2;\n");
|
||||
}
|
||||
|
||||
if (forced_early_z)
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "VideoCommon/TextureConversionShader.h"
|
||||
#include "VideoCommon/TextureDecoder.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "Common/MathUtil.h"
|
||||
|
||||
#define WRITE p+=sprintf
|
||||
|
||||
@ -65,8 +66,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||
int blkW = TexDecoder_GetBlockWidthInTexels(format);
|
||||
int blkH = TexDecoder_GetBlockHeightInTexels(format);
|
||||
int samples = GetEncodedSampleCount(format);
|
||||
// 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments
|
||||
int factor = samples == 1 ? 2 : 1;
|
||||
|
||||
if (ApiType == API_OPENGL)
|
||||
{
|
||||
WRITE(p, "#define samp0 samp9\n");
|
||||
@ -87,37 +87,41 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||
WRITE(p, "{\n"
|
||||
" int2 sampleUv;\n"
|
||||
" int2 uv1 = int2(gl_FragCoord.xy);\n"
|
||||
" float2 uv0 = float2(0.0, 0.0);\n"
|
||||
);
|
||||
|
||||
WRITE(p, " uv1.x = uv1.x * %d;\n", samples);
|
||||
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
|
||||
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
|
||||
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", Log2(samples));
|
||||
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", Log2(blkH), ~(blkW - 1));
|
||||
if (samples == 1)
|
||||
{
|
||||
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
|
||||
WRITE(p, " bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
|
||||
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n");
|
||||
}
|
||||
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
|
||||
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", Log2(blkW), blkH - 1);
|
||||
|
||||
WRITE(p, " int yl = uv1.y / %d;\n", blkH);
|
||||
WRITE(p, " int yb = yl * %d;\n", blkH);
|
||||
WRITE(p, " int yoff = uv1.y - yb;\n");
|
||||
WRITE(p, " int xp = uv1.x + yoff * position.z;\n");
|
||||
WRITE(p, " int xel = xp / %d;\n", samples == 1 ? factor : blkW);
|
||||
WRITE(p, " int xb = xel / %d;\n", blkH);
|
||||
WRITE(p, " int xoff = xel - xb * %d;\n", blkH);
|
||||
WRITE(p, " int xl = uv1.x * %d / %d;\n", factor, blkW);
|
||||
WRITE(p, " int xib = uv1.x * %d - xl * %d;\n", factor, blkW);
|
||||
WRITE(p, " int halfxb = xb / %d;\n", factor);
|
||||
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n");
|
||||
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n");
|
||||
|
||||
WRITE(p, " sampleUv.x = xib + halfxb * %d;\n", blkW);
|
||||
WRITE(p, " sampleUv.y = yb + xoff;\n");
|
||||
WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords
|
||||
WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel
|
||||
WRITE(p, " uv0 *= float(position.w);\n"); // scale by two if needed (also move to pixel borders so that linear filtering will average adjacent pixel)
|
||||
WRITE(p, " uv0 += float2(position.xy);\n"); // move to copied rect
|
||||
WRITE(p, " uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); // normalize to [0:1]
|
||||
if (ApiType == API_OPENGL) // ogl has to flip up and down
|
||||
{
|
||||
WRITE(p, " uv0.y = 1.0-uv0.y;\n");
|
||||
}
|
||||
|
||||
WRITE(p, " float sample_offset = position.w / float(%d);\n", EFB_WIDTH);
|
||||
}
|
||||
|
||||
// Appends GLSL text to the shader source buffer at p that samples samp0 and
// stores the colorComp components of the result in dest. WRITE is
// `p+=sprintf`, so p ends up just past the emitted text.
//
// NOTE(review): this span comes from a diff view and shows two bodies back to
// back without +/- markers. The first WRITE call (presumably the pre-commit
// body) re-derives uv0 from sampleUv + int2(xoffset, 0) for every sample; the
// second WRITE call (presumably the post-commit body) reuses uv0 and offsets
// it by xoffset * sample_offset. Both share the closing ");". Confirm against
// the repository which one is live.
//
// ApiType is not referenced by either body.
void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
|
||||
{
|
||||
WRITE(p, // sampleUv is the sample position in (int)gx_coords
|
||||
"uv0 = float2(sampleUv + int2(%d, 0));\n" // pixel offset (if more than one pixel is sampled)
|
||||
"uv0 += float2(0.5, 0.5);\n" // move to center of pixel
|
||||
"uv0 *= float(position.w);\n" // scale by two if needed (this moves to the pixel border so linear filtering applies)
|
||||
"uv0 += float2(position.xy);\n" // move to copied rect
|
||||
"uv0 /= float2(%d, %d);\n" // normalize to [0:1]
|
||||
"uv0.y = 1.0-uv0.y;\n" // OpenGL-specific vertical flip (disable this line for d3d)
|
||||
"%s = texture(samp0, uv0).%s;\n",
|
||||
xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp
|
||||
WRITE(p, " %s = texture(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n",
|
||||
dest, xoffset, colorComp
|
||||
);
|
||||
}
|
||||
|
||||
@ -373,8 +377,6 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType)
|
||||
{
|
||||
WriteSwizzler(p, GX_TF_RGBA8, ApiType);
|
||||
|
||||
WRITE(p, " bool first = xb == (halfxb * 2);\n");
|
||||
|
||||
WRITE(p, " float4 texSample;\n");
|
||||
WRITE(p, " float4 color0;\n");
|
||||
WRITE(p, " float4 color1;\n");
|
||||
@ -563,8 +565,6 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType)
|
||||
{
|
||||
WriteSwizzler(p, GX_TF_Z24X8, ApiType);
|
||||
|
||||
WRITE(p, " bool first = xb == (halfxb * 2);\n");
|
||||
|
||||
WRITE(p, " float depth0;\n");
|
||||
WRITE(p, " float depth1;\n");
|
||||
WRITE(p, " float3 expanded0;\n");
|
||||
|
@ -106,26 +106,26 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
|
||||
|
||||
if(api_type == API_OPENGL)
|
||||
{
|
||||
out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
|
||||
out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
|
||||
if (components & VB_HAS_POSMTXIDX)
|
||||
out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
|
||||
out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
|
||||
if (components & VB_HAS_NRM0)
|
||||
out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
|
||||
out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
|
||||
if (components & VB_HAS_NRM1)
|
||||
out.Write("ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
|
||||
out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
|
||||
if (components & VB_HAS_NRM2)
|
||||
out.Write("ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
|
||||
out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
|
||||
|
||||
if (components & VB_HAS_COL0)
|
||||
out.Write("ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
|
||||
out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
|
||||
if (components & VB_HAS_COL1)
|
||||
out.Write("ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
|
||||
out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
|
||||
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
|
||||
if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
|
||||
out.Write("ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
|
||||
out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
|
||||
}
|
||||
|
||||
// Let's set up attributes
|
||||
@ -133,15 +133,15 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
|
||||
{
|
||||
if (i < xfregs.numTexGen.numTexGens)
|
||||
{
|
||||
out.Write("VARYOUT float3 uv%d_2;\n", i);
|
||||
out.Write("centroid out float3 uv%d_2;\n", i);
|
||||
}
|
||||
}
|
||||
out.Write("VARYOUT float4 clipPos_2;\n");
|
||||
out.Write("centroid out float4 clipPos_2;\n");
|
||||
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
|
||||
out.Write("VARYOUT float4 Normal_2;\n");
|
||||
out.Write("centroid out float4 Normal_2;\n");
|
||||
|
||||
out.Write("VARYOUT float4 colors_02;\n");
|
||||
out.Write("VARYOUT float4 colors_12;\n");
|
||||
out.Write("centroid out float4 colors_02;\n");
|
||||
out.Write("centroid out float4 colors_12;\n");
|
||||
|
||||
out.Write("void main()\n{\n");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user