mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-21 05:09:34 -06:00
Reduced the frequency of dx9 ps_2_0 pixel generation errors, and made the dx9 efb depth peek of a 16-bit depth buffer not use the 24-bit adjustment factor. Shouldn't affect the other plugins.
(probably nobody else cares, but I need at least one video plugin that actually works on this computer) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6618 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -44,25 +44,29 @@ static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
|
||||
static std::set<u32> unique_shaders;
|
||||
|
||||
#define MAX_SSAA_SHADERS 3
|
||||
enum
|
||||
{
|
||||
COPY_TYPE_DIRECT,
|
||||
COPY_TYPE_MATRIXCOLOR,
|
||||
NUM_COPY_TYPES
|
||||
};
|
||||
|
||||
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS];
|
||||
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS];
|
||||
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS];
|
||||
static LPDIRECT3DPIXELSHADER9 s_CopyProgram[NUM_COPY_TYPES][PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES][MAX_SSAA_SHADERS];
|
||||
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
|
||||
|
||||
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
|
||||
{
|
||||
return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
|
||||
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
|
||||
}
|
||||
|
||||
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
|
||||
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode, int depthConversionType)
|
||||
{
|
||||
return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
|
||||
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][depthConversionType % NUM_DEPTH_CONVERSION_TYPES][SSAAMode % MAX_SSAA_SHADERS];
|
||||
}
|
||||
|
||||
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
|
||||
{
|
||||
return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS];
|
||||
return s_CopyProgram[COPY_TYPE_DIRECT][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
|
||||
}
|
||||
|
||||
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
|
||||
@ -95,132 +99,120 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#define WRITE p+=sprintf
|
||||
|
||||
// Builds the HLSL source for one EFB copy/convert pixel shader and compiles it.
//
// copyMatrixType      - COPY_TYPE_DIRECT writes the sampled texel unchanged;
//                       COPY_TYPE_MATRIXCOLOR runs it through cColMatrix[0..4].
// depthConversionType - selects (modulo NUM_DEPTH_CONVERSION_TYPES) the optional
//                       16-bit or 24-bit depth encoding step applied to texcol.
// SSAAMode            - selects (modulo MAX_SSAA_SHADERS) how the source texture
//                       is sampled: one tap on uv0 (float2), one tap on uv0.xy,
//                       or the average of four taps taken from uv1/uv2.
//
// Returns whatever D3D::CompileAndCreatePixelShader yields for the generated text.
static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConversionType, int SSAAMode)
{
	const bool usesColorMatrix = (copyMatrixType == COPY_TYPE_MATRIXCOLOR);
	const int sampleMode = SSAAMode % MAX_SSAA_SHADERS;
	const int depthMode = depthConversionType % PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES;

	// Remaining parameter list plus the texel fetch; stays empty when the
	// sample mode matches none of the known variants.
	const char* samplingSource = "";
	if (sampleMode == 0)
	{
		// single sample
		samplingSource =
			"in float2 uv0 : TEXCOORD0){\n"
			"float4 texcol = tex2D(samp0,uv0);\n";
	}
	else if (sampleMode == 1)
	{
		// single sample, SSAA vertex layout
		samplingSource =
			"in float4 uv0 : TEXCOORD0,\n"
			"in float4 uv1 : TEXCOORD1){\n"
			"float4 texcol = tex2D(samp0,uv0.xy);\n";
	}
	else if (sampleMode == 2)
	{
		// four samples averaged
		samplingSource =
			"in float4 uv0 : TEXCOORD0,\n"
			"in float4 uv1 : TEXCOORD1,\n"
			"in float4 uv2 : TEXCOORD2,\n"
			"in float4 uv3 : TEXCOORD3){\n"
			"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n";
	}

	// Optional depth re-encoding; DEPTH_CONVERSION_TYPE_NONE contributes nothing.
	const char* depthSource = "";
	if (depthMode == PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT)
	{
		// The original author flagged this 16-bit factor as probably wrong, but
		// better than reusing the 24-bit conversion for a 16-bit buffer.
		depthSource =
			"float4 EncodedDepth = frac((texcol.r * (65535.0f/65536.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
			"texcol = float4((EncodedDepth.rgb * (65536.0f/65535.0f)),1.0f);\n";
	}
	else if (depthMode == PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT)
	{
		depthSource =
			"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
			"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n";
	}

	char source[3072];

	// Format with the "C" numeric locale while the text is being generated.
	setlocale(LC_NUMERIC, "C");

	// Sentinel in the final byte: if it is no longer intact once generation is
	// done, the formatted text ran past the end of the buffer.
	source[sizeof(source) - 1] = 0x7C;

	char* cursor = source;
	cursor += sprintf(cursor, "// Copy/Color Matrix/Depth Matrix shader (matrix=%d, depth=%d, ssaa=%d)\n", copyMatrixType, depthConversionType, SSAAMode);
	cursor += sprintf(cursor, "uniform sampler samp0 : register(s0);\n");
	if (usesColorMatrix)
	{
		cursor += sprintf(cursor, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX);
	}
	cursor += sprintf(cursor, "void main(\n"
	                          "out float4 ocol0 : COLOR0,\n");

	cursor += sprintf(cursor, "%s", samplingSource);
	cursor += sprintf(cursor, "%s", depthSource);

	if (usesColorMatrix)
	{
		cursor += sprintf(cursor, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");
	}
	else
	{
		cursor += sprintf(cursor, "ocol0 = texcol;\n");
	}
	cursor += sprintf(cursor, "}\n");

	if (source[sizeof(source) - 1] != 0x7C)
	{
		PanicAlert("PixelShaderCache copy shader generator - buffer too small, canary has been eaten!");
	}

	// Switch LC_NUMERIC to the environment's default locale before compiling.
	setlocale(LC_NUMERIC, "");

	return D3D::CompileAndCreatePixelShader(source, (int)strlen(source));
}
|
||||
|
||||
void PixelShaderCache::Init()
|
||||
{
|
||||
//program used for clear screen
|
||||
char pprog[3072];
|
||||
sprintf(pprog, "void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
" in float4 incol0 : COLOR0){\n"
|
||||
"ocol0 = incol0;\n"
|
||||
"}\n");
|
||||
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
{
|
||||
char pprog[3072];
|
||||
sprintf(pprog, "void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
" in float4 incol0 : COLOR0){\n"
|
||||
"ocol0 = incol0;\n"
|
||||
"}\n");
|
||||
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
}
|
||||
|
||||
//Used for Copy/resolve the color buffer
|
||||
//1 Sample
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float2 uv0 : TEXCOORD0){\n"
|
||||
"ocol0 = tex2D(samp0,uv0);\n"
|
||||
"}\n");
|
||||
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
|
||||
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
|
||||
bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
|
||||
|
||||
//1 Samples SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1){\n"
|
||||
"ocol0 = tex2D(samp0,uv0.xy);\n"
|
||||
"}\n");
|
||||
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//4 Samples SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1,\n"
|
||||
"in float4 uv2 : TEXCOORD2,\n"
|
||||
"in float4 uv3 : TEXCOORD3){\n"
|
||||
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n"
|
||||
"}\n");
|
||||
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
|
||||
|
||||
//Color conversion Programs
|
||||
//1 sample
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
" in float2 uv0 : TEXCOORD0){\n"
|
||||
"float4 texcol = tex2D(samp0,uv0);\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//1 samples SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1){\n"
|
||||
"float4 texcol = tex2D(samp0,uv0.xy);\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//4 samples SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1,\n"
|
||||
"in float4 uv2 : TEXCOORD2,\n"
|
||||
"in float4 uv3 : TEXCOORD3){\n"
|
||||
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//Depth copy programs
|
||||
//1 sample
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
" in float2 uv0 : TEXCOORD0){\n"
|
||||
"float4 texcol = tex2D(samp0,uv0);\n"
|
||||
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
||||
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//1 sample SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1){\n"
|
||||
"float4 texcol = tex2D(samp0,uv0.xy);\n"
|
||||
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
||||
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
|
||||
//4 sample SSAA
|
||||
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
||||
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
||||
"void main(\n"
|
||||
"out float4 ocol0 : COLOR0,\n"
|
||||
"in float4 uv0 : TEXCOORD0,\n"
|
||||
"in float4 uv1 : TEXCOORD1,\n"
|
||||
"in float4 uv2 : TEXCOORD2,\n"
|
||||
"in float4 uv3 : TEXCOORD3){\n"
|
||||
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
|
||||
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
||||
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
||||
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
||||
"}\n",C_COLORMATRIX);
|
||||
s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
||||
// other screen copy/convert programs
|
||||
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
|
||||
{
|
||||
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
|
||||
{
|
||||
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
|
||||
{
|
||||
if(ssaaMode && !s_CopyProgram[copyMatrixType][depthType][ssaaMode-1]
|
||||
|| depthType && !s_CopyProgram[copyMatrixType][depthType-1][ssaaMode]
|
||||
|| copyMatrixType && !s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])
|
||||
{
|
||||
// if it failed at a lower setting, it's going to fail here for the same reason it did there,
|
||||
// so skip this attempt to avoid duplicate error messages.
|
||||
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
|
||||
}
|
||||
else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
|
||||
{
|
||||
// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
|
||||
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
|
||||
}
|
||||
else
|
||||
{
|
||||
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Clear();
|
||||
|
||||
@ -248,15 +240,18 @@ void PixelShaderCache::Clear()
|
||||
|
||||
void PixelShaderCache::Shutdown()
|
||||
{
|
||||
for(int i = 0;i < MAX_SSAA_SHADERS; i++)
|
||||
{
|
||||
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
|
||||
s_ColorMatrixProgram[i] = NULL;
|
||||
if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release();
|
||||
s_ColorCopyProgram[i] = NULL;
|
||||
if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release();
|
||||
s_DepthMatrixProgram[i] = NULL;
|
||||
}
|
||||
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
|
||||
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
|
||||
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
|
||||
if(s_CopyProgram[copyMatrixType][depthType][ssaaMode]
|
||||
&& (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode]))
|
||||
s_CopyProgram[copyMatrixType][depthType][ssaaMode]->Release();
|
||||
|
||||
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
|
||||
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
|
||||
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
|
||||
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
|
||||
|
||||
if (s_ClearProgram) s_ClearProgram->Release();
|
||||
s_ClearProgram = NULL;
|
||||
|
||||
@ -296,8 +291,11 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
|
||||
return (entry.shader != NULL);
|
||||
}
|
||||
|
||||
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
|
||||
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
|
||||
|
||||
// Need to compile a new shader
|
||||
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, components);
|
||||
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, maxConstants, components);
|
||||
|
||||
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
|
||||
unique_shaders.insert(code_hash);
|
||||
|
Reference in New Issue
Block a user