Normalize the EFB-to-texture process for color textures so that it works the same in all the plugins and with the same accuracy as real hardware (almost :)).

Please test for regressions and fixes.
Some small changes to make the pixel shader more DX9 SM2.0 friendly. The condition is that pixel lighting is not used (sorry, there is no hardware support for the number of parameters needed).

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6777 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado
2011-01-07 19:23:57 +00:00
parent 62b79028ef
commit f869281301
14 changed files with 185 additions and 181 deletions

View File

@ -86,12 +86,13 @@ const char color_copy_program_code_msaa[] = {
const char color_matrix_program_code[] = {
"sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = Tex0.Sample(samp0,uv0);\n"
"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"
};
@ -99,7 +100,7 @@ const char color_matrix_program_code[] = {
const char color_matrix_program_code_msaa[] = {
"sampler samp0 : register(s0);\n"
"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
@ -110,6 +111,7 @@ const char color_matrix_program_code_msaa[] = {
"for(int i = 0; i < samples; ++i)\n"
" texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
"texcol /= samples;\n"
"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"
};
@ -117,7 +119,7 @@ const char color_matrix_program_code_msaa[] = {
const char depth_matrix_program[] = {
"sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
@ -132,7 +134,7 @@ const char depth_matrix_program[] = {
const char depth_matrix_program_msaa[] = {
"sampler samp0 : register(s0);\n"
"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
@ -214,16 +216,15 @@ unsigned int ps_constant_offset_table[] = {
76, 80, // C_INDTEXSCALE, 8
84, 88, 92, 96, 100, 104, // C_INDTEXMTX, 24
108, 112, // C_FOG, 8
116, 120, 124, 128, 132, // C_COLORMATRIX, 20
136, 140, 144, 148, 152, // C_PLIGHTS0, 20
156, 160, 164, 168, 172, // C_PLIGHTS1, 20
176, 180, 184, 188, 192, // C_PLIGHTS2, 20
196, 200, 204, 208, 212, // C_PLIGHTS3, 20
216, 220, 224, 228, 232, // C_PLIGHTS4, 20
236, 240, 244, 248, 252, // C_PLIGHTS5, 20
256, 260, 264, 268, 272, // C_PLIGHTS6, 20
276, 280, 284, 288, 292, // C_PLIGHTS7, 20
296, 300, 304, 308, // C_PMATERIALS, 16
116, 120, 124, 128, 132, // C_PLIGHTS0, 20
136, 140, 144, 148, 152, // C_PLIGHTS1, 20
156, 160, 164, 168, 172, // C_PLIGHTS2, 20
176, 180, 184, 188, 192, // C_PLIGHTS3, 20
196, 200, 204, 208, 212, // C_PLIGHTS4, 20
216, 220, 224, 228, 232, // C_PLIGHTS5, 20
236, 240, 244, 248, 252, // C_PLIGHTS6, 20
256, 260, 264, 268, 272, // C_PLIGHTS7, 20
276, 280, 284, 288, // C_PMATERIALS, 16
};
void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{

View File

@ -43,7 +43,7 @@
namespace DX11
{
#define MAX_COPY_BUFFERS 21
#define MAX_COPY_BUFFERS 24
ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {};
TextureCache::TCacheEntry::~TCacheEntry()
@ -120,7 +120,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
// set transformation
if (NULL == efbcopycbuf[cbufid])
{
const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(20 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]);

View File

@ -170,7 +170,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
WRITE(p, "uniform sampler samp0 : register(s0);\n");
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX);
WRITE(p, "uniform float4 cColMatrix[7] : register(c%d);\n", C_COLORMATRIX);
WRITE(p, "void main(\n"
"out float4 ocol0 : COLOR0,\n");
@ -208,6 +208,9 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
{
if(depthConversionType == DEPTH_CONVERSION_TYPE_NONE)
WRITE(p, "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n");
WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");
}
else
@ -235,8 +238,7 @@ void PixelShaderCache::Init()
}
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
// other screen copy/convert programs
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
@ -253,11 +255,6 @@ void PixelShaderCache::Init()
// so skip this attempt to avoid duplicate error messages.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
}
else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
{
// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
}
else
{
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
@ -311,6 +308,7 @@ void PixelShaderCache::Shutdown()
if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release();
s_rgba6_to_rgb8 = NULL;
Clear();
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();

View File

@ -285,7 +285,7 @@ Renderer::Renderer()
CalculateXYScale(dst_rect);
s_LastAA = g_ActiveConfig.iMultisampleMode;
int SupersampleCoeficient = s_LastAA + 1;
int SupersampleCoeficient = (s_LastAA % 3) + 1;
s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(SupersampleCoeficient);
@ -604,10 +604,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
vp.MaxZ = 1.0f;
D3D::dev->SetViewport(&vp);
float colmat[16] = {0.0f};
float fConstAdd[4] = {0.0f};
float colmat[28] = {0.0f};
colmat[0] = colmat[5] = colmat[10] = 1.0f;
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
PixelShaderManager::SetColorMatrix(colmat); // set transformation
LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture();
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
@ -1080,7 +1079,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
{
TargetRectangle targetRc = ConvertEFBRectangle(rc);
LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBColorTexture();
D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode),Gamma);
D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_ActiveConfig.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_ActiveConfig.iMultisampleMode),Gamma);
}
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
@ -1216,7 +1216,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
CalculateXYScale(dst_rect);
int SupersampleCoeficient = s_LastAA + 1;
int SupersampleCoeficient = (s_LastAA % 3) + 1;
s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(SupersampleCoeficient);

View File

@ -100,8 +100,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
destrect.right = virtualW;
destrect.top = 0;
const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
PixelShaderManager::SetColorMatrix(colmat); // set transformation
TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);
RECT sourcerect;
sourcerect.bottom = targetSource.bottom;

View File

@ -362,7 +362,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
(float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()),
(float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
Renderer::EFBToScaledXf(sampleStride),
Renderer::EFBToScaledYf(sampleStride),
(float)Renderer::GetFullTargetWidth(),
@ -426,7 +426,7 @@ u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture, u32 So
(float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()),
(float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
Renderer::EFBToScaledXf(sampleStride),
Renderer::EFBToScaledYf(sampleStride),
(float)SourceW,

View File

@ -96,17 +96,22 @@ void PixelShaderCache::Init()
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs);
INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs);
char pmatrixprog[1024];
char pmatrixprog[2048];
sprintf(pmatrixprog, "!!ARBfp1.0"
"TEMP R0;\n"
"TEMP R1;\n"
"PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n"
"TEX R0, fragment.texcoord[0], texture[0], RECT;\n"
"DP4 R1.w, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"MUL R0, R0, program.env[%d];\n"
"ADD R0, R0, K0;\n"
"FLR R0, R0;\n"
"MUL R0, R0, program.env[%d];\n"
"DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"DP4 R1.w, R0, program.env[%d];\n"
"ADD result.color, R1, program.env[%d];\n"
"END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4);
"END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
glGenProgramsARB(1, &s_ColorMatrixProgram);
SetCurrentShader(s_ColorMatrixProgram);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);
@ -118,12 +123,13 @@ void PixelShaderCache::Init()
s_ColorMatrixProgram = 0;
}
sprintf(pmatrixprog, "!!ARBfp1.0\n"
sprintf(pmatrixprog, "!!ARBfp1.0\n"
"TEMP R0;\n"
"TEMP R1;\n"
"TEMP R2;\n"
//16777215/16777216*256, 1/255, 256, 0
"PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n"
"PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n"
"PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n"
//sample the depth value
"TEX R2, fragment.texcoord[0], texture[0], RECT;\n"
@ -138,21 +144,23 @@ void PixelShaderCache::Init()
//gives {?, 128/255, 254/255, ?} for depth value 254/255
//on some gpus
"FLR R0.z,R0;\n" //bits 31..24
"FLR R0.x,R0;\n" //bits 31..24
"SUB R0.xyw,R0,R0.z;\n" //subtract bits 31..24 from rest
"MUL R0.xyw,R0,K0.z;\n" // *256
"SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest
"MUL R0.yzw,R0,K0.z;\n" // *256
"FLR R0.y,R0;\n" //bits 23..16
"SUB R0.xw,R0,R0.y;\n" //subtract bits 23..16 from rest
"MUL R0.xw,R0,K0.z;\n" // *256
"FLR R0.x,R0;\n" //bits 15..8
"SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest
"MUL R0.zw,R0,K0.z;\n" // *256
"FLR R0.z,R0;\n" //bits 15..8
"SUB R0.w,R0,R0.x;\n" //subtract bits 15..8 from rest
"MUL R0.w,R0,K0.z;\n" // *256
"FLR R0.w,R0;\n" //bits 7..0
"MOV R0.w,R0.x;\n" //duplicate bit 31..24
"MUL R0,R0,K0.y;\n" // /255
"MUL R0,R0,K0.y;\n" // /255
"MUL R0.w,R0,K1.x;\n" // *15
"FLR R0.w,R0;\n" //bits 31..28
"MUL R0.w,R0,K1.y;\n" // /15
"DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n"
@ -168,7 +176,7 @@ void PixelShaderCache::Init()
if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program");
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
s_DepthMatrixProgram = 0;
}
}

View File

@ -297,8 +297,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
glViewport(0, 0, virtualW, virtualH);
PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
PixelShaderManager::SetColorMatrix(colmat); // set transformation
GL_REPORT_ERRORD();
TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);

View File

@ -285,7 +285,8 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
s32 expandedHeight = (height + blkH) & (~blkH);
float sampleStride = bScaleByHalf ? 2.f : 1.f;
TextureConversionShader::SetShaderParameters((float)expandedWidth,
TextureConversionShader::SetShaderParameters(
(float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)Renderer::EFBToScaledX(source.left),
(float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight),