Normalize the EFB-to-texture process for color textures so that it works the same in all the plugins and with the same accuracy as real hardware (almost :))

Please test for regressions and fixes. Also includes some small changes to make the pixel shader more DX9 SM2.0 friendly; the condition is that pixel lighting must not be used (sorry, there is no hardware support for the number of parameters needed). git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6777 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-30 17:49:48 -06:00 · 2011-01-07 19:23:57 +00:00
parent 62b79028ef
commit f869281301
14 changed files with 185 additions and 181 deletions
--- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
@ -86,12 +86,13 @@ const char color_copy_program_code_msaa[] = {
 const char color_matrix_program_code[] = {
 	"sampler samp0 : register(s0);\n"
 	"Texture2D Tex0 : register(t0);\n"
-	"uniform float4 cColMatrix[5] : register(c0);\n"
+	"uniform float4 cColMatrix[7] : register(c0);\n"
 	"void main(\n" 
 	"out float4 ocol0 : SV_Target,\n"
 	"in float4 pos : SV_Position,\n"
 	" in float2 uv0 : TEXCOORD0){\n"
 	"float4 texcol = Tex0.Sample(samp0,uv0);\n"
+	"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
 	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
 	"}\n"
 };
@ -99,7 +100,7 @@ const char color_matrix_program_code[] = {
 const char color_matrix_program_code_msaa[] = {
 	"sampler samp0 : register(s0);\n"
 	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
-	"uniform float4 cColMatrix[5] : register(c0);\n"
+	"uniform float4 cColMatrix[7] : register(c0);\n"
 	"void main(\n" 
 	"out float4 ocol0 : SV_Target,\n"
 	"in float4 pos : SV_Position,\n"
@ -110,6 +111,7 @@ const char color_matrix_program_code_msaa[] = {
 	"for(int i = 0; i < samples; ++i)\n"
 	"	texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
 	"texcol /= samples;\n"
+	"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
 	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
 	"}\n"
 };
@ -117,7 +119,7 @@ const char color_matrix_program_code_msaa[] = {
 const char depth_matrix_program[] = {
 	"sampler samp0 : register(s0);\n"
 	"Texture2D Tex0 : register(t0);\n"
-	"uniform float4 cColMatrix[5] : register(c0);\n"
+	"uniform float4 cColMatrix[7] : register(c0);\n"
 	"void main(\n"
 	"out float4 ocol0 : SV_Target,\n"
 	" in float4 pos : SV_Position,\n"
@ -132,7 +134,7 @@ const char depth_matrix_program[] = {
 const char depth_matrix_program_msaa[] = {
 	"sampler samp0 : register(s0);\n"
 	"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
-	"uniform float4 cColMatrix[5] : register(c0);\n"
+	"uniform float4 cColMatrix[7] : register(c0);\n"
 	"void main(\n"
 	"out float4 ocol0 : SV_Target,\n"
 	" in float4 pos : SV_Position,\n"
@ -214,16 +216,15 @@ unsigned int ps_constant_offset_table[] = {
 	76, 80,							// C_INDTEXSCALE, 8
 	84, 88, 92, 96, 100, 104,		// C_INDTEXMTX, 24
 	108, 112,						// C_FOG, 8
-	116, 120, 124, 128, 132,		// C_COLORMATRIX, 20	
-	136, 140, 144, 148, 152,		// C_PLIGHTS0, 20
-	156, 160, 164, 168, 172,		// C_PLIGHTS1, 20
-	176, 180, 184, 188, 192,		// C_PLIGHTS2, 20		
-	196, 200, 204, 208, 212,		// C_PLIGHTS3, 20
-	216, 220, 224, 228, 232,		// C_PLIGHTS4, 20
-	236, 240, 244, 248, 252,		// C_PLIGHTS5, 20
-	256, 260, 264, 268, 272,		// C_PLIGHTS6, 20
-	276, 280, 284, 288, 292,		// C_PLIGHTS7, 20	
-	296, 300, 304, 308,				// C_PMATERIALS, 16
+	116, 120, 124, 128, 132,		// C_PLIGHTS0, 20
+	136, 140, 144, 148, 152,		// C_PLIGHTS1, 20
+	156, 160, 164, 168, 172,		// C_PLIGHTS2, 20
+	176, 180, 184, 188, 192,		// C_PLIGHTS3, 20		
+	196, 200, 204, 208, 212,		// C_PLIGHTS4, 20
+	216, 220, 224, 228, 232,		// C_PLIGHTS5, 20
+	236, 240, 244, 248, 252,		// C_PLIGHTS6, 20
+	256, 260, 264, 268, 272,		// C_PLIGHTS7, 20
+	276, 280, 284, 288, 			// C_PMATERIALS, 16	
 };
 void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
 {
--- a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp
@ -43,7 +43,7 @@
 namespace DX11
 {

-#define MAX_COPY_BUFFERS 21
+#define MAX_COPY_BUFFERS 24
 ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {};

 TextureCache::TCacheEntry::~TCacheEntry()
@ -120,7 +120,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer,	bool bScaleB
 	// set transformation
 	if (NULL == efbcopycbuf[cbufid])
 	{
-		const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(20 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
+		const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
 		D3D11_SUBRESOURCE_DATA data;
 		data.pSysMem = colmat;
 		HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]);
--- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
@ -170,7 +170,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv

 	WRITE(p, "uniform sampler samp0 : register(s0);\n");
 	if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
-		WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX);
+		WRITE(p, "uniform float4 cColMatrix[7] : register(c%d);\n", C_COLORMATRIX);
 	WRITE(p, "void main(\n"
 	         "out float4 ocol0 : COLOR0,\n");

@ -208,6 +208,9 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv

 	if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
 	{
+		if(depthConversionType == DEPTH_CONVERSION_TYPE_NONE)
+			WRITE(p, "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n");		
+
 		WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");		
 	}
 	else
@ -235,8 +238,7 @@ void PixelShaderCache::Init()
 	}

 	int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
-	int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
-	bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
+	int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);	

 	// other screen copy/convert programs
 	for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
@ -253,11 +255,6 @@ void PixelShaderCache::Init()
 					// so skip this attempt to avoid duplicate error messages.
 					s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
 				}
-				else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
-				{
-					// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
-					s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
-				}
 				else
 				{
 					s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
@ -311,6 +308,7 @@ void PixelShaderCache::Shutdown()
 	if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release();
 	s_rgba6_to_rgb8 = NULL;

+	
 	Clear();
 	g_ps_disk_cache.Sync();
 	g_ps_disk_cache.Close();
--- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
@ -285,7 +285,7 @@ Renderer::Renderer()
 	CalculateXYScale(dst_rect);

 	s_LastAA = g_ActiveConfig.iMultisampleMode;
-	int SupersampleCoeficient = s_LastAA + 1;
+	int SupersampleCoeficient = (s_LastAA % 3) + 1;

 	s_LastEFBScale = g_ActiveConfig.iEFBScale;
 	CalculateTargetSize(SupersampleCoeficient);
@ -604,10 +604,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
 		vp.MaxZ = 1.0f;
 		D3D::dev->SetViewport(&vp);

-		float colmat[16] = {0.0f};
-		float fConstAdd[4] = {0.0f};
+		float colmat[28] = {0.0f};
 		colmat[0] = colmat[5] = colmat[10] = 1.0f;
-		PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
+		PixelShaderManager::SetColorMatrix(colmat); // set transformation
 		LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture();
 		
 		D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
@ -1080,7 +1079,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
 	{
 		TargetRectangle targetRc = ConvertEFBRectangle(rc);
 		LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBColorTexture();
-		D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode),Gamma);
+		D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_ActiveConfig.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_ActiveConfig.iMultisampleMode),Gamma);		
+		
 	}
 	D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
 	D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
@ -1216,7 +1216,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons

 		CalculateXYScale(dst_rect);
 		
-		int SupersampleCoeficient = s_LastAA + 1;
+		int SupersampleCoeficient = (s_LastAA % 3) + 1;

 		s_LastEFBScale = g_ActiveConfig.iEFBScale;
 		CalculateTargetSize(SupersampleCoeficient);
--- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp
@ -100,8 +100,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
 		destrect.right = virtualW;
 		destrect.top = 0;

-		const float* const fConstAdd = colmat + 16;		// fConstAdd is the last 4 floats of colmat
-		PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
+		PixelShaderManager::SetColorMatrix(colmat); // set transformation
 		TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);
 		RECT sourcerect;
 		sourcerect.bottom = targetSource.bottom;
--- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp
@ -362,7 +362,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
 		(float)expandedWidth,
 		(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
 		(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
-		(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()),
+		(float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
 		Renderer::EFBToScaledXf(sampleStride),
 		Renderer::EFBToScaledYf(sampleStride),
 		(float)Renderer::GetFullTargetWidth(),
@ -426,7 +426,7 @@ u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture, u32 So
 		(float)expandedWidth,
 		(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
 		(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
-		(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()),
+		(float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
 		Renderer::EFBToScaledXf(sampleStride),
 		Renderer::EFBToScaledYf(sampleStride),
 		(float)SourceW,
--- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp
@ -96,17 +96,22 @@ void PixelShaderCache::Init()
 	glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs);
 	INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs);
 	
-	char pmatrixprog[1024];
+	char pmatrixprog[2048];
 	sprintf(pmatrixprog, "!!ARBfp1.0"
 						"TEMP R0;\n"
 						"TEMP R1;\n"
+						"PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n" 
 						"TEX R0, fragment.texcoord[0], texture[0], RECT;\n"
-						"DP4 R1.w, R0, program.env[%d];\n"
-						"DP4 R1.z, R0, program.env[%d];\n"
+						"MUL R0, R0, program.env[%d];\n"
+						"ADD R0, R0, K0;\n"
+						"FLR R0, R0;\n"
+						"MUL R0, R0, program.env[%d];\n"
 						"DP4 R1.x, R0, program.env[%d];\n"
 						"DP4 R1.y, R0, program.env[%d];\n"
+						"DP4 R1.z, R0, program.env[%d];\n"
+						"DP4 R1.w, R0, program.env[%d];\n"
 						"ADD result.color, R1, program.env[%d];\n"
-						"END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4);
+						"END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
 	glGenProgramsARB(1, &s_ColorMatrixProgram);
 	SetCurrentShader(s_ColorMatrixProgram);
 	glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);
@ -118,12 +123,13 @@ void PixelShaderCache::Init()
 		s_ColorMatrixProgram = 0;
 	}

-    sprintf(pmatrixprog, "!!ARBfp1.0\n"
+     sprintf(pmatrixprog, "!!ARBfp1.0\n"
 						"TEMP R0;\n"
 						"TEMP R1;\n"
                        "TEMP R2;\n"
                        //16777215/16777216*256, 1/255, 256, 0
-                        "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" 
+                        "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n"
+						"PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n"
                        //sample the depth value
 						"TEX R2, fragment.texcoord[0], texture[0], RECT;\n"

@ -138,21 +144,23 @@ void PixelShaderCache::Init()
                        //gives {?, 128/255, 254/255, ?} for depth value 254/255
                        //on some gpus

-                        "FLR R0.z,R0;\n"        //bits 31..24
+                        "FLR R0.x,R0;\n"        //bits 31..24

-                        "SUB R0.xyw,R0,R0.z;\n" //subtract bits 31..24 from rest
-                        "MUL R0.xyw,R0,K0.z;\n" // *256
+                        "SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest
+                        "MUL R0.yzw,R0,K0.z;\n" // *256
                        "FLR R0.y,R0;\n"        //bits 23..16

-                        "SUB R0.xw,R0,R0.y;\n"  //subtract bits 23..16 from rest
-                        "MUL R0.xw,R0,K0.z;\n"  // *256
-                        "FLR R0.x,R0;\n"        //bits 15..8
+                        "SUB R0.zw,R0,R0.y;\n"  //subtract bits 23..16 from rest
+                        "MUL R0.zw,R0,K0.z;\n"  // *256
+                        "FLR R0.z,R0;\n"        //bits 15..8

-                        "SUB R0.w,R0,R0.x;\n"   //subtract bits 15..8 from rest
-                        "MUL R0.w,R0,K0.z;\n"   // *256
-                        "FLR R0.w,R0;\n"        //bits 7..0
+                        "MOV R0.w,R0.x;\n"   //duplicate bit 31..24
+                        
+						"MUL R0,R0,K0.y;\n"     // /255

-                        "MUL R0,R0,K0.y;\n"     // /255
+						"MUL R0.w,R0,K1.x;\n"   // *15
+                        "FLR R0.w,R0;\n"        //bits 31..28
+						"MUL R0.w,R0,K1.y;\n"   // /15

 						"DP4 R1.x, R0, program.env[%d];\n"
 						"DP4 R1.y, R0, program.env[%d];\n"
@ -168,7 +176,7 @@ void PixelShaderCache::Init()
 	if (err != GL_NO_ERROR) {
 		ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program");
 		glDeleteProgramsARB(1, &s_DepthMatrixProgram);
-		s_DepthMatrixProgram = 0;
+		s_DepthMatrixProgram = 0;	
 	}
 	
 }
--- a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp
@ -297,8 +297,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer,	bool bScaleB
 		glViewport(0, 0, virtualW, virtualH);

 		PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());    
-		const float* const fConstAdd = colmat + 16;		// fConstAdd is the last 4 floats of colmat
-		PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
+		PixelShaderManager::SetColorMatrix(colmat); // set transformation
 		GL_REPORT_ERRORD();

 		TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);
--- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp
@ -285,7 +285,8 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
 	s32 expandedHeight = (height + blkH) & (~blkH);

    float sampleStride = bScaleByHalf ? 2.f : 1.f;
-	TextureConversionShader::SetShaderParameters((float)expandedWidth,
+	TextureConversionShader::SetShaderParameters(
+		(float)expandedWidth,
 		(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
 		(float)Renderer::EFBToScaledX(source.left),
 		(float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight),