From d435fd8957a204479c7f045aa34e722de626b7d4 Mon Sep 17 00:00:00 2001
From: Rodolfo Osvaldo Bogado <rodolfoosvaldobogado@gmail.com>
Date: Wed, 5 Jan 2011 17:16:52 +0000
Subject: [PATCH] Correct depth copy shader in DX9/DX11 to improve accuracy
 of EFB-to-texture copies for depth formats

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6751 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 Source/Core/VideoCommon/Src/BPStructs.cpp              |  1 -
 Source/Core/VideoCommon/Src/TextureCacheBase.cpp       |  4 +++-
 .../Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp  |  8 ++++----
 .../Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp   | 10 +++-------
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp
index 4b0998c38d..139d32fc09 100644
--- a/Source/Core/VideoCommon/Src/BPStructs.cpp
+++ b/Source/Core/VideoCommon/Src/BPStructs.cpp
@@ -360,7 +360,6 @@ void BPWritten(const BPCmd& bp)
 	// ----------------------------------
 	// Display Copy Filtering Control - GX_SetCopyFilter(u8 aa,u8 sample_pattern[12][2],u8 vf,u8 vfilter[7])
 	// Fields: Destination, Frame2Field, Gamma, Source
-	// TODO: We might have to implement the gamma one, some games might need this, if they are too dark to see.
 	// ----------------------------------
 	case BPMEM_DISPLAYCOPYFILER:   // if (aa) { use sample_pattern } else { use 666666 }
 	case BPMEM_DISPLAYCOPYFILER+1: // if (aa) { use sample_pattern } else { use 666666 }
diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp
index a64230bed6..52edba3c8a 100644
--- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp
@@ -428,6 +428,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer,
 			switch(copyfmt) 
 			{
 			case 0: // Z4
+				colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1;
+				break;
 			case 1: // Z8
 			case 8: // Z8
 				colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1;
@@ -474,7 +476,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer,
 				cbufid = 12;
 				break;
 
-			case 3: // Z16 //?
+			case 3: // Z16 
 				colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
 				cbufid = 13;
 				break;
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
index 152be54834..f0779997e5 100644
--- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
@@ -123,8 +123,8 @@ const char depth_matrix_program[] = {
 	" in float4 pos : SV_Position,\n"
 	" in float2 uv0 : TEXCOORD0){\n"
 	"float4 texcol = Tex0.Sample(samp0,uv0);\n"
-	"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
-	"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
+	"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*16.0f));\n"
+	"texcol = round(EncodedDepth * (16777216.0f/16777215.0f) * 255.0f) / 255.0f;\n"
 	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
 	"}\n"
 };
@@ -143,8 +143,8 @@ const char depth_matrix_program_msaa[] = {
 	"for(int i = 0; i < samples; ++i)\n"
 	"	texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
 	"texcol /= samples;\n"
-	"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
-	"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
+	"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*16.0f));\n"
+	"texcol = round(EncodedDepth * (16777216.0f/16777215.0f) * 255.0f) / 255.0f;\n"
 	"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
 	"}\n"
 };
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
index 1f1cf4dff1..ef18468fc2 100644
--- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
@@ -193,14 +193,10 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
 	{
 	case PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE:
 		break;
-	case PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT:
-		// this is probably wrong. but it works better than the 24-bit conversion we used to generate in this case.
-		WRITE(p, "float4 EncodedDepth = frac((texcol.r * (65535.0f/65536.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
-		         "texcol = float4((EncodedDepth.rgb * (65536.0f/65535.0f)),1.0f);\n");
-		break;
+	case PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT:		
 	case PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT:
-		WRITE(p, "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
-		         "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n");
+		WRITE(p, "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*16.0f));\n"
+		         "texcol = round(EncodedDepth * (16777216.0f/16777215.0f) * 255.0f) / 255.0f;\n");
 		break;
 	}
 	//Apply Gamma Correction