HUGE commit :)

In general: cleanup and bugfixes. Disable Pierre's patch for the moment, as it causes problems in some games, and hopefully fix the remaining missing textures for NVIDIA users in OpenGL. Make the code in PixelShaderGen look nice and readable. D3D: this is an ultra-experimental commit; please check for regressions or errors. Make the EFB scale / supersampling level customizable to improve the output quality and let users configure quality according to their hardware. If everyone likes this change, I will translate it to OpenGL. Please test. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5612 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-25 07:09:48 -06:00 · 2010-06-05 00:01:18 +00:00
parent 63dbcf4f97
commit c98f8a96d2
25 changed files with 291 additions and 434 deletions
--- a/Source/Core/Common/Src/LinearDiskCache.cpp
+++ b/Source/Core/Common/Src/LinearDiskCache.cpp
@ -22,7 +22,7 @@ static const char ID[4] = {'D', 'C', 'A', 'C'};
 // Update this to the current SVN revision every time you change shader generation code.
 // We don't automatically get this from SVN_REV because that would mean regenerating the
 // shader cache for every revision, graphics-related or not, which is simply annoying.
-const int version = 5520;
+const int version = 5597;

 LinearDiskCache::LinearDiskCache() 
 	: file_(NULL), num_entries_(0) {
--- a/Source/Core/VideoCommon/Src/BPMemory.h
+++ b/Source/Core/VideoCommon/Src/BPMemory.h
@ -108,6 +108,15 @@

 // Tev/combiner things

+#define TEVSCALE_1 0
+#define TEVSCALE_2 1
+#define TEVSCALE_4 2
+#define TEVDIVIDE_2 3
+
+#define TEVCMP_R8    0
+#define TEVCMP_GR16  1
+#define TEVCMP_BGR24 2
+#define TEVCMP_RGB8  3

 #define TEVOP_ADD 0
 #define TEVOP_SUB 1
@ -173,13 +182,10 @@ enum Compare
 #define ZTEXTURE_ADD 1
 #define ZTEXTURE_REPLACE 2

-enum TevBias
-{
-    TB_ZERO     = 0,
-    TB_ADDHALF  = 1,
-    TB_SUBHALF  = 2,
-	TB_COMPARE  = 3,
-};
+#define TevBias_ZERO     0
+#define TevBias_ADDHALF  1
+#define TevBias_SUBHALF  2
+#define TevBias_COMPARE  3

 enum AlphaOp
 {
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@ -157,13 +157,13 @@ const float epsilon8bit = 1.0f / 255.0f;
 static const char *tevKSelTableC[] = // KCSEL
 {
    "1.0f,1.0f,1.0f",    // 1   = 0x00
-    "0.8745098f,0.8745098f,0.8745098f", // 7_8 = 0x01
-    "0.7490196f,0.7490196f,0.7490196f",	 // 3_4 = 0x02
-    "0.6235294f,0.6235294f,0.6235294f", // 5_8 = 0x03
-    "0.4980392f,0.4980392f,0.4980392f",       // 1_2 = 0x04
-    "0.372549f,0.372549f,0.372549f", // 3_8 = 0x05
-    "0.2470588f,0.2470588f,0.2470588f",    // 1_4 = 0x06
-    "0.1215686f,0.1215686f,0.1215686f", // 1_8 = 0x07
+    "(223.0f/255.0f),(223.0f/255.0f),(223.0f/255.0f)", // 7_8 = 0x01
+    "(191.0f/255.0f),(191.0f/255.0f),(191.0f/255.0f)",	 // 3_4 = 0x02
+    "(159.0f/255.0f),(159.0f/255.0f),(159.0f/255.0f)", // 5_8 = 0x03
+    "(127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f)",       // 1_2 = 0x04
+    "(95.0f/255.0f),(95.0f/255.0f),(95.0f/255.0f)", // 3_8 = 0x05
+    "(63.0f/255.0f),(63.0f/255.0f),(63.0f/255.0f)",    // 1_4 = 0x06
+    "(31.0f/255.0f),(31.0f/255.0f),(31.0f/255.0f)", // 1_8 = 0x07
    "ERROR", // 0x08
    "ERROR", // 0x09
    "ERROR", // 0x0a
@ -193,13 +193,13 @@ static const char *tevKSelTableC[] = // KCSEL
 static const char *tevKSelTableA[] = // KASEL
 {
    "1.0f",  // 1   = 0x00
-    "0.8745098f",// 7_8 = 0x01
-    "0.7490196f", // 3_4 = 0x02
-    "0.6235294f",// 5_8 = 0x03
-    "0.4980392f",  // 1_2 = 0x04
-    "0.372549f",// 3_8 = 0x05
-    "0.2470588f", // 1_4 = 0x06
-    "0.1215686f",// 1_8 = 0x07
+    "(223.0f/255.0f)",// 7_8 = 0x01
+    "(191.0f/255.0f)", // 3_4 = 0x02
+    "(159.0f/255.0f)",// 5_8 = 0x03
+    "(127.0f/255.0f)",  // 1_2 = 0x04
+    "(95.0f/255.0f)",// 3_8 = 0x05
+    "(63.0f/255.0f)", // 1_4 = 0x06
+    "(31.0f/255.0f)",// 1_8 = 0x07
    "ERROR", // 0x08
    "ERROR", // 0x09
    "ERROR", // 0x0a
@ -237,8 +237,8 @@ static const char *tevScaleTable[] = // CS
 static const char *tevBiasTable[] = // TB
 {
    "",       // ZERO,
-    "+0.4980392f",  // ADDHALF,
-    "-0.4980392f",  // SUBHALF,
+    "+(127.0f/255.0f)",  // ADDHALF,
+    "-(127.0f/255.0f)",  // SUBHALF,
    "",
 };

@ -247,13 +247,6 @@ static const char *tevOpTable[] = { // TEV
    "-",      // TEVOP_SUB = 1,
 };

-//static const char *tevCompOpTable[] = { ">", "==" };
-
-#define TEVCMP_R8    0
-#define TEVCMP_GR16  1
-#define TEVCMP_BGR24 2
-#define TEVCMP_RGB8  3
-
 static const char *tevCInputTable[] = // CC
 {
    "(prev.rgb)",               // CPREV,
@ -269,7 +262,7 @@ static const char *tevCInputTable[] = // CC
    "(rastemp.rgb)",            // RASC,
    "(rastemp.aaa)",      // RASA,
    "float3(1.0f,1.0f,1.0f)",              // ONE
-    "float3(0.4980392f,0.4980392f,0.4980392f)",                 // HALF
+    "float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))",                 // HALF
    "(konsttemp.rgb)", //"konsttemp.rgb",        // KONST
    "float3(0.0f,0.0f,0.0f)",              // ZERO
 	///aded extra values to map clamped values
@ -286,7 +279,7 @@ static const char *tevCInputTable[] = // CC
    "(rastemp.rgb)",            // RASC,
    "(rastemp.aaa)",      // RASA,
    "float3(1.0f,1.0f,1.0f)",              // ONE
-    "float3(0.4980392f,0.4980392f,0.4980392f)",                 // HALF
+    "float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))",                 // HALF
    "(konsttemp.rgb)", //"konsttemp.rgb",        // KONST
    "float3(0.0f,0.0f,0.0f)",              // ZERO    
    "PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
@ -510,7 +503,7 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 H
 	}

 	// emulation of unisgned 8 overflow when casting
-	WRITE(p, "prev = frac(4.0f + prev * 0.99609375f) *  1.00392157f;\n");//fmod(fmod(prev * 255.0f,256.0f) + 256.0f,256.0f) * 0.0039215686f;\n");		
+	WRITE(p, "prev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
 		
    if (!WriteAlphaTest(p, HLSL))
 	{
@ -719,14 +712,49 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
    if (bCKonst || bAKonst )
        WRITE(p, "konsttemp=float4(%s,%s);\n",tevKSelTableC[kc],tevKSelTableA[ka]);  

-	if(cc.a == 0 || cc.a == 1 || cc.b == 0 || cc.b == 1 || cc.c == 0 || cc.c == 1 || ac.a == 0 || ac.b == 0 || ac.c == 0)
-		WRITE(p, "cprev = frac(4.0f + prev * 0.99609375f) *  1.00392157f;\n");  
-	if(cc.a == 2 || cc.a == 3 || cc.b == 2 || cc.b == 3 || cc.c == 2 || cc.c == 3 || ac.a == 1 || ac.b == 1 || ac.c == 1)
-		WRITE(p, "cc0 = frac(4.0f + c0 * 0.99609375f) *  1.00392157f;\n");
-	if(cc.a == 4 || cc.a == 5 || cc.b == 4 || cc.b == 5 || cc.c == 4 || cc.c == 5 || ac.a == 2 || ac.b == 2 || ac.c == 2)
-		WRITE(p, "cc1 = frac(4.0f + c1 * 0.99609375f) *  1.00392157f;\n");  
-	if(cc.a == 6 || cc.a == 7 || cc.b == 6 || cc.b == 7 || cc.c == 6 || cc.c == 7 || ac.a == 3 || ac.b == 3 || ac.c == 3)
-		WRITE(p, "cc2 = frac(4.0f + c2 * 0.99609375f) *  1.00392157f;\n");  	
+	if(cc.a == TEVCOLORARG_CPREV 
+	|| cc.a == TEVCOLORARG_APREV 
+	|| cc.b == TEVCOLORARG_CPREV 
+	|| cc.b == TEVCOLORARG_APREV 
+	|| cc.c == TEVCOLORARG_CPREV 
+	|| cc.c == TEVCOLORARG_APREV 
+	|| ac.a == TEVALPHAARG_APREV 
+	|| ac.b == TEVALPHAARG_APREV 
+	|| ac.c == TEVALPHAARG_APREV)
+		WRITE(p, "cprev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");  
+
+	if(cc.a == TEVCOLORARG_C0 
+	|| cc.a == TEVCOLORARG_A0 
+	|| cc.b == TEVCOLORARG_C0 
+	|| cc.b == TEVCOLORARG_A0 
+	|| cc.c == TEVCOLORARG_C0 
+	|| cc.c == TEVCOLORARG_A0 
+	|| ac.a == TEVALPHAARG_A0 
+	|| ac.b == TEVALPHAARG_A0 
+	|| ac.c == TEVALPHAARG_A0)
+		WRITE(p, "cc0 = frac(4.0f + c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
+
+	if(cc.a == TEVCOLORARG_C1 
+	|| cc.a == TEVCOLORARG_A1 
+	|| cc.b == TEVCOLORARG_C1 
+	|| cc.b == TEVCOLORARG_A1 
+	|| cc.c == TEVCOLORARG_C1 
+	|| cc.c == TEVCOLORARG_A1 
+	|| ac.a == TEVALPHAARG_A1 
+	|| ac.b == TEVALPHAARG_A1 
+	|| ac.c == TEVALPHAARG_A1)
+		WRITE(p, "cc1 = frac(4.0f + c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");  
+	
+	if(cc.a == TEVCOLORARG_C2 
+	|| cc.a == TEVCOLORARG_A2 
+	|| cc.b == TEVCOLORARG_C2 
+	|| cc.b == TEVCOLORARG_A2 
+	|| cc.c == TEVCOLORARG_C2 
+	|| cc.c == TEVCOLORARG_A2 
+	|| ac.a == TEVALPHAARG_A2 
+	|| ac.b == TEVALPHAARG_A2 
+	|| ac.c == TEVALPHAARG_A2)
+		WRITE(p, "cc2 = frac(4.0f + c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");  	

    if (cc.clamp)
 		WRITE(p, "%s=saturate(", tevCOutputTable[cc.dest]);
@ -734,13 +762,13 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 		WRITE(p, "%s=", tevCOutputTable[cc.dest]);

    // combine the color channel
-    if (cc.bias != 3) // if not compare
+	if (cc.bias != TevBias_COMPARE) // if not compare
 	{
 		//normal color combiner goes here        
-		if (cc.shift>0)
+		if (cc.shift > TEVSCALE_1)
 			WRITE(p, "%s*(",tevScaleTable[cc.shift]);		

-		if(!(cc.d == TEVCOLORARG_ZERO && cc.op == 0))
+		if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD))
 			WRITE(p, "%s%s",tevCInputTable[cc.d],tevOpTable[cc.op]);

 		if (cc.a == cc.b)
@ -750,11 +778,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 		else if (cc.c == TEVCOLORARG_ONE)
 			WRITE(p,"%s",tevCInputTable[cc.b + 16]);
 		else if (cc.a == TEVCOLORARG_ZERO)
-			WRITE(p,"%s*(trunc(%s * 256.9921875f)*0.00390625f)",tevCInputTable[cc.b + 16],tevCInputTable[cc.c + 16]);
+			WRITE(p,"%s*%s",tevCInputTable[cc.b + 16],tevCInputTable[cc.c + 16]);
 		else if (cc.b == TEVCOLORARG_ZERO)
-			WRITE(p,"%s*(float3(1.0f,1.0f,1.0f)-(trunc(%s * 256.9921875f)*0.00390625f))",tevCInputTable[cc.a + 16],tevCInputTable[cc.c + 16]);
+			WRITE(p,"%s*(float3(1.0f,1.0f,1.0f)-%s)",tevCInputTable[cc.a + 16],tevCInputTable[cc.c + 16]);
 		else
-			WRITE(p, "lerp(%s,%s,(trunc(%s * 256.9921875f)*0.00390625f))",tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16],tevCInputTable[cc.c + 16]);
+			WRITE(p, "lerp(%s,%s,%s)",tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16],tevCInputTable[cc.c + 16]);
 		
 		WRITE(p, "%s",tevBiasTable[cc.bias]);
 		
@ -780,13 +808,13 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 	else
 		WRITE(p, "%s=", tevAOutputTable[ac.dest]);

-    if (ac.bias != 3) // if not compare
+    if (ac.bias != TevBias_COMPARE) // if not compare
 	{
        //normal alpha combiner goes here
-		if (ac.shift>0)
+		if (ac.shift > TEVSCALE_1)
 			WRITE(p, "%s*(",tevScaleTable[ac.shift]);		

-		if(!(ac.d == TEVALPHAARG_ZERO && ac.op == 0))
+		if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD))
 			WRITE(p, "%s.a%s",tevAInputTable[ac.d],tevOpTable[ac.op]);		

 		if (ac.a == ac.b)
@ -794,11 +822,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 		else if (ac.c == TEVALPHAARG_ZERO)
 			WRITE(p,"%s.a",tevAInputTable[ac.a + 8]);
 		else if (ac.a == TEVALPHAARG_ZERO)
-			WRITE(p,"%s.a*(trunc(%s.a * 256.9921875f)*0.00390625f)",tevAInputTable[ac.b + 8],tevAInputTable[ac.c + 8]);
+			WRITE(p,"%s.a*%s.a",tevAInputTable[ac.b + 8],tevAInputTable[ac.c + 8]);
 		else if (ac.b == TEVALPHAARG_ZERO)
-			WRITE(p,"%s.a*(1.0f-(trunc(%s.a * 256.9921875f)*0.00390625f))",tevAInputTable[ac.a + 8],tevAInputTable[ac.c + 8]);
+			WRITE(p,"%s.a*(1.0f-%s.a)",tevAInputTable[ac.a + 8],tevAInputTable[ac.c + 8]);
 		else
-	        WRITE(p, "lerp(%s.a,%s.a,(trunc(%s.a * 256.9921875f)*0.00390625f))",tevAInputTable[ac.a + 8],tevAInputTable[ac.b + 8],tevAInputTable[ac.c + 8]);
+	        WRITE(p, "lerp(%s.a,%s.a,%s.a)",tevAInputTable[ac.a + 8],tevAInputTable[ac.b + 8],tevAInputTable[ac.c + 8]);
 		
 		WRITE(p, "%s",tevBiasTable[ac.bias]);
 		
--- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp
@ -111,6 +111,7 @@ void WriteSwizzler(char*& p, u32 format,bool HLSL)

 	if(HLSL)
 	{
+		WRITE(p, "  sampleUv = sampleUv + float2(0.0f,1.0f);\n");// still to determine the reason for this
 		WRITE(p, "  sampleUv = sampleUv / blkDims.zw;\n");
 	}		
 }
@ -163,6 +164,7 @@ void Write32BitSwizzler(char*& p, u32 format, bool HLSL)

 	if(HLSL)
 	{
+		WRITE(p, "  sampleUv = sampleUv + float2(0.0f,1.0f);\n");// still to determine the reason for this
 		WRITE(p, "  sampleUv = sampleUv / blkDims.zw;\n");
 	}	
 }
--- a/Source/Core/VideoCommon/Src/VertexLoader.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp
@ -598,12 +598,12 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	int startv = 0, extraverts = 0;
 	int v = 0;

-	int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
+	//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
 	while (v < count)
 	{
 		int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
-		if (remainingVerts2 - v + startv < remainingVerts)
-		    remainingVerts = remainingVerts2 - v + startv;
+		//if (remainingVerts2 - v + startv < remainingVerts)
+		    //remainingVerts = remainingVerts2 - v + startv;
 		if (remainingVerts < granularity) {
 			INCSTAT(stats.thisFrame.numBufferSplits);
 			// This buffer full - break current primitive and flush, to switch to the next buffer.
@ -611,7 +611,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 			if (v - startv > 0)
 				VertexManager::AddVertices(primitive, v - startv + extraverts);
 			VertexManager::Flush();
-			remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
+			//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
 			// Why does this need to be so complicated?
 			switch (primitive) {
 				case 3: // triangle strip, copy last two vertices