Merge branch 'master' into GLSL-master

Conflicts: Source/Core/VideoCommon/Src/PixelShaderGen.cpp Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
2025-07-24 14:49:42 -06:00 · 2013-03-15 11:19:52 -05:00
parent 84119a966b d63d7fde9e
commit 8c1091a21f
38 changed files with 384 additions and 190 deletions
--- a/Source/Core/VideoCommon/Src/BPMemory.h
+++ b/Source/Core/VideoCommon/Src/BPMemory.h
@ -93,7 +93,7 @@
 #define BPMEM_TEV_ALPHA_ENV    0xC1 // 0xC1 + (2 * 16)
 #define BPMEM_TEV_REGISTER_L   0xE0 // 0xE0 + (2 * 4)
 #define BPMEM_TEV_REGISTER_H   0xE1 // 0xE1 + (2 * 4)
-#define BPMEM_FOGRANGE         0xE8
+#define BPMEM_FOGRANGE         0xE8 // 0xE8 + 6
 #define BPMEM_FOGPARAM0        0xEE
 #define BPMEM_FOGBMAGNITUDE    0xEF
 #define BPMEM_FOGBEXPONENT     0xF0
@ -988,7 +988,7 @@ struct BPMemory
    FourTexUnits tex[2]; //80-bf
    TevStageCombiner combiners[16]; //0xC0-0xDF
    TevReg tevregs[4];  //0xE0
-    FogRangeParams fogRange;
+    FogRangeParams fogRange;  // 0xE8
    FogParams fog; //0xEE,0xEF,0xF0,0xF1,0xF2
    AlphaTest alpha_test; //0xF3
    ZTex1 ztex1; //0xf4,0xf5
--- a/Source/Core/VideoCommon/Src/Fifo.cpp
+++ b/Source/Core/VideoCommon/Src/Fifo.cpp
@ -235,11 +235,11 @@ void RunGpu()
 	{
 		u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
 			
-			FPURoundMode::SaveSIMDState();
-			FPURoundMode::LoadDefaultSIMDState();
-			ReadDataFromFifo(uData, 32);				
-			u32 count = OpcodeDecoder_Run(g_bSkipCurrentFrame);	
-			FPURoundMode::LoadSIMDState();
+		FPURoundMode::SaveSIMDState();
+		FPURoundMode::LoadDefaultSIMDState();
+		ReadDataFromFifo(uData, 32);
+		u32 count = OpcodeDecoder_Run(g_bSkipCurrentFrame);
+		FPURoundMode::LoadSIMDState();

 		//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@ -276,7 +276,7 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::
 static void WriteStage(char *&p, int n, API_TYPE ApiType);
 static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
 // static void WriteAlphaCompare(char *&p, int num, int comp);
-static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool depthTextureEnable);
+static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
 static void WriteFog(char *&p);

 static const char *tevKSelTableC[] = // KCSEL
@ -511,8 +511,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
 	BuildSwapModeTable(); // Needed for WriteStage
 	int numStages = bpmem.genMode.numtevstages + 1;
 	int numTexgen = bpmem.genMode.numtexgens;
-	
-	bool depthTextureEnable = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable;
+
+	bool per_pixel_depth = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable;

 	char *p = text;
 	WRITE(p, "//Pixel Shader for TEV stages\n");
@ -602,7 +602,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
 		if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
 			WRITE(p, "out float4 ocol1;\n");
 		
-		if (depthTextureEnable)
+		if (per_pixel_depth)
 			WRITE(p, "#define depth gl_FragDepth\n");
 		WRITE(p, "float4 rawpos = gl_FragCoord;\n");

@ -658,14 +658,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
 		{
 			WRITE(p, "  out float4 ocol0 : COLOR0,%s%s\n  in float4 rawpos : %s,\n",
 				dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n  out float4 ocol1 : COLOR1," : "",
-				depthTextureEnable ? "\n  out float depth : DEPTH," : "",
+				per_pixel_depth ? "\n  out float depth : DEPTH," : "",
 				ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
 		}
 		else
 		{
 			WRITE(p, "  out float4 ocol0 : SV_Target0,%s%s\n  in float4 rawpos : SV_Position,\n",
 				dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n  out float4 ocol1 : SV_Target1," : "",
-				depthTextureEnable ? "\n  out float depth : SV_Depth," : "");
+				per_pixel_depth ? "\n  out float depth : SV_Depth," : "");
 		}

 		WRITE(p, "  in float4 colors_0 : COLOR0,\n");
@ -808,7 +808,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType

 	AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
 	if (Pretest == AlphaTest::UNDETERMINED)
-		WriteAlphaTest(p, ApiType, dstAlphaMode, depthTextureEnable);
+		WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth);

 	
 	// the screen space depth value = far z + (clip z / clip w) * z range
@ -817,9 +817,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
 	else
 		// dx9 doesn't support 4 component position, so we have to calculate it again
 		WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
-	
-	// Note: depth textures are disabled if early depth test is enabled
-	if (depthTextureEnable)
+
+	// depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
+	bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;
+	if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture)
 	{
 		// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
 		WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
@ -829,8 +830,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
 		WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
 		WRITE(p, "zCoord = frac(zCoord);\n");
 		WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
-	
-		WRITE(p, "depth = zCoord;\n");
+
+		// Note: depth texture output is only written to the depth buffer if late depth test is used
+		if (per_pixel_depth)
+			WRITE(p, "depth = zCoord;\n");
 	}

 	if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
@ -1242,7 +1245,7 @@ static const char *tevAlphaFunclogicTable[] =
 	" == "  // xnor
 };

-static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool depthTextureEnable)
+static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth)
 {
 	static const char *alphaRef[2] =
 	{
@ -1266,7 +1269,7 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode
 	WRITE(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
 	if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
 		WRITE(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
-	if(depthTextureEnable)
+	if(per_pixel_depth)
 		WRITE(p, "depth = 1.f;\n");

 	// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
--- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp
@ -415,6 +415,7 @@ void PixelShaderManager::SetZTextureBias(u32 bias)
 void PixelShaderManager::SetViewportChanged()
 {
 	s_bDepthRangeChanged = true;
+	s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
 }

 void PixelShaderManager::SetIndTexScaleChanged(u8 stagemask)
--- a/Source/Core/VideoCommon/Src/VertexManagerBase.h
+++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h
@ -2,6 +2,7 @@
 #ifndef _VERTEXMANAGERBASE_H
 #define _VERTEXMANAGERBASE_H

+#include "Common.h"
 #include <vector>

 class NativeVertexFormat;
@ -10,17 +11,16 @@ class PointerWrap;
 class VertexManager
 {
 private:
-	// What are the actual values?
-	static const u32 SMALLEST_POSSIBLE_VERTEX = 1;
-	static const u32 LARGEST_POSSIBLE_VERTEX = 188;
+	static const u32 SMALLEST_POSSIBLE_VERTEX = sizeof(float)*3;                 // 3 pos
+	static const u32 LARGEST_POSSIBLE_VERTEX = sizeof(float)*45 + sizeof(u32)*2; // 3 pos, 3*3 normal, 2*u32 color, 8*4 tex, 1 posMat 
 	
 	static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;
 	
 public:
-	static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX;
+	static const u32 MAXVBUFFERSIZE = ROUND_UP_POW2 (MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX);
 	
 	// We may convert triangle-fans to triangle-lists, almost 3x as many indices.
-	static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3;
+	static const u32 MAXIBUFFERSIZE = ROUND_UP_POW2 (MAX_PRIMITIVES_PER_COMMAND * 3);

 	VertexManager();
 	// needs to be virtual for DX11's dtor