Initial port of zfreeze branch (3.5-1729)

Initial port of the original zfreeze branch (3.5-1729) by neobrain into
the most recent build of Dolphin.

Makes Rogue Squadron 2 very playable at full speed, thanks to recent core
speedups made to Dolphin. Works only on the DirectX Video plugin for now.

Enjoy, and Merry Xmas!
This commit is contained in:
NanoByte011
2014-12-25 00:34:22 -07:00
committed by Scott Mansell
parent 4984215971
commit 937844b9e3
12 changed files with 173 additions and 4 deletions

View File

@ -23,6 +23,7 @@ struct PixelShaderConstants
int4 fogcolor;
int4 fogi;
float4 fogf[2];
float4 zslope;
};
struct VertexShaderConstants

View File

@ -228,6 +228,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
"\tint4 " I_FOGCOLOR";\n"
"\tint4 " I_FOGI";\n"
"\tfloat4 " I_FOGF"[2];\n"
"\tfloat4 " I_ZSLOPE";\n"
"};\n");
if (g_ActiveConfig.bEnablePixelLighting)
@ -269,7 +270,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("};\n");
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
if (forced_early_z)
{
@ -538,10 +539,20 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
uid_data->zfreeze = bpmem.genMode.zfreeze;
// Note: z-textures are not written to depth buffer if early depth test is used
if (per_pixel_depth && bpmem.UseEarlyDepthTest())
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
{
if (bpmem.genMode.zfreeze)
{
out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
}
else
{
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
}
}
// Note: depth texture output is only written to depth buffer if late depth test is used
// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
@ -555,7 +566,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
}
if (per_pixel_depth && bpmem.UseLateDepthTest())
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
{
if (bpmem.genMode.zfreeze)
{
out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
}
else
{
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
}
}
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{

View File

@ -21,8 +21,9 @@
#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
#define C_FOGI (C_FOGCOLOR + 1) //28
#define C_FOGF (C_FOGI + 1) //29
#define C_ZSLOPE (C_FOGF + 1) //30
#define C_PENVCONST_END (C_FOGF + 2)
#define C_PENVCONST_END (C_ZSLOPE + 2)
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
@ -62,6 +63,7 @@ struct pixel_shader_uid_data
u32 forced_early_z : 1;
u32 early_ztest : 1;
u32 bounding_box : 1;
u32 zfreeze : 1;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;

View File

@ -14,6 +14,8 @@
bool PixelShaderManager::s_bFogRangeAdjustChanged;
bool PixelShaderManager::s_bViewPortChanged;
bool PixelShaderManager::s_bZSlopeChanged;
static float zslope[3];
std::array<int4,4> PixelShaderManager::s_tev_color;
std::array<int4,4> PixelShaderManager::s_tev_konst_color;
@ -48,6 +50,7 @@ void PixelShaderManager::Dirty()
SetDestAlpha();
SetZTextureBias();
SetViewportChanged();
SetZSlopeChanged(0, 0, 1);
SetIndTexScaleChanged(false);
SetIndTexScaleChanged(true);
SetIndMatrixChanged(0);
@ -112,6 +115,17 @@ void PixelShaderManager::SetConstants()
dirty = true;
s_bViewPortChanged = false;
}
if (s_bZSlopeChanged)
{
constants.zslope[0] = zslope[0];
constants.zslope[1] = zslope[1];
constants.zslope[2] = zslope[2];
constants.zslope[3] = 0;
dirty = true;
s_bZSlopeChanged = false;
}
}
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
@ -168,6 +182,14 @@ void PixelShaderManager::SetViewportChanged()
s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
}
// Records a new set of z-slope coefficients and marks them as pending.
//
// dfdx, dfdy: stored in zslope[0] and zslope[1]; the generated pixel shader
//             multiplies them with clipPos.x / clipPos.w and
//             clipPos.y / clipPos.w respectively.
// f0:         stored in zslope[2]; the constant term of that depth expression.
//
// The values are only cached here. SetConstants() copies them into the
// pixel shader constants the next time it runs with s_bZSlopeChanged set.
void PixelShaderManager::SetZSlopeChanged(float dfdx, float dfdy, float f0)
{
	const float coefficients[3] = { dfdx, dfdy, f0 };
	for (int i = 0; i < 3; ++i)
		zslope[i] = coefficients[i];

	s_bZSlopeChanged = true;
}
void PixelShaderManager::SetIndTexScaleChanged(bool high)
{
constants.indtexscale[high][0] = bpmem.texscale[high].ss0;

View File

@ -36,6 +36,7 @@ public:
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetZSlopeChanged(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetTevKSelChanged(int id);
static void SetZTextureTypeChanged();
@ -50,6 +51,7 @@ public:
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
static bool s_bZSlopeChanged;
// These colors aren't available from global BP state,
// hence we keep a copy of them around.

View File

@ -291,6 +291,7 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b
#define I_FOGCOLOR "cfogcolor"
#define I_FOGI "cfogi"
#define I_FOGF "cfogf"
#define I_ZSLOPE "czslope"
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"

View File

@ -690,6 +690,24 @@ void VertexShaderManager::ResetView()
bProjectionChanged = true;
}
// Transforms one vertex position on the CPU: first by the position matrix
// currently selected through matrix_index_a.PosNormalMtxIdx, then by
// g_fProjectionMatrix.
//
// data: three floats (x, y, z) of the input position.
// out:  receives four floats (x, y, z, w) of the projected position.
//
// The intermediate vector is computed in full before anything is written
// to out.
void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
{
	// dot(row.xyz, v.xyz) + row.w for a row of four consecutive floats.
	// The terms are summed strictly left to right.
	const auto apply_row = [](const float* row, const float* v)
	{
		return v[0] * row[0] + v[1] * row[1] + v[2] * row[2] + row[3];
	};

	// Each matrix index step advances by four floats (one row).
	const float* const position_rows = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
	const float* const projection_rows = &g_fProjectionMatrix[0];

	// Three rows of the position matrix produce the intermediate xyz.
	float transformed[3];
	for (int r = 0; r < 3; ++r)
		transformed[r] = apply_row(position_rows + 4 * r, data);

	// TODO: this requires g_fProjectionMatrix to be up to date, which is not really a good design decision.
	// Four rows of the projection matrix produce x, y, z and w.
	for (int r = 0; r < 4; ++r)
		out[r] = apply_row(projection_rows + 4 * r, transformed);
}
void VertexShaderManager::DoState(PointerWrap &p)
{
p.Do(g_fProjectionMatrix);

View File

@ -34,6 +34,12 @@ public:
static void RotateView(float x, float y);
static void ResetView();
// data: 3 floats representing the X, Y and Z vertex model coordinates
// out: 4 floats which will be initialized with the corresponding clip space coordinates
// NOTE: g_fProjectionMatrix must be up to date when this is called
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
static void TransformToClipSpace(const float* data, float *out);
static VertexShaderConstants constants;
static bool dirty;
};