Optimize shader uid comparisons by comparing only the uid values that are actually in use.

This commit is contained in:
NeoBrainX 2013-06-22 21:24:21 +02:00
parent 16ada5fa3d
commit bdc28106ee
5 changed files with 44 additions and 29 deletions

View File

@ -535,6 +535,11 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
for (unsigned int i = 0; i < numStages; i++) for (unsigned int i = 0; i < numStages; i++)
WriteStage<T>(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage WriteStage<T>(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage
// Byte offset of member `elem` from the start of object `str`.
// NOTE(review): presumably hand-rolled instead of offsetof because the member designator
// used below (stagehash[numStages]) contains a runtime index - confirm.
#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
// Record how many leading u32 words of the uid are meaningful, so that uid comparisons
// only have to look at that many values:
//  - with per-pixel lighting, the whole struct (including the trailing lighting data) counts;
//  - otherwise only the words up to the end of the last active stage hash count.
bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
// Divide by sizeof(u32) in both branches (the literal `32` only had the right size by coincidence).
uid_data.num_values = (enable_pl) ? sizeof(uid_data)/sizeof(u32) : MY_STRUCT_OFFSET(uid_data,stagehash[numStages])/sizeof(u32);
if (numStages) if (numStages)
{ {
// The results of the last texenv stage are put onto the screen, // The results of the last texenv stage are put onto the screen,

View File

@ -61,6 +61,9 @@ struct pixel_shader_uid_data
{ {
// TODO: Optimize field order for easy access! // TODO: Optimize field order for easy access!
u32 num_values; // TODO: Shouldn't be a u32
// Returns the stored count of u32 uid values (see num_values).
// TODO: Can be optimized :)
u32 NumValues() const
{
	return num_values;
}
u32 components; u32 components;
u32 dstAlphaMode : 2; u32 dstAlphaMode : 2;
u32 Pretest : 2; u32 Pretest : 2;
@ -96,34 +99,6 @@ struct pixel_shader_uid_data
else if (index == 3) { tevindref_bi4 = texmap; } else if (index == 3) { tevindref_bi4 = texmap; }
} }
// Per-stage uid data: one entry for each of the 16 possible stages.
// The declared bit widths of one entry add up to 128 bits (64 + 32 + 32).
struct {
// 64 bits: cc, ac, tevorders_* and pad1
u32 cc : 24;
u32 ac : 24;
u32 tevorders_texmap : 3;
u32 tevorders_texcoord : 3;
u32 tevorders_enable : 1;
u32 tevorders_colorchan : 3;
u32 pad1 : 6;
// 32 bits: indirect stage info, first half of the swap fields and pad2
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2; // TODO: Doesn't fit here..
u32 tevksel_swap2a : 2; // TODO: Doesn't fit here..
u32 tevksel_swap1b : 2; // TODO: Doesn't fit here..
u32 tevksel_swap2b : 2; // TODO: Doesn't fit here..
u32 pad2 : 2;
// 32 bits: second half of the swap fields, kc/ka selectors and pad3
u32 tevksel_swap1c : 2; // TODO: Doesn't fit here..
u32 tevksel_swap2c : 2; // TODO: Doesn't fit here..
u32 tevksel_swap1d : 2; // TODO: Doesn't fit here..
u32 tevksel_swap2d : 2; // TODO: Doesn't fit here..
u32 tevksel_kc : 5;
u32 tevksel_ka : 5;
u32 pad3 : 14;
} stagehash[16];
u32 alpha_test_comp0 : 3; u32 alpha_test_comp0 : 3;
u32 alpha_test_comp1 : 3; u32 alpha_test_comp1 : 3;
u32 alpha_test_logic : 2; u32 alpha_test_logic : 2;
@ -141,6 +116,34 @@ struct pixel_shader_uid_data
u32 xfregs_numTexGen_numTexGens : 4; u32 xfregs_numTexGen_numTexGens : 4;
// Per-stage uid data: one entry for each of the 16 possible stages.
// The declared bit widths of one entry add up to 128 bits (64 + 32 + 32), i.e. four u32
// words if the fields are tightly packed (NOTE(review): confirm the packing in effect here).
// GeneratePixelShader takes the offset of stagehash[numStages] to count how many uid words
// are in use when pixel lighting is off, so only the entries of active stages and the
// fields declared before this array are included in that count.
struct {
// 64 bits: cc, ac, tevorders_* and pad1
// (presumably color/alpha combiner state and texture order info - TODO confirm)
u32 cc : 24;
u32 ac : 24;
u32 tevorders_texmap : 3;
u32 tevorders_texcoord : 3;
u32 tevorders_enable : 1;
u32 tevorders_colorchan : 3;
u32 pad1 : 6;
// TODO: Clean up the swapXY mess
// 32 bits: indirect stage info, swap fields a/b and pad2
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2;
u32 tevksel_swap2a : 2;
u32 tevksel_swap1b : 2;
u32 tevksel_swap2b : 2;
u32 pad2 : 2;
// 32 bits: swap fields c/d, kc/ka selectors and pad3
u32 tevksel_swap1c : 2;
u32 tevksel_swap2c : 2;
u32 tevksel_swap1d : 2;
u32 tevksel_swap2d : 2;
u32 tevksel_kc : 5;
u32 tevksel_ka : 5;
u32 pad3 : 14;
} stagehash[16];
// TODO: I think we're fine without an enablePixelLighting field, should probably double check, though.. // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
LightingUidData lighting; LightingUidData lighting;
}; };

View File

@ -100,7 +100,7 @@ public:
bool operator < (const ShaderUid& obj) const bool operator < (const ShaderUid& obj) const
{ {
// TODO: Store last frame used and order by that? makes much more sense anyway... // TODO: Store last frame used and order by that? makes much more sense anyway...
for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) for (unsigned int i = 0; i < data.NumValues(); ++i)
{ {
if (this->values[i] < obj.values[i]) if (this->values[i] < obj.values[i])
return true; return true;
@ -212,6 +212,8 @@ struct LightingUidData
u32 diffusefunc : 8; // 4x2 bits u32 diffusefunc : 8; // 4x2 bits
u32 attnfunc : 8; // 4x2 bits u32 attnfunc : 8; // 4x2 bits
u32 light_mask : 32; // 4x8 bits u32 light_mask : 32; // 4x8 bits
// Returns the size of LightingUidData expressed as a count of u32 values.
u32 NumValues() const
{
	return sizeof(LightingUidData) / sizeof(u32);
}
}; };
#pragma pack() #pragma pack()

View File

@ -74,6 +74,8 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
vertex_shader_uid_data& uid_data = (&out.template GetUidData<vertex_shader_uid_data>() != NULL) vertex_shader_uid_data& uid_data = (&out.template GetUidData<vertex_shader_uid_data>() != NULL)
? out.template GetUidData<vertex_shader_uid_data>() : dummy_data; ? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
uid_data.num_values = sizeof(uid_data)/sizeof(u32);
out.SetBuffer(text); out.SetBuffer(text);
#ifndef ANDROID #ifndef ANDROID
locale_t locale; locale_t locale;

View File

@ -67,6 +67,9 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
struct vertex_shader_uid_data struct vertex_shader_uid_data
{ {
u32 num_values; // TODO: Shouldn't be a u32
// Returns the stored count of u32 uid values (see num_values).
u32 NumValues() const
{
	return num_values;
}
u32 components; u32 components;
u32 numColorChans : 2; u32 numColorChans : 2;
u32 numTexGens : 4; u32 numTexGens : 4;