zfreeze: cache vertex positions

Suggested by degasus.
This commit is contained in:
Tillmann Karras
2015-06-01 19:58:27 +02:00
parent 9e2f4dd7da
commit 5ddd2cef6c
9 changed files with 80 additions and 17 deletions

View File

@ -23,6 +23,11 @@ static const X64Reg base_reg = RBX;
static const u8* memory_base_ptr = (u8*)&g_main_cp_state.array_strides;
// Builds an operand that addresses `ptr` relative to base_reg, with an
// additional index register.
//   ptr       - target address; only its distance from memory_base_ptr is used.
//   scale_reg - register passed to MComplex as the scaled index.
//   scale     - scale factor forwarded to MComplex (defaults to SCALE_1).
// The distance from memory_base_ptr is narrowed to a signed 32-bit value
// before being handed to MComplex as the displacement.
static OpArg MPIC(const void* ptr, X64Reg scale_reg, int scale = SCALE_1)
{
	const s32 displacement = static_cast<s32>(static_cast<const u8*>(ptr) - memory_base_ptr);
	return MComplex(base_reg, scale_reg, scale, displacement);
}
static OpArg MPIC(const void* ptr)
{
return MDisp(base_reg, (s32)((u8*)ptr - memory_base_ptr));
@ -193,6 +198,31 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
MOV(32, dest, R(scratch3));
data.AddMemOffset(sizeof(float));
dest.AddMemOffset(sizeof(float));
// zfreeze
if (native_format == &m_native_vtx_decl.position)
{
if (cpu_info.bSSE4_1)
{
PINSRD(coords, R(scratch3), i);
}
else
{
PINSRW(coords, R(scratch3), 2 * i + 0);
SHR(32, R(scratch3), Imm8(16));
PINSRW(coords, R(scratch3), 2 * i + 1);
}
}
}
// zfreeze
if (native_format == &m_native_vtx_decl.position)
{
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
return load_bytes;
}
@ -213,6 +243,16 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
case 3: MOVUPS(dest, coords); break;
}
// zfreeze
if (native_format == &m_native_vtx_decl.position)
{
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
return load_bytes;
}
@ -388,6 +428,13 @@ void VertexLoaderX64::GenerateVertexLoader()
MOVZX(32, 8, scratch1, MDisp(src_reg, m_src_ofs));
AND(32, R(scratch1), Imm8(0x3F));
MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
// zfreeze
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
MOV(32, MPIC(VertexLoaderManager::position_matrix_index - 1, count_reg, SCALE_4), R(scratch1));
SetJumpTarget(dont_store);
m_native_components |= VB_HAS_POSMTXIDX;
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;