mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 22:09:19 -07:00
fc370c5136
whit this code hashing is completely disabled for the dlists content itself this must bring some fps more but i must know if this causes any glitches. i suspect that no game modify the content of the dlist so hash is time wasted but the only way to test it is with this commit. please test as many games s you can and give me feedback for any glitch. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7106 8ced0084-cf51-0410-be5f-012b33b47a6e
805 lines
20 KiB
C++
805 lines
20 KiB
C++
// Copyright (C) 2003-2009 Dolphin Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
// TODO: Handle cache-is-full condition :p
|
|
|
|
#include <map>
|
|
|
|
#include "Common.h"
|
|
#include "VideoCommon.h"
|
|
#include "Hash.h"
|
|
#include "MemoryUtil.h"
|
|
#include "DataReader.h"
|
|
#include "Statistics.h"
|
|
#include "OpcodeDecoding.h" // For the GX_ constants.
|
|
#include "HW/Memmap.h"
|
|
|
|
#include "XFMemory.h"
|
|
#include "CPMemory.h"
|
|
#include "BPMemory.h"
|
|
|
|
#include "VertexLoaderManager.h"
|
|
#include "VertexManagerBase.h"
|
|
#include "x64Emitter.h"
|
|
#include "ABI.h"
|
|
|
|
#include "DLCache.h"
|
|
#include "VideoConfig.h"
|
|
|
|
#define DL_CODE_CACHE_SIZE (1024*1024*16)
|
|
#define DL_CODE_CLEAR_THRESHOLD (128 * 1024)
|
|
extern int frameCount;
|
|
static int CheckContextId;
|
|
using namespace Gen;
|
|
|
|
namespace DLCache
|
|
{
|
|
|
|
// Currently just recompiles the DLs themselves, doesn't bother with the vertex data.
|
|
// The speed boost is pretty small. The real big boost will come when we also store
|
|
// vertex arrays in the cached DLs.
|
|
|
|
enum DisplayListPass {
|
|
DLPASS_ANALYZE,
|
|
DLPASS_COMPILE,
|
|
DLPASS_RUN,
|
|
};
|
|
|
|
#define DL_HASH_STEPS 512
|
|
|
|
struct ReferencedDataRegion
|
|
{
|
|
ReferencedDataRegion()
|
|
:hash(0),
|
|
start_address(NULL),
|
|
size(0),
|
|
MustClean(false),
|
|
ReferencedArray(-1),
|
|
ReferencedArrayStride(0),
|
|
NextRegion(NULL)
|
|
{}
|
|
u64 hash;
|
|
u8* start_address;
|
|
ReferencedDataRegion* NextRegion;
|
|
u32 size;
|
|
u32 MustClean;
|
|
u32 ReferencedArray;
|
|
u32 ReferencedArrayStride;
|
|
|
|
|
|
int IntersectsMemoryRange(u8* range_address, u32 range_size)
|
|
{
|
|
if (start_address + size < range_address)
|
|
return -1;
|
|
if (start_address >= range_address + range_size)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
struct CachedDisplayList
|
|
{
|
|
CachedDisplayList()
|
|
: uncachable(false),
|
|
num_xf_reg(0),
|
|
num_cp_reg(0),
|
|
num_bp_reg(0),
|
|
num_index_xf(0),
|
|
num_draw_call(0),
|
|
pass(DLPASS_ANALYZE),
|
|
BufferCount(0),
|
|
Regions(NULL),
|
|
LastRegion(NULL)
|
|
{
|
|
frame_count = frameCount;
|
|
}
|
|
u64 dl_hash;
|
|
ReferencedDataRegion* Regions;
|
|
ReferencedDataRegion* LastRegion;
|
|
const u8* compiled_code;
|
|
u32 uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
|
|
// Analitic data
|
|
u32 num_xf_reg;
|
|
u32 num_cp_reg;
|
|
u32 num_bp_reg;
|
|
u32 num_index_xf;
|
|
u32 num_draw_call;
|
|
|
|
u32 pass;
|
|
|
|
|
|
u32 check;
|
|
|
|
int frame_count;
|
|
|
|
// ... Something containing cached vertex buffers here ...
|
|
u32 BufferCount;
|
|
|
|
|
|
// Compile the commands themselves down to native code.
|
|
|
|
|
|
|
|
void InsertRegion(ReferencedDataRegion* NewRegion)
|
|
{
|
|
if(LastRegion)
|
|
{
|
|
LastRegion->NextRegion = NewRegion;
|
|
}
|
|
LastRegion = NewRegion;
|
|
if(!Regions)
|
|
{
|
|
Regions = LastRegion;
|
|
}
|
|
BufferCount++;
|
|
}
|
|
|
|
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size,u32 referencedArray,u32 referencedArrayStride)
|
|
{
|
|
ReferencedDataRegion* NewRegion = FindOverlapingRegion(RegionStartAddress, Size);
|
|
if(NewRegion)
|
|
{
|
|
bool RegionChanged = false;
|
|
if(RegionStartAddress < NewRegion->start_address)
|
|
{
|
|
NewRegion->start_address = RegionStartAddress;
|
|
RegionChanged = true;
|
|
}
|
|
if(RegionStartAddress + Size > NewRegion->start_address + NewRegion->size)
|
|
{
|
|
NewRegion->size += (u32)((RegionStartAddress + Size) - (NewRegion->start_address + NewRegion->size));
|
|
RegionChanged = true;
|
|
}
|
|
if(RegionChanged)
|
|
NewRegion->hash = GetHash64(NewRegion->start_address, NewRegion->size, DL_HASH_STEPS);
|
|
}
|
|
else
|
|
{
|
|
NewRegion = new ReferencedDataRegion;
|
|
NewRegion->MustClean = false;
|
|
NewRegion->size = Size;
|
|
NewRegion->start_address = RegionStartAddress;
|
|
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
|
|
NewRegion->ReferencedArray = referencedArray;
|
|
NewRegion->ReferencedArrayStride = referencedArrayStride;
|
|
|
|
InsertRegion(NewRegion);
|
|
}
|
|
}
|
|
|
|
bool CheckRegions()
|
|
{
|
|
ReferencedDataRegion* Current = Regions;
|
|
while(Current)
|
|
{
|
|
if(Current->hash)
|
|
{
|
|
// this chek is not strictly necesary now but i left it for an aditional safety check
|
|
if(Current->ReferencedArray != -1 && (cached_arraybases[Current->ReferencedArray] != Current->start_address || arraystrides[Current->ReferencedArray] != Current->ReferencedArrayStride))
|
|
{
|
|
return false;
|
|
}
|
|
if(Current->hash != GetHash64(Current->start_address, Current->size, DL_HASH_STEPS))
|
|
return false;
|
|
}
|
|
Current = Current->NextRegion;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
ReferencedDataRegion* FindOverlapingRegion(u8* RegionStart, u32 Regionsize)
|
|
{
|
|
ReferencedDataRegion* Current = Regions;
|
|
while(Current)
|
|
{
|
|
if(!Current->IntersectsMemoryRange(RegionStart, Regionsize))
|
|
return Current;
|
|
Current = Current->NextRegion;
|
|
}
|
|
return Current;
|
|
}
|
|
|
|
void ClearRegions()
|
|
{
|
|
ReferencedDataRegion* Current = Regions;
|
|
while(Current)
|
|
{
|
|
ReferencedDataRegion* temp = Current;
|
|
Current = Current->NextRegion;
|
|
if(temp->MustClean)
|
|
delete [] temp->start_address;
|
|
delete temp;
|
|
}
|
|
LastRegion = NULL;
|
|
Regions = NULL;
|
|
}
|
|
};
|
|
|
|
// We want to allow caching DLs that start at the same address but have different lengths,
|
|
// so the size has to be in the ID.
|
|
inline u64 CreateMapId(u32 address, u32 size)
|
|
{
|
|
return ((u64)address << 32) | size;
|
|
}
|
|
|
|
inline u64 CreateVMapId(u32 VATUSED)
|
|
{
|
|
u64 vmap_id = 0;
|
|
for(int i = 0; i < 8 ; i++)
|
|
{
|
|
if(VATUSED & (1 << i))
|
|
{
|
|
if(vmap_id != 0)
|
|
{
|
|
vmap_id ^= (((u64)g_VtxAttr[i].g0.Hex) | (((u64)g_VtxAttr[i].g1.Hex) << 32)) ^ (((u64)g_VtxAttr[i].g2.Hex) << 16);
|
|
}
|
|
else
|
|
{
|
|
vmap_id = (((u64)g_VtxAttr[i].g0.Hex) | (((u64)g_VtxAttr[i].g1.Hex) << 32)) ^ (((u64)g_VtxAttr[i].g2.Hex) << 16);
|
|
}
|
|
}
|
|
}
|
|
for(int i = 0; i < 12; i++)
|
|
{
|
|
if(VATUSED & (1 << (i + 16)))
|
|
vmap_id = vmap_id ^ ((u64)cached_arraybases[i]) ^ (((u64)arraystrides[i]) << 16);
|
|
}
|
|
return vmap_id ^ g_VtxDesc.Hex;
|
|
}
|
|
|
|
typedef std::map<u64, CachedDisplayList> DLMap;
|
|
|
|
struct VDlist
|
|
{
|
|
DLMap dl_map;
|
|
u32 VATUsed;
|
|
u32 count;
|
|
};
|
|
|
|
typedef std::map<u64, VDlist> VDLMap;
|
|
|
|
static VDLMap dl_map;
|
|
static u8* dlcode_cache;
|
|
|
|
static Gen::XEmitter emitter;
|
|
|
|
|
|
|
|
// First pass - analyze
|
|
u32 AnalyzeAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
|
|
{
|
|
u8* old_pVideoData = g_pVideoData;
|
|
u8* startAddress = Memory::GetPointer(address);
|
|
u32 num_xf_reg = 0;
|
|
u32 num_cp_reg = 0;
|
|
u32 num_bp_reg = 0;
|
|
u32 num_index_xf = 0;
|
|
u32 num_draw_call = 0;
|
|
u32 result = 0;
|
|
|
|
|
|
// Avoid the crash if Memory::GetPointer failed ..
|
|
if (startAddress != 0)
|
|
{
|
|
g_pVideoData = startAddress;
|
|
|
|
// temporarily swap dl and non-dl (small "hack" for the stats)
|
|
Statistics::SwapDL();
|
|
|
|
u8 *end = g_pVideoData + size;
|
|
while (g_pVideoData < end)
|
|
{
|
|
// Yet another reimplementation of the DL reading...
|
|
int cmd_byte = DataReadU8();
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_NOP:
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG: //0x08
|
|
{
|
|
u8 sub_cmd = DataReadU8();
|
|
u32 value = DataReadU32();
|
|
LoadCPReg(sub_cmd, value);
|
|
INCSTAT(stats.thisFrame.numCPLoads);
|
|
num_cp_reg++;
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
u32 Cmd2 = DataReadU32();
|
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
|
u32 xf_address = Cmd2 & 0xFFFF;
|
|
GC_ALIGNED128(u32 data_buffer[16]);
|
|
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
|
LoadXFReg(transfer_size, xf_address, data_buffer);
|
|
INCSTAT(stats.thisFrame.numXFLoads);
|
|
num_xf_reg++;
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A: //used for position matrices
|
|
{
|
|
LoadIndexedXF(DataReadU32(), 0xC);
|
|
num_index_xf++;
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_B: //used for normal matrices
|
|
{
|
|
LoadIndexedXF(DataReadU32(), 0xD);
|
|
num_index_xf++;
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_C: //used for postmatrices
|
|
{
|
|
LoadIndexedXF(DataReadU32(), 0xE);
|
|
num_index_xf++;
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_D: //used for lights
|
|
{
|
|
LoadIndexedXF(DataReadU32(), 0xF);
|
|
num_index_xf++;
|
|
}
|
|
break;
|
|
case GX_CMD_CALL_DL:
|
|
{
|
|
u32 addr = DataReadU32();
|
|
u32 count = DataReadU32();
|
|
ExecuteDisplayList(addr, count);
|
|
}
|
|
break;
|
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
|
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
|
|
break;
|
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
|
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
|
|
break;
|
|
case GX_LOAD_BP_REG: //0x61
|
|
{
|
|
u32 bp_cmd = DataReadU32();
|
|
LoadBPReg(bp_cmd);
|
|
INCSTAT(stats.thisFrame.numBPLoads);
|
|
num_bp_reg++;
|
|
}
|
|
break;
|
|
|
|
// draw primitives
|
|
default:
|
|
if (cmd_byte & 0x80)
|
|
{
|
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
|
u16 numVertices = DataReadU16();
|
|
result |= 1 << (cmd_byte & GX_VAT_MASK);
|
|
VertexLoaderManager::RunVertices(
|
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
|
numVertices);
|
|
num_draw_call++;
|
|
const int tc[12] = {
|
|
g_VtxDesc.Position, g_VtxDesc.Normal, g_VtxDesc.Color0, g_VtxDesc.Color1, g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord,
|
|
g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord, g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, (g_VtxDesc.Hex >> 31) & 3
|
|
};
|
|
for(int i = 0; i < 12; i++)
|
|
{
|
|
if(tc[i] > 1)
|
|
{
|
|
result |= 1 << (i + 16);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
INCSTAT(stats.numDListsCalled);
|
|
INCSTAT(stats.thisFrame.numDListsCalled);
|
|
// un-swap
|
|
Statistics::SwapDL();
|
|
}
|
|
dl->num_bp_reg = num_bp_reg;
|
|
dl->num_cp_reg = num_cp_reg;
|
|
dl->num_draw_call = num_draw_call;
|
|
dl->num_index_xf = num_index_xf;
|
|
dl->num_xf_reg = num_xf_reg;
|
|
// reset to the old pointer
|
|
g_pVideoData = old_pVideoData;
|
|
return result;
|
|
}
|
|
|
|
// The only sensible way to detect changes to vertex data is to convert several times
|
|
// and hash the output.
|
|
|
|
// Second pass - compile
|
|
// Since some commands can affect the size of other commands, we really have no choice
|
|
// but to compile as we go, interpreting the list. We can't compile and then execute, we must
|
|
// compile AND execute at the same time. The second time the display list gets called, we already
|
|
// have the compiled code so we don't have to interpret anymore, we just run it.
|
|
void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
|
|
{
|
|
u8* old_pVideoData = g_pVideoData;
|
|
u8* startAddress = Memory::GetPointer(address);
|
|
|
|
// Avoid the crash if Memory::GetPointer failed ..
|
|
if (startAddress != 0)
|
|
{
|
|
g_pVideoData = startAddress;
|
|
|
|
// temporarily swap dl and non-dl (small "hack" for the stats)
|
|
Statistics::SwapDL();
|
|
|
|
u8 *end = g_pVideoData + size;
|
|
|
|
emitter.AlignCode4();
|
|
dl->compiled_code = emitter.GetCodePtr();
|
|
emitter.ABI_EmitPrologue(4);
|
|
|
|
while (g_pVideoData < end)
|
|
{
|
|
// Yet another reimplementation of the DL reading...
|
|
int cmd_byte = DataReadU8();
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_NOP:
|
|
// Execute
|
|
// Compile
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG: //0x08
|
|
{
|
|
// Execute
|
|
u8 sub_cmd = DataReadU8();
|
|
u32 value = DataReadU32();
|
|
LoadCPReg(sub_cmd, value);
|
|
INCSTAT(stats.thisFrame.numCPLoads);
|
|
|
|
// Compile
|
|
emitter.ABI_CallFunctionCC((void *)&LoadCPReg, sub_cmd, value);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
// Execute
|
|
u32 Cmd2 = DataReadU32();
|
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
|
u32 xf_address = Cmd2 & 0xFFFF;
|
|
ReferencedDataRegion* NewRegion = new ReferencedDataRegion;
|
|
NewRegion->MustClean = true;
|
|
NewRegion->size = transfer_size * 4;
|
|
NewRegion->start_address = (u8*) new u8[NewRegion->size+15+12]; // alignment and guaranteed space
|
|
NewRegion->hash = 0;
|
|
dl->InsertRegion(NewRegion);
|
|
u32 *data_buffer = (u32*)(u8*)(((size_t)NewRegion->start_address+0xf)&~0xf);
|
|
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
|
LoadXFReg(transfer_size, xf_address, data_buffer);
|
|
INCSTAT(stats.thisFrame.numXFLoads);
|
|
// Compile
|
|
emitter.ABI_CallFunctionCCP((void *)&LoadXFReg, transfer_size, xf_address, data_buffer);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A: //used for position matrices
|
|
{
|
|
u32 value = DataReadU32();
|
|
// Execute
|
|
LoadIndexedXF(value, 0xC);
|
|
// Compile
|
|
emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xC);
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_B: //used for normal matrices
|
|
{
|
|
u32 value = DataReadU32();
|
|
// Execute
|
|
LoadIndexedXF(value, 0xD);
|
|
// Compile
|
|
emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xD);
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_C: //used for postmatrices
|
|
{
|
|
u32 value = DataReadU32();
|
|
// Execute
|
|
LoadIndexedXF(value, 0xE);
|
|
// Compile
|
|
emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xE);
|
|
}
|
|
break;
|
|
case GX_LOAD_INDX_D: //used for lights
|
|
{
|
|
u32 value = DataReadU32();
|
|
// Execute
|
|
LoadIndexedXF(value, 0xF);
|
|
// Compile
|
|
emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xF);
|
|
}
|
|
break;
|
|
|
|
case GX_CMD_CALL_DL:
|
|
{
|
|
u32 addr= DataReadU32();
|
|
u32 count = DataReadU32();
|
|
ExecuteDisplayList(addr, count);
|
|
emitter.ABI_CallFunctionCC((void *)&ExecuteDisplayList, addr, count);
|
|
}
|
|
break;
|
|
|
|
case GX_CMD_UNKNOWN_METRICS:
|
|
// zelda 4 swords calls it and checks the metrics registers after that
|
|
break;
|
|
|
|
case GX_CMD_INVL_VC:// Invalidate (vertex cache?)
|
|
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
|
|
break;
|
|
|
|
case GX_LOAD_BP_REG: //0x61
|
|
{
|
|
u32 bp_cmd = DataReadU32();
|
|
// Execute
|
|
LoadBPReg(bp_cmd);
|
|
INCSTAT(stats.thisFrame.numBPLoads);
|
|
// Compile
|
|
emitter.ABI_CallFunctionC((void *)&LoadBPReg, bp_cmd);
|
|
}
|
|
break;
|
|
|
|
// draw primitives
|
|
default:
|
|
if (cmd_byte & 0x80)
|
|
{
|
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
|
|
|
// Execute
|
|
u16 numVertices = DataReadU16();
|
|
u8* StartAddress = VertexManager::s_pBaseBufferPointer;
|
|
VertexManager::Flush();
|
|
VertexLoaderManager::RunVertices(
|
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
|
numVertices);
|
|
u8* EndAddress = VertexManager::s_pCurBufferPointer;
|
|
u32 Vdatasize = (u32)(EndAddress - StartAddress);
|
|
if (size > 0)
|
|
{
|
|
// Compile
|
|
ReferencedDataRegion* NewRegion = new ReferencedDataRegion;
|
|
NewRegion->MustClean = true;
|
|
NewRegion->size = Vdatasize;
|
|
NewRegion->start_address = (u8*)new u8[Vdatasize];
|
|
NewRegion->hash = 0;
|
|
dl->InsertRegion(NewRegion);
|
|
memcpy(NewRegion->start_address, StartAddress, Vdatasize);
|
|
emitter.ABI_CallFunctionCCCP((void *)&VertexLoaderManager::RunCompiledVertices, cmd_byte & GX_VAT_MASK, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices, NewRegion->start_address);
|
|
|
|
}
|
|
const int tc[12] = {
|
|
g_VtxDesc.Position, g_VtxDesc.Normal, g_VtxDesc.Color0, g_VtxDesc.Color1, g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord,
|
|
g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord, g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, (g_VtxDesc.Hex >> 31) & 3
|
|
};
|
|
for(int i = 0; i < 12; i++)
|
|
{
|
|
if(tc[i] > 1)
|
|
{
|
|
u8* saddr = cached_arraybases[i];
|
|
int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices));
|
|
dl->InsertOverlapingRegion(saddr, arraySize,i,arraystrides[i]);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
emitter.ABI_EmitEpilogue(4);
|
|
INCSTAT(stats.numDListsCalled);
|
|
INCSTAT(stats.thisFrame.numDListsCalled);
|
|
Statistics::SwapDL();
|
|
}
|
|
g_pVideoData = old_pVideoData;
|
|
}
|
|
|
|
|
|
void Init()
|
|
{
|
|
CheckContextId = 0;
|
|
dlcode_cache = (u8*)AllocateExecutableMemory(DL_CODE_CACHE_SIZE, false); // Don't need low memory.
|
|
emitter.SetCodePtr(dlcode_cache);
|
|
}
|
|
|
|
void Shutdown()
|
|
{
|
|
Clear();
|
|
FreeMemoryPages(dlcode_cache, DL_CODE_CACHE_SIZE);
|
|
dlcode_cache = NULL;
|
|
}
|
|
|
|
void Clear()
|
|
{
|
|
VDLMap::iterator iter = dl_map.begin();
|
|
while (iter != dl_map.end()) {
|
|
VDlist &ParentEntry = iter->second;
|
|
DLMap::iterator childiter = ParentEntry.dl_map.begin();
|
|
while (childiter != ParentEntry.dl_map.end()) {
|
|
CachedDisplayList &entry = childiter->second;
|
|
entry.ClearRegions();
|
|
childiter++;
|
|
}
|
|
ParentEntry.dl_map.clear();
|
|
iter++;
|
|
}
|
|
dl_map.clear();
|
|
// Reset the cache pointers.
|
|
emitter.SetCodePtr(dlcode_cache);
|
|
}
|
|
|
|
void ProgressiveCleanup()
|
|
{
|
|
VDLMap::iterator iter = dl_map.begin();
|
|
while (iter != dl_map.end()) {
|
|
VDlist &ParentEntry = iter->second;
|
|
DLMap::iterator childiter = ParentEntry.dl_map.begin();
|
|
while (childiter != ParentEntry.dl_map.end())
|
|
{
|
|
CachedDisplayList &entry = childiter->second;
|
|
int limit = 3600;
|
|
if (entry.frame_count < frameCount - limit) {
|
|
// entry.Destroy();
|
|
entry.ClearRegions();
|
|
ParentEntry.dl_map.erase(childiter++); // (this is gcc standard!)
|
|
}
|
|
else
|
|
++childiter;
|
|
}
|
|
if(ParentEntry.dl_map.empty())
|
|
{
|
|
dl_map.erase(iter++);
|
|
}
|
|
else
|
|
iter++;
|
|
}
|
|
}
|
|
|
|
static size_t GetSpaceLeft()
|
|
{
|
|
return DL_CODE_CACHE_SIZE - (emitter.GetCodePtr() - dlcode_cache);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// NOTE - outside the namespace on purpose.
|
|
bool HandleDisplayList(u32 address, u32 size)
|
|
{
|
|
//Fixed DlistCaching now is fully functional still some things to workout
|
|
if(!g_ActiveConfig.bDlistCachingEnable)
|
|
return false;
|
|
if(size == 0) return false;
|
|
|
|
// Is this thread safe?
|
|
if (DLCache::GetSpaceLeft() < DL_CODE_CLEAR_THRESHOLD) {
|
|
DLCache::Clear();
|
|
}
|
|
|
|
u64 dl_id = DLCache::CreateMapId(address, size);
|
|
u64 vhash = 0;
|
|
DLCache::VDLMap::iterator Parentiter = DLCache::dl_map.find(dl_id);
|
|
DLCache::DLMap::iterator iter;
|
|
bool childexist = false;
|
|
if (Parentiter != DLCache::dl_map.end())
|
|
{
|
|
vhash = DLCache::CreateVMapId(Parentiter->second.VATUsed);
|
|
iter = Parentiter->second.dl_map.find(vhash);
|
|
childexist = iter != Parentiter->second.dl_map.end();
|
|
}
|
|
if (Parentiter != DLCache::dl_map.end() && childexist)
|
|
{
|
|
DLCache::CachedDisplayList &dl = iter->second;
|
|
if (dl.uncachable)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
switch (dl.pass)
|
|
{
|
|
case DLCache::DLPASS_COMPILE:
|
|
// First, check that the hash is the same as the last time.
|
|
if (dl.dl_hash != GetHash64(Memory::GetPointer(address), size, 0))
|
|
{
|
|
dl.uncachable = true;
|
|
return false;
|
|
}
|
|
DLCache::CompileAndRunDisplayList(address, size, &dl);
|
|
dl.pass = DLCache::DLPASS_RUN;
|
|
break;
|
|
case DLCache::DLPASS_RUN:
|
|
{
|
|
// Every N draws, check hash
|
|
if (dl.check != CheckContextId)
|
|
{
|
|
dl.check = CheckContextId;
|
|
if (dl.dl_hash != GetHash64(Memory::GetPointer(address), size, 0) || !dl.CheckRegions())
|
|
{
|
|
dl.uncachable = true;
|
|
dl.check = 60;
|
|
dl.ClearRegions();
|
|
return false;
|
|
}
|
|
}
|
|
dl.frame_count= frameCount;
|
|
u8 *old_datareader = g_pVideoData;
|
|
((void (*)())(void*)(dl.compiled_code))();
|
|
Statistics::SwapDL();
|
|
ADDSTAT(stats.thisFrame.numCPLoadsInDL, dl.num_cp_reg);
|
|
ADDSTAT(stats.thisFrame.numXFLoadsInDL, dl.num_xf_reg);
|
|
ADDSTAT(stats.thisFrame.numBPLoadsInDL, dl.num_bp_reg);
|
|
|
|
ADDSTAT(stats.thisFrame.numCPLoads, dl.num_cp_reg);
|
|
ADDSTAT(stats.thisFrame.numXFLoads, dl.num_xf_reg);
|
|
ADDSTAT(stats.thisFrame.numBPLoads, dl.num_bp_reg);
|
|
|
|
INCSTAT(stats.numDListsCalled);
|
|
INCSTAT(stats.thisFrame.numDListsCalled);
|
|
|
|
Statistics::SwapDL();
|
|
g_pVideoData = old_datareader;
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
DLCache::CachedDisplayList dl;
|
|
|
|
u32 dlvatused = DLCache::AnalyzeAndRunDisplayList(address, size, &dl);
|
|
dl.dl_hash = GetHash64(Memory::GetPointer(address), size,0);
|
|
dl.pass = DLCache::DLPASS_COMPILE;
|
|
dl.check = CheckContextId;
|
|
vhash = DLCache::CreateVMapId(dlvatused);
|
|
if(Parentiter != DLCache::dl_map.end())
|
|
{
|
|
DLCache::VDlist &vdl = Parentiter->second;
|
|
vdl.dl_map[vhash] = dl;
|
|
vdl.VATUsed = dlvatused;
|
|
vdl.count++;
|
|
}
|
|
else
|
|
{
|
|
DLCache::VDlist vdl;
|
|
vdl.dl_map[vhash] = dl;
|
|
vdl.VATUsed = dlvatused;
|
|
vdl.count = 1;
|
|
DLCache::dl_map[dl_id] = vdl;
|
|
|
|
}
|
|
return true;
|
|
|
|
}
|
|
|
|
void IncrementCheckContextId()
|
|
{
|
|
CheckContextId++;
|
|
}
|