dolphin/Source/Core/VideoCommon/OpcodeDecoding.cpp
JosJuice 54773bc5d2 VideoCommon: Remove calls to GetPointer
This fourth part of my series of patches to get rid of unsafe uses of
GetPointer takes care of the "easy" cases in VideoCommon. Three uses of
GetPointer now remain in Dolphin: VertexLoaderManager, TextureInfo, and
the software renderer's TextureSampler.
2024-04-09 21:08:57 +02:00

278 lines
8.2 KiB
C++

// Copyright 2008 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// DL facts:
// Ikaruga uses (nearly) NO display lists!
// Zelda WW uses TONS of display lists
// Zelda TP uses almost 100% display lists except menus (we like this!)
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they
// are while interpreting them, and hope that the vertex format doesn't change, though, if you do
// it right when they are called. The reason is that the vertex format affects the sizes of the
// vertices.
#include "VideoCommon/OpcodeDecoding.h"
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "Core/System.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/XFMemory.h"
#include "VideoCommon/XFStateManager.h"
#include "VideoCommon/XFStructs.h"
namespace OpcodeDecoder
{
bool g_record_fifo_data = false;
template <bool is_preprocess>
class RunCallback final : public Callback
{
public:
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
{
m_cycles += 18 + 6 * count;
if constexpr (!is_preprocess)
{
LoadXFReg(address, count, data);
INCSTAT(g_stats.this_frame.num_xf_loads);
}
}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
{
m_cycles += 12;
const u8 sub_command = command & CP_COMMAND_MASK;
if constexpr (!is_preprocess)
{
if (sub_command == MATINDEX_A)
{
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
auto& system = Core::System::GetInstance();
system.GetXFStateManager().SetTexMatrixChangedA(value);
}
else if (sub_command == MATINDEX_B)
{
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
auto& system = Core::System::GetInstance();
system.GetXFStateManager().SetTexMatrixChangedB(value);
}
else if (sub_command == VCD_LO || sub_command == VCD_HI)
{
VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
VertexLoaderManager::g_bases_dirty = true;
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true;
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
}
else if (sub_command == ARRAY_BASE)
{
VertexLoaderManager::g_bases_dirty = true;
}
INCSTAT(g_stats.this_frame.num_cp_loads);
}
else if constexpr (is_preprocess)
{
if (sub_command == VCD_LO || sub_command == VCD_HI)
{
VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
}
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
sub_command == CP_VAT_REG_C)
{
VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true;
}
}
GetCPState().LoadCPReg(command, value);
}
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
{
m_cycles += 12;
if constexpr (is_preprocess)
{
LoadBPRegPreprocess(command, value, m_cycles);
}
else
{
LoadBPReg(command, value, m_cycles);
INCSTAT(g_stats.this_frame.num_bp_loads);
}
}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
{
m_cycles += 6;
if constexpr (is_preprocess)
PreprocessIndexedXF(array, index, address, size);
else
LoadIndexedXF(array, index, address, size);
}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
{
// load vertices
const u32 size = vertex_size * num_vertices;
const u32 bytes =
VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, vertex_data);
ASSERT(bytes == size);
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
m_cycles += num_vertices * 4 * 3 + 6;
}
// This can't be inlined since it calls Run, which makes it recursive
// m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit
// to inlining Run for the display list directly.
OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size))
{
m_cycles += 6;
if (m_in_display_list)
{
WARN_LOG_FMT(VIDEO, "recursive display list detected");
}
else
{
m_in_display_list = true;
auto& system = Core::System::GetInstance();
if constexpr (is_preprocess)
{
auto& memory = system.GetMemory();
const u8* const start_address = memory.GetPointerForRange(address, size);
system.GetFifo().PushFifoAuxBuffer(start_address, size);
if (start_address != nullptr)
{
Run(start_address, size, *this);
}
}
else
{
const u8* start_address;
auto& fifo = system.GetFifo();
if (fifo.UseDeterministicGPUThread())
{
start_address = static_cast<u8*>(fifo.PopFifoAuxBuffer(size));
}
else
{
auto& memory = system.GetMemory();
start_address = memory.GetPointerForRange(address, size);
}
// Avoid the crash if memory.GetPointerForRange failed ..
if (start_address != nullptr)
{
// temporarily swap dl and non-dl (small "hack" for the stats)
g_stats.SwapDL();
Run(start_address, size, *this);
INCSTAT(g_stats.this_frame.num_dlists_called);
// un-swap
g_stats.SwapDL();
}
}
m_in_display_list = false;
}
}
OPCODE_CALLBACK(void OnNop(u32 count))
{
m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly...
}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
{
if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS)
{
// 'Zelda Four Swords' calls it and checks the metrics registers after that
m_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "GX 0x44");
}
else if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_INVL_VC)
{
// Invalidate Vertex Cache
m_cycles += 6;
DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
}
else
{
auto& system = Core::System::GetInstance();
system.GetCommandProcessor().HandleUnknownOpcode(opcode, data, is_preprocess);
m_cycles += 1;
}
}
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size))
{
ASSERT(size >= 1);
if constexpr (!is_preprocess)
{
// Display lists get added directly into the FIFO stream since this same callback is used to
// process them.
if (g_record_fifo_data && static_cast<Opcode>(data[0]) != Opcode::GX_CMD_CALL_DL)
{
Core::System::GetInstance().GetFifoRecorder().WriteGPCommand(data, size);
}
}
}
OPCODE_CALLBACK(CPState& GetCPState())
{
if constexpr (is_preprocess)
return g_preprocess_cp_state;
else
return g_main_cp_state;
}
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
VertexLoaderBase* loader = VertexLoaderManager::RefreshLoader<is_preprocess>(vat);
return loader->m_vertex_size;
}
u32 m_cycles = 0;
bool m_in_display_list = false;
};
template <bool is_preprocess>
u8* RunFifo(DataReader src, u32* cycles)
{
using CallbackT = RunCallback<is_preprocess>;
auto callback = CallbackT{};
u32 size = Run(src.GetPointer(), static_cast<u32>(src.size()), callback);
if (cycles != nullptr)
*cycles = callback.m_cycles;
src.Skip(size);
return src.GetPointer();
}
template u8* RunFifo<true>(DataReader src, u32* cycles);
template u8* RunFifo<false>(DataReader src, u32* cycles);
} // namespace OpcodeDecoder