diff --git a/Source/Core/VideoBackends/Software/OpcodeDecoder.cpp b/Source/Core/VideoBackends/Software/OpcodeDecoder.cpp index 66816e6626..6ae76c44b5 100644 --- a/Source/Core/VideoBackends/Software/OpcodeDecoder.cpp +++ b/Source/Core/VideoBackends/Software/OpcodeDecoder.cpp @@ -14,8 +14,8 @@ #include "VideoBackends/Software/SWVertexLoader.h" #include "VideoBackends/Software/SWVideoConfig.h" #include "VideoBackends/Software/XFMemLoader.h" -#include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" +#include "VideoCommon/VertexLoaderUtils.h" typedef void (*DecodingFunction)(u32); diff --git a/Source/Core/VideoBackends/Software/SWCommandProcessor.cpp b/Source/Core/VideoBackends/Software/SWCommandProcessor.cpp index 5b3af7f78a..9c7ff303b7 100644 --- a/Source/Core/VideoBackends/Software/SWCommandProcessor.cpp +++ b/Source/Core/VideoBackends/Software/SWCommandProcessor.cpp @@ -20,8 +20,8 @@ #include "VideoBackends/Software/SWCommandProcessor.h" #include "VideoBackends/Software/VideoBackend.h" -#include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" +#include "VideoCommon/VertexLoaderUtils.h" namespace SWCommandProcessor { diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 0612a743da..d847f87cfe 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -13,7 +13,6 @@ #include "VideoBackends/Software/TransformUnit.h" #include "VideoBackends/Software/XFMemLoader.h" -#include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Color.h" #include "VideoCommon/VertexLoader_Normal.h" @@ -286,20 +285,20 @@ void SWVertexLoader::LoadTexMtx(SWVertexLoader *vertexLoader, InputVertexData *v void SWVertexLoader::LoadPosition(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused) { - VertexManager::s_pCurBufferPointer = (u8*)&vertex->position; + g_vertex_manager_write_ptr = (u8*)&vertex->position; vertexLoader->m_positionLoader(); } void SWVertexLoader::LoadNormal(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused) { - VertexManager::s_pCurBufferPointer = (u8*)&vertex->normal; + g_vertex_manager_write_ptr = (u8*)&vertex->normal; vertexLoader->m_normalLoader(); } void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index) { u32 color; - VertexManager::s_pCurBufferPointer = (u8*)&color; + g_vertex_manager_write_ptr = (u8*)&color; colIndex = index; vertexLoader->m_colorLoader[index](); @@ -309,7 +308,7 @@ void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *ve void SWVertexLoader::LoadTexCoord(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index) { - VertexManager::s_pCurBufferPointer = (u8*)&vertex->texCoords[index]; + g_vertex_manager_write_ptr = (u8*)&vertex->texCoords[index]; tcIndex = index; vertexLoader->m_texCoordLoader[index](); } diff --git a/Source/Core/VideoCommon/BoundingBox.cpp b/Source/Core/VideoCommon/BoundingBox.cpp index 1c007677e0..4b4400f2e7 100644 --- a/Source/Core/VideoCommon/BoundingBox.cpp +++ b/Source/Core/VideoCommon/BoundingBox.cpp @@ -32,7 +32,7 @@ static PortableVertexDeclaration vertexDecl; // Gets the pointer to the current buffer position void LOADERDECL SetVertexBufferPosition() { - bufferPos = VertexManager::s_pCurBufferPointer; + bufferPos = g_vertex_manager_write_ptr; } // Prepares the bounding box for new primitive data diff --git a/Source/Core/VideoCommon/DataReader.h b/Source/Core/VideoCommon/DataReader.h index 5b6247c9b3..f7e030739f 100644 --- a/Source/Core/VideoCommon/DataReader.h +++ b/Source/Core/VideoCommon/DataReader.h @@ -5,124 +5,61 @@ #pragma once #include "Common/Common.h" -#include "VideoCommon/VertexManagerBase.h" - -extern u8* g_video_buffer_read_ptr; - -#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) -#include -#endif - -__forceinline void DataSkip(u32 skip) -{ - g_video_buffer_read_ptr += skip; -} - -// probably unnecessary -template -__forceinline void DataSkip() -{ - g_video_buffer_read_ptr += count; -} - -template -__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr) -{ - auto const result = Common::FromBigEndian(*reinterpret_cast(*bufp + _uOffset)); - return result; -} - -// TODO: kill these -__forceinline u8 DataPeek8(int _uOffset) -{ - return DataPeek(_uOffset); -} - -__forceinline u16 DataPeek16(int _uOffset) -{ - return DataPeek(_uOffset); -} - -__forceinline u32 DataPeek32(int _uOffset) -{ - return DataPeek(_uOffset); -} - -template -__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr) -{ - auto const result = DataPeek(0, bufp); - *bufp += sizeof(T); - return result; -} class DataReader { public: - inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {} - inline ~DataReader() { g_video_buffer_read_ptr += offset; } - template inline T Read() + __forceinline DataReader() + : buffer(nullptr), end(nullptr) {} + + __forceinline DataReader(u8* src, u8* _end) + : buffer(src), end(_end) {} + + __forceinline void WritePointer(u8** src) { - const T result = Common::FromBigEndian(*(T*)(buffer + offset)); - offset += sizeof(T); + *src = buffer; + } + + __forceinline u8* operator=(u8* src) + { + buffer = src; + return src; + } + + __forceinline size_t size() + { + return end - buffer; + } + + template __forceinline T Peek(int offset = 0) + { + T data = *(T*)(buffer + offset); + if (swapped) + data = Common::FromBigEndian(data); + return data; + } + + template __forceinline T Read() + { + const T result = Peek(); + buffer += sizeof(T); return result; } -private: - u8 *buffer; - int offset; -}; -// TODO: kill these -__forceinline u8 DataReadU8() -{ - return DataRead(); -} - -__forceinline s8 DataReadS8() -{ - return DataRead(); -} - -__forceinline u16 DataReadU16() -{ - return DataRead(); -} - -__forceinline u32 DataReadU32() -{ - return DataRead(); -} - -__forceinline u32 DataReadU32Unswapped() -{ - u32 tmp = *(u32*)g_video_buffer_read_ptr; - g_video_buffer_read_ptr += 4; - return tmp; -} - -__forceinline u8* DataGetPosition() -{ - return g_video_buffer_read_ptr; -} - -template -__forceinline void DataWrite(T data) -{ - *(T*)VertexManager::s_pCurBufferPointer = data; - VertexManager::s_pCurBufferPointer += sizeof(T); -} - -class DataWriter -{ -public: - inline DataWriter() : buffer(VertexManager::s_pCurBufferPointer), offset(0) {} - inline ~DataWriter() { VertexManager::s_pCurBufferPointer += offset; } - template inline void Write(T data) + template __forceinline void Write(T data) { - *(T*)(buffer+offset) = data; - offset += sizeof(T); + if (swapped) + data = Common::FromBigEndian(data); + *(T*)(buffer) = data; + buffer += sizeof(T); } + + template __forceinline void Skip(size_t data = 1) + { + buffer += sizeof(T) * data; + } + private: - u8 *buffer; - int offset; + u8* __restrict buffer; + u8* end; }; diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index ead6241fde..496ac0a851 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -41,10 +41,10 @@ bool g_use_deterministic_gpu_thread; static std::mutex s_video_buffer_lock; static std::condition_variable s_video_buffer_cond; static u8* s_video_buffer; -u8* g_video_buffer_read_ptr; +static u8* s_video_buffer_read_ptr; static std::atomic s_video_buffer_write_ptr; static std::atomic s_video_buffer_seen_ptr; -u8* g_video_buffer_pp_read_ptr; +static u8* s_video_buffer_pp_read_ptr; // The read_ptr is always owned by the GPU thread. In normal mode, so is the // write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode, // things get a bit more complicated: @@ -63,11 +63,11 @@ void Fifo_DoState(PointerWrap &p) u8* write_ptr = s_video_buffer_write_ptr; p.DoPointer(write_ptr, s_video_buffer); s_video_buffer_write_ptr = write_ptr; - p.DoPointer(g_video_buffer_read_ptr, s_video_buffer); + p.DoPointer(s_video_buffer_read_ptr, s_video_buffer); if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread) { // We're good and paused, right? - s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr; + s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr; } p.Do(g_bSkipCurrentFrame); } @@ -106,8 +106,8 @@ void Fifo_Shutdown() FreeMemoryPages(s_video_buffer, FIFO_SIZE); s_video_buffer = nullptr; s_video_buffer_write_ptr = nullptr; - g_video_buffer_pp_read_ptr = nullptr; - g_video_buffer_read_ptr = nullptr; + s_video_buffer_pp_read_ptr = nullptr; + s_video_buffer_read_ptr = nullptr; s_video_buffer_seen_ptr = nullptr; s_fifo_aux_write_ptr = nullptr; s_fifo_aux_read_ptr = nullptr; @@ -169,15 +169,15 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) if (may_move_read_ptr) { // what's left over in the buffer - size_t size = write_ptr - g_video_buffer_pp_read_ptr; + size_t size = write_ptr - s_video_buffer_pp_read_ptr; - memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size); + memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size); // This change always decreases the pointers. We write seen_ptr // after write_ptr here, and read it before in RunGpuLoop, so // 'write_ptr > seen_ptr' there cannot become spuriously true. s_video_buffer_write_ptr = write_ptr = s_video_buffer + size; - g_video_buffer_pp_read_ptr = s_video_buffer; - g_video_buffer_read_ptr = s_video_buffer; + s_video_buffer_pp_read_ptr = s_video_buffer; + s_video_buffer_read_ptr = s_video_buffer; s_video_buffer_seen_ptr = write_ptr; } } @@ -213,15 +213,15 @@ static void ReadDataFromFifo(u32 readPtr) size_t len = 32; if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr)) { - size_t existing_len = s_video_buffer_write_ptr - g_video_buffer_read_ptr; + size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr; if (len > (size_t)(FIFO_SIZE - existing_len)) { PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE); return; } - memmove(s_video_buffer, g_video_buffer_read_ptr, existing_len); + memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len); s_video_buffer_write_ptr = s_video_buffer + existing_len; - g_video_buffer_read_ptr = s_video_buffer; + s_video_buffer_read_ptr = s_video_buffer; } // Copy new video instructions to s_video_buffer for future use in rendering the new picture Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); @@ -238,13 +238,13 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) // We can't wrap around while the GPU is working on the data. // This should be very rare due to the reset in SyncGPU. SyncGPU(SYNC_GPU_WRAPAROUND); - if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr) + if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr) { PanicAlert("desynced read pointers"); return; } write_ptr = s_video_buffer_write_ptr; - size_t existing_len = write_ptr - g_video_buffer_pp_read_ptr; + size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr; if (len > (size_t)(FIFO_SIZE - existing_len)) { PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE); @@ -252,17 +252,17 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) } } Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); - OpcodeDecoder_Preprocess(write_ptr + len, false); + s_video_buffer_pp_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); // This would have to be locked if the GPU thread didn't spin. s_video_buffer_write_ptr = write_ptr + len; } void ResetVideoBuffer() { - g_video_buffer_read_ptr = s_video_buffer; + s_video_buffer_read_ptr = s_video_buffer; s_video_buffer_write_ptr = s_video_buffer; s_video_buffer_seen_ptr = s_video_buffer; - g_video_buffer_pp_read_ptr = s_video_buffer; + s_video_buffer_pp_read_ptr = s_video_buffer; s_fifo_aux_write_ptr = s_fifo_aux_data; s_fifo_aux_read_ptr = s_fifo_aux_data; } @@ -294,7 +294,7 @@ void RunGpuLoop() // See comment in SyncGPU if (write_ptr > seen_ptr) { - OpcodeDecoder_Run(write_ptr, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); { std::lock_guard vblk(s_video_buffer_lock); @@ -330,7 +330,7 @@ void RunGpuLoop() u8* write_ptr = s_video_buffer_write_ptr; - cyclesExecuted = OpcodeDecoder_Run(write_ptr, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted) @@ -338,7 +338,7 @@ void RunGpuLoop() Common::AtomicStore(fifo.CPReadPointer, readPtr); Common::AtomicAdd(fifo.CPReadWriteDistance, -32); - if ((write_ptr - g_video_buffer_read_ptr) == 0) + if ((write_ptr - s_video_buffer_read_ptr) == 0) Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); } @@ -403,7 +403,7 @@ void RunGpu() FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); ReadDataFromFifo(fifo.CPReadPointer); - OpcodeDecoder_Run(s_video_buffer_write_ptr, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); FPURoundMode::LoadSIMDState(); } @@ -454,7 +454,7 @@ void Fifo_UpdateWantDeterminism(bool want) if (gpu_thread) { // These haven't been updated in non-deterministic mode. - s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr; + s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr; CopyPreprocessCPStateFromMain(); VertexLoaderManager::MarkAllDirty(); } diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index 3a441c187c..d0b3c07015 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -17,7 +17,6 @@ extern bool g_bSkipCurrentFrame; // and can change at runtime. extern bool g_use_deterministic_gpu_thread; extern std::atomic g_video_buffer_write_ptr_xthread; -extern u8* g_video_buffer_pp_read_ptr; void Fifo_Init(); void Fifo_Shutdown(); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index a7dd5b6a85..34b821605f 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -36,7 +36,6 @@ bool g_bRecordFifoData = false; static u32 InterpretDisplayList(u32 address, u32 size) { - u8* old_pVideoData = g_video_buffer_read_ptr; u8* startAddress; if (g_use_deterministic_gpu_thread) @@ -49,41 +48,29 @@ static u32 InterpretDisplayList(u32 address, u32 size) // Avoid the crash if Memory::GetPointer failed .. if (startAddress != nullptr) { - g_video_buffer_read_ptr = startAddress; - // temporarily swap dl and non-dl (small "hack" for the stats) Statistics::SwapDL(); - u8 *end = g_video_buffer_read_ptr + size; - cycles = OpcodeDecoder_Run(end, true); + OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), &cycles, true); INCSTAT(stats.thisFrame.numDListsCalled); // un-swap Statistics::SwapDL(); } - // reset to the old pointer - g_video_buffer_read_ptr = old_pVideoData; - return cycles; } static void InterpretDisplayListPreprocess(u32 address, u32 size) { - u8* old_read_ptr = g_video_buffer_pp_read_ptr; u8* startAddress = Memory::GetPointer(address); PushFifoAuxBuffer(startAddress, size); if (startAddress != nullptr) { - g_video_buffer_pp_read_ptr = startAddress; - - u8 *end = startAddress + size; - OpcodeDecoder_Preprocess(end, true); + OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), nullptr, true); } - - g_video_buffer_pp_read_ptr = old_read_ptr; } static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) @@ -134,180 +121,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) } } -template -static u32 Decode(u8* end, bool in_display_list) -{ - u8 *opcodeStart = *bufp; - if (*bufp == end) - return 0; - - u8 cmd_byte = DataRead(bufp); - u32 cycles; - int refarray; - switch (cmd_byte) - { - case GX_NOP: - cycles = 6; // Hm, this means that we scan over nop streams pretty slowly... - break; - - case GX_LOAD_CP_REG: //0x08 - { - if (end - *bufp < 1 + 4) - return 0; - cycles = 12; - u8 sub_cmd = DataRead(bufp); - u32 value = DataRead(bufp); - LoadCPReg(sub_cmd, value, is_preprocess); - if (!is_preprocess) - INCSTAT(stats.thisFrame.numCPLoads); - } - break; - - case GX_LOAD_XF_REG: - { - if (end - *bufp < 4) - return 0; - u32 Cmd2 = DataRead(bufp); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - if ((size_t) (end - *bufp) < transfer_size * sizeof(u32)) - return 0; - cycles = 18 + 6 * transfer_size; - if (!is_preprocess) - { - u32 xf_address = Cmd2 & 0xFFFF; - LoadXFReg(transfer_size, xf_address); - - INCSTAT(stats.thisFrame.numXFLoads); - } - else - { - *bufp += transfer_size * sizeof(u32); - } - } - break; - - case GX_LOAD_INDX_A: //used for position matrices - refarray = 0xC; - goto load_indx; - case GX_LOAD_INDX_B: //used for normal matrices - refarray = 0xD; - goto load_indx; - case GX_LOAD_INDX_C: //used for postmatrices - refarray = 0xE; - goto load_indx; - case GX_LOAD_INDX_D: //used for lights - refarray = 0xF; - goto load_indx; - load_indx: - if (end - *bufp < 4) - return 0; - cycles = 6; - if (is_preprocess) - PreprocessIndexedXF(DataRead(bufp), refarray); - else - LoadIndexedXF(DataRead(bufp), refarray); - break; - - case GX_CMD_CALL_DL: - { - if (end - *bufp < 8) - return 0; - u32 address = DataRead(bufp); - u32 count = DataRead(bufp); - - if (in_display_list) - { - cycles = 6; - WARN_LOG(VIDEO,"recursive display list detected"); - } - else - { - if (is_preprocess) - InterpretDisplayListPreprocess(address, count); - else - cycles = 6 + InterpretDisplayList(address, count); - } - } - break; - - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that - cycles = 6; - DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); - break; - - case GX_CMD_INVL_VC: // Invalidate Vertex Cache - cycles = 6; - DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); - break; - - case GX_LOAD_BP_REG: //0x61 - // In skipped_frame case: We have to let BP writes through because they set - // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. - { - if (end - *bufp < 4) - return 0; - cycles = 12; - u32 bp_cmd = DataRead(bufp); - if (is_preprocess) - { - LoadBPRegPreprocess(bp_cmd); - } - else - { - LoadBPReg(bp_cmd); - INCSTAT(stats.thisFrame.numBPLoads); - } - } - break; - - // draw primitives - default: - if ((cmd_byte & 0xC0) == 0x80) - { - cycles = 1600; - // load vertices - if (end - *bufp < 2) - return 0; - u16 num_vertices = DataRead(bufp); - - if (is_preprocess) - { - size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess); - if ((size_t) (end - *bufp) < size) - return 0; - *bufp += size; - } - else - { - if (!VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - num_vertices, - end - *bufp, - g_bSkipCurrentFrame)) - return 0; - } - } - else - { - UnknownOpcode(cmd_byte, opcodeStart, is_preprocess); - cycles = 1; - } - break; - } - - // Display lists get added directly into the FIFO stream - if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) - FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart)); - - // In is_preprocess mode, we don't actually care about cycles, at least for - // now... make sure the compiler realizes that. - return is_preprocess ? 1 : cycles; -} - void OpcodeDecoder_Init() { - g_video_buffer_read_ptr = GetVideoBufferStartPtr(); } @@ -315,33 +130,189 @@ void OpcodeDecoder_Shutdown() { } -u32 OpcodeDecoder_Run(u8* end, bool in_display_list) +template +u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list) { u32 totalCycles = 0; + u8* opcodeStart; while (true) { - u8* old = g_video_buffer_read_ptr; - u32 cycles = Decode(end, in_display_list); - if (cycles == 0) + src.WritePointer(&opcodeStart); + + if (!src.size()) + goto end; + + u8 cmd_byte = src.Read(); + int refarray; + switch (cmd_byte) { - g_video_buffer_read_ptr = old; + case GX_NOP: + totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly... + break; + + case GX_LOAD_CP_REG: //0x08 + { + if (src.size() < 1 + 4) + goto end; + totalCycles += 12; + u8 sub_cmd = src.Read(); + u32 value = src.Read(); + LoadCPReg(sub_cmd, value, is_preprocess); + if (!is_preprocess) + INCSTAT(stats.thisFrame.numCPLoads); + } + break; + + case GX_LOAD_XF_REG: + { + if (src.size() < 4) + goto end; + u32 Cmd2 = src.Read(); + int transfer_size = ((Cmd2 >> 16) & 15) + 1; + if (src.size() < transfer_size * sizeof(u32)) + goto end; + totalCycles += 18 + 6 * transfer_size; + if (!is_preprocess) + { + u32 xf_address = Cmd2 & 0xFFFF; + LoadXFReg(transfer_size, xf_address, src); + + INCSTAT(stats.thisFrame.numXFLoads); + } + src.Skip(transfer_size); + } + break; + + case GX_LOAD_INDX_A: //used for position matrices + refarray = 0xC; + goto load_indx; + case GX_LOAD_INDX_B: //used for normal matrices + refarray = 0xD; + goto load_indx; + case GX_LOAD_INDX_C: //used for postmatrices + refarray = 0xE; + goto load_indx; + case GX_LOAD_INDX_D: //used for lights + refarray = 0xF; + goto load_indx; + load_indx: + if (src.size() < 4) + goto end; + totalCycles += 6; + if (is_preprocess) + PreprocessIndexedXF(src.Read(), refarray); + else + LoadIndexedXF(src.Read(), refarray); + break; + + case GX_CMD_CALL_DL: + { + if (src.size() < 8) + goto end; + u32 address = src.Read(); + u32 count = src.Read(); + + if (in_display_list) + { + totalCycles += 6; + WARN_LOG(VIDEO,"recursive display list detected"); + } + else + { + if (is_preprocess) + InterpretDisplayListPreprocess(address, count); + else + totalCycles += 6 + InterpretDisplayList(address, count); + } + } + break; + + case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that + totalCycles += 6; + DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); + break; + + case GX_CMD_INVL_VC: // Invalidate Vertex Cache + totalCycles += 6; + DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); + break; + + case GX_LOAD_BP_REG: //0x61 + // In skipped_frame case: We have to let BP writes through because they set + // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. + { + if (src.size() < 4) + goto end; + totalCycles += 12; + u32 bp_cmd = src.Read(); + if (is_preprocess) + { + LoadBPRegPreprocess(bp_cmd); + } + else + { + LoadBPReg(bp_cmd); + INCSTAT(stats.thisFrame.numBPLoads); + } + } + break; + + // draw primitives + default: + if ((cmd_byte & 0xC0) == 0x80) + { + // load vertices + if (src.size() < 2) + goto end; + u16 num_vertices = src.Read(); + + if (is_preprocess) + { + size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess); + if (src.size() < size) + goto end; + src.Skip(size); + } + else + { + int bytes = VertexLoaderManager::RunVertices( + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + num_vertices, + src, + g_bSkipCurrentFrame); + + if (bytes < 0) + goto end; + else + src.Skip(bytes); + } + totalCycles += 1600; + } + else + { + UnknownOpcode(cmd_byte, opcodeStart, is_preprocess); + totalCycles += 1; + } break; } - totalCycles += cycles; + + // Display lists get added directly into the FIFO stream + if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) + { + u8* opcodeEnd; + src.WritePointer(&opcodeEnd); + FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart)); + } } - return totalCycles; + +end: + if (cycles) + { + *cycles = totalCycles; + } + return opcodeStart; } -void OpcodeDecoder_Preprocess(u8 *end, bool in_display_list) -{ - while (true) - { - u8* old = g_video_buffer_pp_read_ptr; - u32 cycles = Decode(end, in_display_list); - if (cycles == 0) - { - g_video_buffer_pp_read_ptr = old; - break; - } - } -} +template u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); +template u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index a217da556e..5fd03e26f3 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -5,6 +5,7 @@ #pragma once #include "Common/CommonTypes.h" +#include "VideoCommon/DataReader.h" #define GX_NOP 0x00 @@ -40,5 +41,6 @@ extern bool g_bRecordFifoData; void OpcodeDecoder_Init(); void OpcodeDecoder_Shutdown(); -u32 OpcodeDecoder_Run(u8* end, bool in_display_list); -void OpcodeDecoder_Preprocess(u8* end, bool in_display_list); + +template +u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 3f7161b8f7..f293564e85 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -45,6 +45,10 @@ int colElements[2]; GC_ALIGNED128(float posScale[4]); GC_ALIGNED64(float tcScale[8][2]); +// This pointer is used as the source/dst for all fixed function loader calls +u8* g_video_buffer_read_ptr; +u8* g_vertex_manager_write_ptr; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -93,8 +97,8 @@ static void LOADERDECL TexMtx_Write_Float4() { #if _M_SSE >= 0x200 __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]); - _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); - VertexManager::s_pCurBufferPointer += sizeof(float) * 4; + _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); + g_vertex_manager_write_ptr += sizeof(float) * 4; #else DataWrite(0.f); DataWrite(0.f); @@ -487,10 +491,13 @@ void VertexLoader::ConvertVertices ( int count ) #endif } -void VertexLoader::RunVertices(const VAT& vat, int primitive, int const count) +int VertexLoader::RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst) { + dst.WritePointer(&g_vertex_manager_write_ptr); + src.WritePointer(&g_video_buffer_read_ptr); SetupRunVertices(vat, primitive, count); ConvertVertices(count); + return count; } void VertexLoader::SetVAT(const VAT& vat) diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 71159b60d7..df1bb445a5 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -18,6 +18,7 @@ #include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexLoaderUtils.h" #if _M_SSE >= 0x401 #include @@ -117,7 +118,7 @@ public: { return m_native_vtx_decl; } void SetupRunVertices(const VAT& vat, int primitive, int const count); - void RunVertices(const VAT& vat, int primitive, int count); + int RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst); // For debugging / profiling void AppendToString(std::string *dest) const; @@ -193,9 +194,9 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale) { coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2); if (threeOut) - _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, coords); + _mm_storeu_si128((__m128i*)g_vertex_manager_write_ptr, coords); else - _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, coords); + _mm_storel_epi64((__m128i*)g_vertex_manager_write_ptr, coords); } else { @@ -212,11 +213,11 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale) __m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale); if (threeOut) - _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, out); + _mm_storeu_ps((float*)g_vertex_manager_write_ptr, out); else - _mm_storel_pi((__m64*)VertexManager::s_pCurBufferPointer, out); + _mm_storel_pi((__m64*)g_vertex_manager_write_ptr, out); } - VertexManager::s_pCurBufferPointer += sizeof(float) * (2 + threeOut); + g_vertex_manager_write_ptr += sizeof(float) * (2 + threeOut); } -#endif \ No newline at end of file +#endif diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index b8132fa806..cf638c2495 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -130,24 +130,23 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) return loader; } -bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing) +int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing) { if (!count) - return true; + return 0; CPState* state = &g_main_cp_state; VertexLoader* loader = RefreshLoader(vtx_attr_group, state); - size_t size = count * loader->GetVertexSize(); - if (buf_size < size) - return false; + int size = count * loader->GetVertexSize(); + if ((int)src.size() < size) + return -1; if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5)) { // if cull mode is CULL_ALL, ignore triangles and quads - DataSkip((u32)size); - return true; + return size; } NativeVertexFormat* native = loader->GetNativeVertexFormat(); @@ -157,16 +156,18 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, VertexManager::Flush(); s_current_vtx_fmt = native; - VertexManager::PrepareForAdditionalData(primitive, count, + DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, loader->GetNativeVertexDeclaration().stride); - loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count); + count = loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count, src, dst); IndexGenerator::AddIndices(primitive, count); + VertexManager::FlushData(count, loader->GetNativeVertexDeclaration().stride); + ADDSTAT(stats.thisFrame.numPrims, count); INCSTAT(stats.thisFrame.numPrimitiveJoins); - return true; + return size; } int GetVertexSize(int vtx_attr_group, bool preprocess) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index 8995ad2d7a..e100480fd3 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/DataReader.h" #include "VideoCommon/NativeVertexFormat.h" namespace VertexLoaderManager @@ -17,8 +18,9 @@ namespace VertexLoaderManager void MarkAllDirty(); int GetVertexSize(int vtx_attr_group, bool preprocess); - // Returns false if buf_size is insufficient. - bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false); + + // Returns -1 if buf_size is insufficient, else the amount of bytes consumed + int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing = false); // For debugging void AppendListToString(std::string *dest); diff --git a/Source/Core/VideoCommon/VertexLoaderUtils.h b/Source/Core/VideoCommon/VertexLoaderUtils.h new file mode 100644 index 0000000000..f9befb8444 --- /dev/null +++ b/Source/Core/VideoCommon/VertexLoaderUtils.h @@ -0,0 +1,94 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + + +#include "Common/Common.h" +#include "VideoCommon/VertexManagerBase.h" + +extern u8* g_video_buffer_read_ptr; +extern u8* g_vertex_manager_write_ptr; + + +__forceinline void DataSkip(u32 skip) +{ + g_video_buffer_read_ptr += skip; +} + +// probably unnecessary +template +__forceinline void DataSkip() +{ + g_video_buffer_read_ptr += count; +} + +template +__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr) +{ + auto const result = Common::FromBigEndian(*reinterpret_cast(*bufp + _uOffset)); + return result; +} + +// TODO: kill these +__forceinline u8 DataPeek8(int _uOffset) +{ + return DataPeek(_uOffset); +} + +__forceinline u16 DataPeek16(int _uOffset) +{ + return DataPeek(_uOffset); +} + +__forceinline u32 DataPeek32(int _uOffset) +{ + return DataPeek(_uOffset); +} + +template +__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr) +{ + auto const result = DataPeek(0, bufp); + *bufp += sizeof(T); + return result; +} + +// TODO: kill these +__forceinline u8 DataReadU8() +{ + return DataRead(); +} + +__forceinline s8 DataReadS8() +{ + return DataRead(); +} + +__forceinline u16 DataReadU16() +{ + return DataRead(); +} + +__forceinline u32 DataReadU32() +{ + return DataRead(); +} + +__forceinline u32 DataReadU32Unswapped() +{ + u32 tmp = *(u32*)g_video_buffer_read_ptr; + g_video_buffer_read_ptr += 4; + return tmp; +} + +__forceinline u8* DataGetPosition() +{ + return g_video_buffer_read_ptr; +} + +template +__forceinline void DataWrite(T data) +{ + *(T*)g_vertex_manager_write_ptr = data; + g_vertex_manager_write_ptr += sizeof(T); +} diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 8ff5a767e7..1e428b66a2 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -14,7 +14,7 @@ #include "VideoCommon/VideoCommon.h" // warning: mapping buffer should be disabled to use this -#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); +#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; @@ -43,13 +43,14 @@ template __forceinline void ReadIndirect(const T* data) { static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); - DataWriter dst; + DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) { dst.Write(FracAdjust(Common::FromBigEndian(data[i]))); } + dst.WritePointer(&g_vertex_manager_write_ptr); LOG_NORM(); } diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 6c50dd1560..60a6115ee0 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -13,7 +13,7 @@ #include "VideoCommon/VideoCommon.h" // Thoughts on the implementation of a vertex loader compiler. -// s_pCurBufferPointer should definitely be in a register. +// g_vertex_manager_write_ptr should definitely be in a register. // Could load the position scale factor in XMM7, for example. // The pointer inside DataReadU8 in another. @@ -75,12 +75,14 @@ void LOADERDECL Pos_ReadDirect() { static_assert(N <= 3, "N > 3 is not sane!"); auto const scale = posScale[0]; - DataWriter dst; - DataReader src; + DataReader dst(g_vertex_manager_write_ptr, nullptr); + DataReader src(g_video_buffer_read_ptr, nullptr); for (int i = 0; i < 3; ++i) dst.Write(i(), scale) : 0.f); + dst.WritePointer(&g_vertex_manager_write_ptr); + src.WritePointer(&g_video_buffer_read_ptr); LOG_VTX(); } @@ -93,11 +95,12 @@ void LOADERDECL Pos_ReadIndex() auto const index = DataRead(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); auto const scale = posScale[0]; - DataWriter dst; + DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i < 3; ++i) dst.Write(i __forceinline void LOG_TEX<1>() { // warning: mapping buffer should be disabled to use this - // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]); + // PRIM_LOG("tex: %f, ", ((float*)g_vertex_manager_write_ptr)[-1]); } template <> __forceinline void LOG_TEX<2>() { // warning: mapping buffer should be disabled to use this - // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); + // PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); } static void LOADERDECL TexCoord_Read_Dummy() @@ -50,12 +50,14 @@ template void LOADERDECL TexCoord_ReadDirect() { auto const scale = tcScale[tcIndex][0]; - DataWriter dst; - DataReader src; + DataReader dst(g_vertex_manager_write_ptr, nullptr); + DataReader src(g_video_buffer_read_ptr, nullptr); for (int i = 0; i != N; ++i) dst.Write(TCScale(src.Read(), scale)); + dst.WritePointer(&g_vertex_manager_write_ptr); + src.WritePointer(&g_video_buffer_read_ptr); LOG_TEX(); ++tcIndex; @@ -70,11 +72,12 @@ void LOADERDECL TexCoord_ReadIndex() auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); auto const scale = tcScale[tcIndex][0]; - DataWriter dst; + DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) dst.Write(TCScale(Common::FromBigEndian(data[i]), scale)); + dst.WritePointer(&g_vertex_manager_write_ptr); LOG_TEX(); ++tcIndex; } diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index dcd8780ede..84e0bb5efe 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -51,7 +51,7 @@ u32 VertexManager::GetRemainingSize() return (u32)(s_pEndBufferPointer - s_pCurBufferPointer); } -void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) +DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) { // The SSE vertex loader can write up to 4 bytes past the end u32 const needed_vertex_bytes = count * stride + 4; @@ -83,6 +83,13 @@ void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 strid g_vertex_manager->ResetBuffer(stride); IsFlushed = false; } + + return DataReader(s_pCurBufferPointer, s_pEndBufferPointer); +} + +void VertexManager::FlushData(u32 count, u32 stride) +{ + s_pCurBufferPointer += count * stride; } u32 VertexManager::GetRemainingIndices(int primitive) diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index 604bf0a480..c854cd3586 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -3,6 +3,7 @@ #include #include "Common/CommonFuncs.h" #include "Common/CommonTypes.h" +#include "VideoCommon/DataReader.h" class NativeVertexFormat; class PointerWrap; @@ -31,21 +32,14 @@ public: // needs to be virtual for DX11's dtor virtual ~VertexManager(); - static u8 *s_pCurBufferPointer; - static u8 *s_pBaseBufferPointer; - static u8 *s_pEndBufferPointer; - - static u32 GetRemainingSize(); - static void PrepareForAdditionalData(int primitive, u32 count, u32 stride); - static u32 GetRemainingIndices(int primitive); + static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride); + static void FlushData(u32 count, u32 stride); static void Flush(); virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0; static void DoState(PointerWrap& p); - virtual void CreateDeviceObjects(){} - virtual void DestroyDeviceObjects(){} protected: virtual void vDoState(PointerWrap& p) { } @@ -54,12 +48,20 @@ protected: virtual void ResetBuffer(u32 stride) = 0; + static u8* s_pCurBufferPointer; + static u8* s_pBaseBufferPointer; + static u8* s_pEndBufferPointer; + + static u32 GetRemainingSize(); + static u32 GetRemainingIndices(int primitive); + private: static bool IsFlushed; - // virtual void Draw(u32 stride, bool alphapass) = 0; - // temp virtual void vFlush(bool useDstAlpha) = 0; + + virtual void CreateDeviceObjects() {} + virtual void DestroyDeviceObjects() {} }; extern VertexManager *g_vertex_manager; diff --git a/Source/Core/VideoCommon/VideoCommon.h b/Source/Core/VideoCommon/VideoCommon.h index e4ba92adda..cdc14b0b1d 100644 --- a/Source/Core/VideoCommon/VideoCommon.h +++ b/Source/Core/VideoCommon/VideoCommon.h @@ -61,7 +61,7 @@ struct TargetRectangle : public MathUtil::Rectangle #endif // warning: mapping buffer should be disabled to use this -// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); +// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); #define LOG_VTX() diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 31dc089736..237becb45e 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -117,6 +117,7 @@ + @@ -151,4 +152,4 @@ - \ No newline at end of file + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index f285ce35d4..971bf59bbf 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -275,6 +275,9 @@ Vertex Loading + + Vertex Loading + Util @@ -285,4 +288,4 @@ - \ No newline at end of file + diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 33077aa69b..6ec28476d8 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -6,6 +6,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/CPMemory.h" +#include "VideoCommon/DataReader.h" // Lighting @@ -273,6 +274,6 @@ struct XFMemory extern XFMemory xfmem; -void LoadXFReg(u32 transferSize, u32 address); +void LoadXFReg(u32 transferSize, u32 address, DataReader src); void LoadIndexedXF(u32 val, int array); void PreprocessIndexedXF(u32 val, int refarray); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index 0552aa0986..927fed8077 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -19,14 +19,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress) VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); } -static void XFRegWritten(int transferSize, u32 baseAddress) +static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) { u32 address = baseAddress; u32 dataIndex = 0; while (transferSize > 0 && address < 0x1058) { - u32 newValue = DataPeek(dataIndex * sizeof(u32)); + u32 newValue = src.Peek(dataIndex * sizeof(u32)); u32 nextAddress = address + 1; switch (address) @@ -193,7 +193,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress) } } -void LoadXFReg(u32 transferSize, u32 baseAddress) +void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) { // do not allow writes past registers if (baseAddress + transferSize > 0x1058) @@ -229,17 +229,17 @@ void LoadXFReg(u32 transferSize, u32 baseAddress) XFMemWritten(xfMemTransferSize, xfMemBase); for (u32 i = 0; i < xfMemTransferSize; i++) { - ((u32*)&xfmem)[xfMemBase + i] = DataRead(); + ((u32*)&xfmem)[xfMemBase + i] = src.Read(); } } // write to XF regs if (transferSize > 0) { - XFRegWritten(transferSize, baseAddress); + XFRegWritten(transferSize, baseAddress, src); for (u32 i = 0; i < transferSize; i++) { - ((u32*)&xfmem)[baseAddress + i] = DataRead(); + ((u32*)&xfmem)[baseAddress + i] = src.Read(); } } } diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 576e9a8a53..45d8a23ab1 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -74,12 +74,14 @@ protected: void ResetPointers() { - g_video_buffer_read_ptr = &input_memory[0]; - VertexManager::s_pCurBufferPointer = &output_memory[0]; m_input_pos = m_output_pos = 0; + src = DataReader(input_memory, input_memory+sizeof(input_memory)); + dst = DataReader(output_memory, output_memory+sizeof(output_memory)); } u32 m_input_pos, m_output_pos; + DataReader src; + DataReader dst; TVtxDesc m_vtx_desc; VAT m_vtx_attr; @@ -103,7 +105,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) Input(0.0f); Input(0.0f); Input(1.0f); // Convert 4 points. "7" -> primitive are points. - loader.RunVertices(m_vtx_attr, 7, 4); + int count = loader.RunVertices(m_vtx_attr, 7, 4, src, dst); + src.Skip(4 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f); @@ -113,7 +117,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) // Test that scale does nothing for floating point inputs. Input(1.0f); Input(2.0f); Input(4.0f); m_vtx_attr.g0.PosFrac = 1; - loader.RunVertices(m_vtx_attr, 7, 1); + count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); + src.Skip(1 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f); } @@ -136,7 +142,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) Input(12345); Input(54321); // Convert 5 points. "7" -> primitive are points. - loader.RunVertices(m_vtx_attr, 7, 5); + int count = loader.RunVertices(m_vtx_attr, 7, 5, src, dst); + src.Skip(5 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f); @@ -147,7 +155,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) // Test that scale works on U16 inputs. Input(42); Input(24); m_vtx_attr.g0.PosFrac = 1; - loader.RunVertices(m_vtx_attr, 7, 1); + count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); + src.Skip(1 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f); } @@ -165,7 +175,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) for (int i = 0; i < 1000; ++i) { ResetPointers(); - loader.RunVertices(m_vtx_attr, 7, 100000); + int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + src.Skip(100000 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } } @@ -183,7 +195,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) for (int i = 0; i < 1000; ++i) { ResetPointers(); - loader.RunVertices(m_vtx_attr, 7, 100000); + int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + src.Skip(100000 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } } @@ -244,6 +258,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) for (int i = 0; i < 100; ++i) { ResetPointers(); - loader.RunVertices(m_vtx_attr, 7, 100000); + int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + src.Skip(100000 * loader.GetVertexSize()); + dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } }