diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index 1a162b1cde..5b89e0507e 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -43,6 +43,8 @@ static size_t s_index_offset; VertexManager::VertexManager() { CreateDeviceObjects(); + CpuVBuffer.resize(MAX_VBUFFER_SIZE); + CpuIBuffer.resize(MAX_IBUFFER_SIZE); } VertexManager::~VertexManager() @@ -81,14 +83,25 @@ void VertexManager::PrepareDrawBuffers(u32 stride) void VertexManager::ResetBuffer(u32 stride) { - auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride); - s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first; - s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE; - s_baseVertex = buffer.second / stride; + if (CullAll) + { + // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. + s_pCurBufferPointer = s_pBaseBufferPointer = CpuVBuffer.data(); + s_pEndBufferPointer = s_pBaseBufferPointer + CpuVBuffer.size(); - buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16)); - IndexGenerator::Start((u16*)buffer.first); - s_index_offset = buffer.second; + IndexGenerator::Start((u16*)CpuIBuffer.data()); + } + else + { + auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride); + s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first; + s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE; + s_baseVertex = buffer.second / stride; + + buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16)); + IndexGenerator::Start((u16*)buffer.first); + s_index_offset = buffer.second; + } } void VertexManager::Draw(u32 stride) diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index ab400cf43d..ba6e49c466 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -45,6 +45,10 @@ private: void Draw(u32 stride); void vFlush(bool useDstAlpha) override; void 
PrepareDrawBuffers(u32 stride); + + // Alternative buffers in CPU memory for primitives we are going to discard. + std::vector<u8> CpuVBuffer; + std::vector<u16> CpuIBuffer; }; } diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index cefda66c52..5a77c3eb03 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -110,7 +110,7 @@ public: virtual void SetupVertexPointers() = 0; u32 GetVertexStride() const { return vtx_decl.stride; } - PortableVertexDeclaration GetVertexDeclaration() const { return vtx_decl; } + const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; } // TODO: move this under private: u32 m_components; // VB_HAS_X. Bitmask telling what vertex components are present. diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 9cc8861186..e0115a0d29 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -157,8 +157,12 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo VertexManager::Flush(); s_current_vtx_fmt = loader->m_native_vertex_format; + // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. + // They still need to go through vertex loading, because we need to calculate a zfreeze reference slope. 
+ bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5); + DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, - loader->m_native_vtx_decl.stride); + loader->m_native_vtx_decl.stride, cullall); count = loader->RunVertices(primitive, count, src, dst); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 75f6de97f8..0c5ccdd10d 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -29,6 +29,7 @@ PrimitiveType VertexManager::current_primitive_type; Slope VertexManager::ZSlope; bool VertexManager::IsFlushed; +bool VertexManager::CullAll; static const PrimitiveType primitive_from_gx[8] = { PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS @@ -44,6 +45,7 @@ static const PrimitiveType primitive_from_gx[8] = { VertexManager::VertexManager() { IsFlushed = true; + CullAll = false; } VertexManager::~VertexManager() @@ -55,7 +57,7 @@ u32 VertexManager::GetRemainingSize() return (u32)(s_pEndBufferPointer - s_pCurBufferPointer); } -DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) +DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall) { // The SSE vertex loader can write up to 4 bytes past the end u32 const needed_vertex_bytes = count * stride + 4; @@ -81,6 +83,8 @@ DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 "Increase MAXVBUFFERSIZE or we need primitive breaking after all."); } + CullAll = cullall; + // need to alloc new buffer if (IsFlushed) { @@ -192,34 +196,36 @@ void VertexManager::Flush() (int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff); #endif - BitSet32 usedtextures; - for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevorders[i / 2].getEnable(i & 1)) - usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true; - - if 
(bpmem.genMode.numindstages > 0) - for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - - for (unsigned int i : usedtextures) + // If the primitive is marked CullAll, all we need to do is update the vertex constants and calculate the zfreeze reference slope + if (!CullAll) { - g_renderer->SetSamplerState(i & 3, i >> 2); - const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i); + BitSet32 usedtextures; + for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevorders[i / 2].getEnable(i & 1)) + usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true; - if (tentry) + if (bpmem.genMode.numindstages > 0) + for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; + + for (unsigned int i : usedtextures) { - // 0s are probably for no manual wrapping needed. - PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0); + g_renderer->SetSamplerState(i & 3, i >> 2); + const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i); + + if (tentry) + { + // 0s are probably for no manual wrapping needed. 
+ PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0); + } + else + ERROR_LOG(VIDEO, "error loading texture"); } - else - ERROR_LOG(VIDEO, "error loading texture"); } - // set global constants + // set global vertex constants VertexShaderManager::SetConstants(); - GeometryShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); // Calculate ZSlope for zfreeze if (!bpmem.genMode.zfreeze) @@ -227,41 +233,37 @@ void VertexManager::Flush() // Must be done after VertexShaderManager::SetConstants() CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat()); } - else if (ZSlope.dirty) // or apply any dirty ZSlopes + else if (ZSlope.dirty && !CullAll) // or apply any dirty ZSlopes { PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0); ZSlope.dirty = false; } - // If cull mode is CULL_ALL, we shouldn't render any triangles/quads (points and lines don't get culled) - // vertex loader has already converted any quads into triangles, so we just check for triangles. - // TODO: These culled primites need to get this far through the pipeline to be used as zfreeze refrence - // planes. But currently we apply excessive processing and store the vertices in buffers on the - // video card, which is a waste of bandwidth. - if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES) + if (!CullAll) { - GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); - IsFlushed = true; - return; + // set the rest of the global constants + GeometryShaderManager::SetConstants(); + PixelShaderManager::SetConstants(); + + bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && + bpmem.dstalpha.enable && + bpmem.blendmode.alphaupdate && + bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24; + + if (PerfQueryBase::ShouldEmulate()) + g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? 
PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + g_vertex_manager->vFlush(useDstAlpha); + if (PerfQueryBase::ShouldEmulate()) + g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); } - bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && - bpmem.dstalpha.enable && - bpmem.blendmode.alphaupdate && - bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24; - - if (PerfQueryBase::ShouldEmulate()) - g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - g_vertex_manager->vFlush(useDstAlpha); - if (PerfQueryBase::ShouldEmulate()) - g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens) ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value()); IsFlushed = true; + CullAll = false; } void VertexManager::DoState(PointerWrap& p) @@ -279,7 +281,7 @@ void VertexManager::CalculateZSlope(NativeVertexFormat *format) // Global matrix ID. 
u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx; - PortableVertexDeclaration vert_decl = format->GetVertexDeclaration(); + const PortableVertexDeclaration& vert_decl = format->GetVertexDeclaration(); size_t posOff = vert_decl.position.offset; size_t mtxOff = vert_decl.posmtx.offset; diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index 9bcd71d3b3..4369438bc5 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -41,7 +41,7 @@ public: // needs to be virtual for DX11's dtor virtual ~VertexManager(); - static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride); + static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); static void FlushData(u32 count, u32 stride); static void Flush(); @@ -67,6 +67,8 @@ protected: static Slope ZSlope; static void CalculateZSlope(NativeVertexFormat *format); + static bool CullAll; + private: static bool IsFlushed;