diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 4468f7c57d..645bb876a7 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -336,6 +336,7 @@ if(WIN32) ) target_link_libraries(core PUBLIC videod3d + videod3d12 setupapi.lib iphlpapi.lib ) diff --git a/Source/Core/DolphinQt/DolphinQt.vcxproj b/Source/Core/DolphinQt/DolphinQt.vcxproj index e2fefa340c..18f253880e 100644 --- a/Source/Core/DolphinQt/DolphinQt.vcxproj +++ b/Source/Core/DolphinQt/DolphinQt.vcxproj @@ -475,6 +475,9 @@ {4c3b2264-ea73-4a7b-9cfe-65b0fd635ebb} + + {570215b7-e32f-4438-95ae-c8d955f9fca3} + diff --git a/Source/Core/VideoBackends/CMakeLists.txt b/Source/Core/VideoBackends/CMakeLists.txt index b53d85e10a..51ea342826 100644 --- a/Source/Core/VideoBackends/CMakeLists.txt +++ b/Source/Core/VideoBackends/CMakeLists.txt @@ -6,5 +6,6 @@ add_subdirectory(Vulkan) if(CMAKE_SYSTEM_NAME STREQUAL "Windows") add_subdirectory(D3DCommon) add_subdirectory(D3D) + add_subdirectory(D3D12) endif() diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.cpp b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp new file mode 100644 index 0000000000..c6b5db3b39 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp @@ -0,0 +1,183 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoBackends/D3D12/BoundingBox.h" +#include "Common/Logging/Log.h" +#include "VideoBackends/D3D12/DXContext.h" +#include "VideoBackends/D3D12/Renderer.h" + +namespace DX12 +{ +BoundingBox::BoundingBox() = default; + +BoundingBox::~BoundingBox() +{ + if (m_gpu_descriptor) + g_dx_context->GetDescriptorHeapManager().Free(m_gpu_descriptor); +} + +std::unique_ptr BoundingBox::Create() +{ + auto bbox = std::unique_ptr(new BoundingBox()); + if (!bbox->CreateBuffers()) + return nullptr; + + return bbox; +} + +bool BoundingBox::CreateBuffers() +{ + static constexpr D3D12_HEAP_PROPERTIES gpu_heap_properties = {D3D12_HEAP_TYPE_DEFAULT}; + static constexpr D3D12_HEAP_PROPERTIES cpu_heap_properties = {D3D12_HEAP_TYPE_READBACK}; + D3D12_RESOURCE_DESC buffer_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + BUFFER_SIZE, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource( + &gpu_heap_properties, D3D12_HEAP_FLAG_NONE, &buffer_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&m_gpu_buffer)); + CHECK(SUCCEEDED(hr), "Creating bounding box GPU buffer failed"); + if (FAILED(hr) || !g_dx_context->GetDescriptorHeapManager().Allocate(&m_gpu_descriptor)) + return false; + + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {DXGI_FORMAT_R32_SINT, D3D12_UAV_DIMENSION_BUFFER}; + uav_desc.Buffer.NumElements = NUM_VALUES; + g_dx_context->GetDevice()->CreateUnorderedAccessView(m_gpu_buffer.Get(), nullptr, &uav_desc, + m_gpu_descriptor.cpu_handle); + + buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE; + hr = g_dx_context->GetDevice()->CreateCommittedResource( + &cpu_heap_properties, D3D12_HEAP_FLAG_NONE, &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, IID_PPV_ARGS(&m_readback_buffer)); + CHECK(SUCCEEDED(hr), "Creating bounding box CPU buffer failed"); + if (FAILED(hr)) + return false; + + if 
(!m_upload_buffer.AllocateBuffer(STREAM_BUFFER_SIZE)) + return false; + + // Both the CPU and GPU buffer's contents is unknown, so force a flush the first time. + m_values.fill(0); + m_dirty.fill(true); + m_valid = true; + return true; +} + +void BoundingBox::Readback() +{ + // Copy from GPU->CPU buffer, and wait for the GPU to finish the copy. + ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(), + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + g_dx_context->GetCommandList()->CopyBufferRegion(m_readback_buffer.Get(), 0, m_gpu_buffer.Get(), + 0, BUFFER_SIZE); + ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(), + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + Renderer::GetInstance()->ExecuteCommandList(true); + + // Read back to cached values. + static constexpr D3D12_RANGE read_range = {0, BUFFER_SIZE}; + void* mapped_pointer; + HRESULT hr = m_readback_buffer->Map(0, &read_range, &mapped_pointer); + CHECK(SUCCEEDED(hr), "Map bounding box CPU buffer"); + if (FAILED(hr)) + return; + + static constexpr D3D12_RANGE write_range = {0, 0}; + std::array new_values; + std::memcpy(new_values.data(), mapped_pointer, BUFFER_SIZE); + m_readback_buffer->Unmap(0, &write_range); + + // Preserve dirty values, that way we don't need to sync. 
+ for (u32 i = 0; i < NUM_VALUES; i++) + { + if (!m_dirty[i]) + m_values[i] = new_values[i]; + } + m_valid = true; +} + +s32 BoundingBox::Get(size_t index) +{ + if (!m_valid) + Readback(); + + return m_values[index]; +} + +void BoundingBox::Set(size_t index, s32 value) +{ + m_values[index] = value; + m_dirty[index] = true; +} + +void BoundingBox::Invalidate() +{ + m_dirty.fill(false); + m_valid = false; +} + +void BoundingBox::Flush() +{ + bool in_copy_state = false; + for (u32 start = 0; start < NUM_VALUES;) + { + if (!m_dirty[start]) + { + start++; + continue; + } + + u32 end = start + 1; + m_dirty[start] = false; + for (; end < NUM_VALUES; end++) + { + if (!m_dirty[end]) + break; + + m_dirty[end] = false; + } + + const u32 copy_size = (end - start) * sizeof(ValueType); + if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(ValueType))) + { + WARN_LOG(VIDEO, "Executing command list while waiting for space in bbox stream buffer"); + Renderer::GetInstance()->ExecuteCommandList(false); + if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(ValueType))) + { + PanicAlert("Failed to allocate bbox stream buffer space"); + return; + } + } + + const u32 upload_buffer_offset = m_upload_buffer.GetCurrentOffset(); + std::memcpy(m_upload_buffer.GetCurrentHostPointer(), &m_values[start], copy_size); + m_upload_buffer.CommitMemory(copy_size); + + if (!in_copy_state) + { + ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(), + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST); + in_copy_state = true; + } + + g_dx_context->GetCommandList()->CopyBufferRegion(m_gpu_buffer.Get(), start * sizeof(ValueType), + m_upload_buffer.GetBuffer(), + upload_buffer_offset, copy_size); + start = end; + } + + if (in_copy_state) + { + ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(), + D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } +} +}; // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.h 
b/Source/Core/VideoBackends/D3D12/BoundingBox.h new file mode 100644 index 0000000000..45cb979ca8 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.h @@ -0,0 +1,49 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once +#include +#include "VideoBackends/D3D12/Common.h" +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "VideoBackends/D3D12/StreamBuffer.h" + +namespace DX12 +{ +class BoundingBox +{ +public: + ~BoundingBox(); + + static std::unique_ptr Create(); + + const DescriptorHandle& GetGPUDescriptor() const { return m_gpu_descriptor; } + + s32 Get(size_t index); + void Set(size_t index, s32 value); + + void Invalidate(); + void Flush(); + +private: + using ValueType = s32; + static const u32 NUM_VALUES = 4; + static const u32 BUFFER_SIZE = sizeof(ValueType) * NUM_VALUES; + static const u32 MAX_UPDATES_PER_FRAME = 128; + static const u32 STREAM_BUFFER_SIZE = BUFFER_SIZE * MAX_UPDATES_PER_FRAME; + + BoundingBox(); + + bool CreateBuffers(); + void Readback(); + + // Three buffers: GPU for read/write, CPU for reading back, and CPU for staging changes. 
+ ComPtr m_gpu_buffer; + ComPtr m_readback_buffer; + StreamBuffer m_upload_buffer; + DescriptorHandle m_gpu_descriptor; + std::array m_values = {}; + std::array m_dirty = {}; + bool m_valid = true; +}; +}; // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/CMakeLists.txt b/Source/Core/VideoBackends/D3D12/CMakeLists.txt new file mode 100644 index 0000000000..08eec49005 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/CMakeLists.txt @@ -0,0 +1,37 @@ +add_library(videod3d12 + BoundingBox.cpp + BoundingBox.h + DescriptorAllocator.cpp + DescriptorAllocator.h + DescriptorHeapManager.cpp + DescriptorHeapManager.h + DXContext.cpp + DXContext.h + DXPipeline.cpp + DXPipeline.h + DXShader.cpp + DXShader.h + DXTexture.cpp + DXTexture.h + DXVertexFormat.cpp + DXVertexFormat.h + PerfQuery.cpp + PerfQuery.h + Renderer.cpp + Renderer.h + StreamBuffer.cpp + StreamBuffer.h + SwapChain.cpp + SwapChain.h + VertexManager.cpp + VertexManager.h + VideoBackend.cpp + VideoBackend.h +) + +target_link_libraries(videod3d12 +PUBLIC + common + videocommon + videod3dcommon +) diff --git a/Source/Core/VideoBackends/D3D12/Common.h b/Source/Core/VideoBackends/D3D12/Common.h new file mode 100644 index 0000000000..47d0307350 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/Common.h @@ -0,0 +1,32 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. +#pragma once + +#include +#include + +#include "Common/MsgHandler.h" +#include "VideoBackends/D3DCommon/Common.h" + +#define CHECK(cond, Message, ...) 
\ + if (!(cond)) \ + { \ + PanicAlert(__FUNCTION__ " failed in %s at line %d: " Message, __FILE__, __LINE__, \ + __VA_ARGS__); \ + } + +namespace DX12 +{ +using Microsoft::WRL::ComPtr; + +static void ResourceBarrier(ID3D12GraphicsCommandList* cmdlist, ID3D12Resource* resource, + D3D12_RESOURCE_STATES from_state, D3D12_RESOURCE_STATES to_state) +{ + const D3D12_RESOURCE_BARRIER barrier = { + D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + D3D12_RESOURCE_BARRIER_FLAG_NONE, + {{resource, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, from_state, to_state}}}; + cmdlist->ResourceBarrier(1, &barrier); +} +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj new file mode 100644 index 0000000000..e382597b84 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj @@ -0,0 +1,94 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {570215B7-E32F-4438-95AE-C8D955F9FCA3} + 10.0.17134.0 + + + + StaticLibrary + v141 + Unicode + + + true + + + false + + + + + + + + + + + + + NotUsing + + + + + + NotUsing + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {3de9ee35-3e91-4f27-a014-2866ad8c3fe3} + + + {dea96cf2-f237-4a1a-b32f-c916769efb50} + + + + + + \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters new file mode 100644 index 0000000000..24feadc6ff --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/DXContext.cpp b/Source/Core/VideoBackends/D3D12/DXContext.cpp new file mode 100644 index 0000000000..6c3611ed00 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXContext.cpp @@ -0,0 +1,548 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/DynamicLibrary.h" +#include "Common/StringUtil.h" +#include "VideoBackends/D3D12/Common.h" +#include "VideoBackends/D3D12/DXContext.h" +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "VideoBackends/D3D12/StreamBuffer.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ +std::unique_ptr g_dx_context; + +// Private D3D12 state +static Common::DynamicLibrary s_d3d12_library; +static PFN_D3D12_CREATE_DEVICE s_d3d12_create_device; +static PFN_D3D12_GET_DEBUG_INTERFACE s_d3d12_get_debug_interface; +static PFN_D3D12_SERIALIZE_ROOT_SIGNATURE s_d3d12_serialize_root_signature; + +DXContext::DXContext() = default; + +DXContext::~DXContext() +{ + if (m_fence_event) + CloseHandle(m_fence_event); +} + +std::vector DXContext::GetAAModes(u32 adapter_index) +{ + // Use a temporary device if we aren't booting. + Common::DynamicLibrary temp_lib; + ComPtr temp_device = g_dx_context ? g_dx_context->m_device : nullptr; + if (!temp_device) + { + ComPtr temp_dxgi_factory = D3DCommon::CreateDXGIFactory(false); + if (!temp_dxgi_factory) + return {}; + + ComPtr adapter; + temp_dxgi_factory->EnumAdapters(adapter_index, &adapter); + + PFN_D3D12_CREATE_DEVICE d3d12_create_device; + if (!temp_lib.Open("d3d12.dll") || + !temp_lib.GetSymbol("D3D12CreateDevice", &d3d12_create_device)) + { + return {}; + } + + HRESULT hr = d3d12_create_device(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&temp_device)); + if (!SUCCEEDED(hr)) + return {}; + } + + std::vector aa_modes; + for (u32 samples = 1; samples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; ++samples) + { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS multisample_quality_levels = {}; + multisample_quality_levels.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + multisample_quality_levels.SampleCount = samples; + + temp_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &multisample_quality_levels, + 
sizeof(multisample_quality_levels)); + + if (multisample_quality_levels.NumQualityLevels > 0) + aa_modes.push_back(samples); + } + + return aa_modes; +} + +bool DXContext::SupportsTextureFormat(DXGI_FORMAT format) +{ + constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {format}; + return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, + sizeof(support))) && + (support.Support1 & required) == required; +} + +bool DXContext::Create(u32 adapter_index, bool enable_debug_layer) +{ + ASSERT(!g_dx_context); + if (!s_d3d12_library.Open("d3d12.dll") || + !s_d3d12_library.GetSymbol("D3D12CreateDevice", &s_d3d12_create_device) || + !s_d3d12_library.GetSymbol("D3D12GetDebugInterface", &s_d3d12_get_debug_interface) || + !s_d3d12_library.GetSymbol("D3D12SerializeRootSignature", &s_d3d12_serialize_root_signature)) + { + PanicAlertT("d3d12.dll could not be loaded."); + s_d3d12_library.Close(); + return false; + } + + if (!D3DCommon::LoadLibraries()) + { + s_d3d12_library.Close(); + return false; + } + + g_dx_context.reset(new DXContext()); + if (!g_dx_context->CreateDXGIFactory(enable_debug_layer) || + !g_dx_context->CreateDevice(adapter_index, enable_debug_layer) || + !g_dx_context->CreateCommandQueue() || !g_dx_context->CreateFence()) + { + Destroy(); + return false; + } + + return true; +} + +bool DXContext::CreateGlobalResources() +{ + return g_dx_context->CreateDescriptorHeaps() && g_dx_context->CreateRootSignatures() && + g_dx_context->CreateTextureUploadBuffer() && g_dx_context->CreateCommandLists(); +} + +void DXContext::Destroy() +{ + if (g_dx_context) + g_dx_context.reset(); + + s_d3d12_serialize_root_signature = nullptr; + s_d3d12_get_debug_interface = nullptr; + s_d3d12_create_device = nullptr; + s_d3d12_library.Close(); + D3DCommon::UnloadLibraries(); +} + +bool DXContext::CreateDXGIFactory(bool enable_debug_layer) +{ + m_dxgi_factory = 
D3DCommon::CreateDXGIFactory(enable_debug_layer); + return m_dxgi_factory != nullptr; +} + +bool DXContext::CreateDevice(u32 adapter_index, bool enable_debug_layer) +{ + ComPtr adapter; + HRESULT hr = m_dxgi_factory->EnumAdapters(adapter_index, &adapter); + if (FAILED(hr)) + { + ERROR_LOG(VIDEO, "Adapter %u not found, using default", adapter_index); + adapter = nullptr; + } + + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. + if (enable_debug_layer) + { + hr = s_d3d12_get_debug_interface(IID_PPV_ARGS(&m_debug_interface)); + if (SUCCEEDED(hr)) + { + m_debug_interface->EnableDebugLayer(); + } + else + { + ERROR_LOG(VIDEO, "Debug layer requested but not available."); + enable_debug_layer = false; + } + } + + // Create the actual device. + hr = s_d3d12_create_device(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); + CHECK(SUCCEEDED(hr), "Create D3D12 device"); + if (FAILED(hr)) + return false; + + if (enable_debug_layer) + { + ComPtr info_queue; + if (SUCCEEDED(m_device->QueryInterface(IID_PPV_ARGS(&info_queue)))) + { + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); + + D3D12_INFO_QUEUE_FILTER filter = {}; + D3D12_MESSAGE_ID id_list[] = { + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, + D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, + D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE}; + filter.DenyList.NumIDs = static_cast(ArraySize(id_list)); + filter.DenyList.pIDList = id_list; + info_queue->PushStorageFilter(&filter); + } + } + + return true; +} + +bool DXContext::CreateCommandQueue() +{ + const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, + D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + D3D12_COMMAND_QUEUE_FLAG_NONE}; + HRESULT hr = 
m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue)); + CHECK(SUCCEEDED(hr), "Create command queue"); + return SUCCEEDED(hr); +} + +bool DXContext::CreateFence() +{ + HRESULT hr = + m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)); + CHECK(SUCCEEDED(hr), "Create fence"); + if (FAILED(hr)) + return false; + + m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); + CHECK(m_fence_event != NULL, "Create fence event"); + if (!m_fence_event) + return false; + + return true; +} + +bool DXContext::CreateDescriptorHeaps() +{ + static constexpr size_t MAX_SRVS = 16384; + static constexpr size_t MAX_RTVS = 8192; + static constexpr size_t MAX_DSVS = 128; + static constexpr size_t MAX_SAMPLERS = 16384; + + if (!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + MAX_SRVS) || + !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_RTVS) || + !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_DSVS) || + !m_sampler_heap_manager.Create(m_device.Get(), MAX_SAMPLERS)) + { + return false; + } + + m_gpu_descriptor_heaps[1] = m_sampler_heap_manager.GetDescriptorHeap(); + + // Allocate null SRV descriptor for unbound textures. 
+ constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = { + DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + + if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) + { + PanicAlert("Failed to allocate null descriptor"); + return false; + } + + m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); + return true; +} + +static void SetRootParamCBV(D3D12_ROOT_PARAMETER* rp, u32 shader_reg, + D3D12_SHADER_VISIBILITY visibility) +{ + rp->ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + rp->Descriptor.ShaderRegister = shader_reg; + rp->Descriptor.RegisterSpace = 0; + rp->ShaderVisibility = visibility; +} + +static void SetRootParamTable(D3D12_ROOT_PARAMETER* rp, D3D12_DESCRIPTOR_RANGE* dr, + D3D12_DESCRIPTOR_RANGE_TYPE rt, u32 start_shader_reg, + u32 num_shader_regs, D3D12_SHADER_VISIBILITY visibility) +{ + dr->RangeType = rt; + dr->NumDescriptors = num_shader_regs; + dr->BaseShaderRegister = start_shader_reg; + dr->RegisterSpace = 0; + dr->OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + rp->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rp->DescriptorTable.pDescriptorRanges = dr; + rp->DescriptorTable.NumDescriptorRanges = 1; + rp->ShaderVisibility = visibility; +} + +static bool BuildRootSignature(ID3D12Device* device, ID3D12RootSignature** sig_ptr, + const D3D12_ROOT_PARAMETER* params, u32 num_params) +{ + D3D12_ROOT_SIGNATURE_DESC desc = {}; + desc.pParameters = params; + desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS; + desc.NumParameters = num_params; + + ComPtr root_signature_blob; + ComPtr root_signature_error_blob; + + HRESULT hr = s_d3d12_serialize_root_signature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, + &root_signature_blob, &root_signature_error_blob); + if 
(FAILED(hr)) + { + PanicAlert("Failed to serialize root signature: %s", + static_cast(root_signature_error_blob->GetBufferPointer())); + return false; + } + + hr = device->CreateRootSignature(0, root_signature_blob->GetBufferPointer(), + root_signature_blob->GetBufferSize(), IID_PPV_ARGS(sig_ptr)); + CHECK(SUCCEEDED(hr), "Create root signature"); + return true; +} + +bool DXContext::CreateRootSignatures() +{ + return CreateGXRootSignature() && CreateUtilityRootSignature() && CreateComputeRootSignature(); +} + +bool DXContext::CreateGXRootSignature() +{ + // GX: + // - 3 constant buffers (bindings 0-2), 0/1 visible in PS, 1 visible in VS, 2 visible in GS. + // - 8 textures (visible in PS). + // - 8 samplers (visible in PS). + // - 1 UAV (visible in PS). + + std::array params; + std::array ranges; + u32 param_count = 0; + SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_PIXEL); + param_count++; + SetRootParamTable(¶ms[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, + 8, D3D12_SHADER_VISIBILITY_PIXEL); + param_count++; + SetRootParamTable(¶ms[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, + 0, 8, D3D12_SHADER_VISIBILITY_PIXEL); + param_count++; + SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_VERTEX); + param_count++; + SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY); + param_count++; + + // Since these must be contiguous, pixel lighting goes to bbox if not enabled. 
+ if (g_ActiveConfig.bBBoxEnable) + { + SetRootParamTable(¶ms[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + 2, 1, D3D12_SHADER_VISIBILITY_PIXEL); + param_count++; + } + if (g_ActiveConfig.bEnablePixelLighting) + { + SetRootParamCBV(¶ms[param_count], 1, D3D12_SHADER_VISIBILITY_PIXEL); + param_count++; + } + + return BuildRootSignature(m_device.Get(), &m_gx_root_signature, params.data(), param_count); +} + +bool DXContext::CreateUtilityRootSignature() +{ + // Utility: + // - 1 constant buffer (binding 0, visible in VS/PS). + // - 8 textures (visible in PS). + // - 8 samplers (visible in PS). + + std::array params; + std::array ranges; + SetRootParamCBV(¶ms[ROOT_PARAMETER_PS_CBV], 0, D3D12_SHADER_VISIBILITY_ALL); + SetRootParamTable(¶ms[ROOT_PARAMETER_PS_SRV], &ranges[ROOT_PARAMETER_PS_SRV], + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 8, D3D12_SHADER_VISIBILITY_PIXEL); + SetRootParamTable(¶ms[ROOT_PARAMETER_PS_SAMPLERS], &ranges[ROOT_PARAMETER_PS_SAMPLERS], + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 8, D3D12_SHADER_VISIBILITY_PIXEL); + return BuildRootSignature(m_device.Get(), &m_utility_root_signature, params.data(), 3); +} + +bool DXContext::CreateComputeRootSignature() +{ + // Compute: + // - 1 constant buffer (binding 0). + // - 8 textures. + // - 8 samplers. + // - 1 UAV. 
+ + std::array params; + std::array ranges; + SetRootParamCBV(¶ms[CS_ROOT_PARAMETER_CBV], 0, D3D12_SHADER_VISIBILITY_ALL); + SetRootParamTable(¶ms[CS_ROOT_PARAMETER_SRV], &ranges[CS_ROOT_PARAMETER_CBV], + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 8, D3D12_SHADER_VISIBILITY_ALL); + SetRootParamTable(¶ms[CS_ROOT_PARAMETER_SAMPLERS], &ranges[CS_ROOT_PARAMETER_SAMPLERS], + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 8, D3D12_SHADER_VISIBILITY_ALL); + SetRootParamTable(¶ms[CS_ROOT_PARAMETER_UAV], &ranges[CS_ROOT_PARAMETER_UAV], + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, 1, D3D12_SHADER_VISIBILITY_ALL); + return BuildRootSignature(m_device.Get(), &m_compute_root_signature, params.data(), 4); +} + +bool DXContext::CreateTextureUploadBuffer() +{ + if (!m_texture_upload_buffer.AllocateBuffer(TEXTURE_UPLOAD_BUFFER_SIZE)) + { + PanicAlert("Failed to create texture upload buffer"); + return false; + } + + return true; +} + +bool DXContext::CreateCommandLists() +{ + static constexpr size_t MAX_DRAWS_PER_FRAME = 8192; + static constexpr size_t TEMPORARY_SLOTS = MAX_DRAWS_PER_FRAME * 8; + + for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) + { + CommandListResources& res = m_command_lists[i]; + HRESULT hr = m_device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(res.command_allocator.GetAddressOf())); + CHECK(SUCCEEDED(hr), "Create command allocator"); + if (FAILED(hr)) + return false; + + hr = m_device->CreateCommandList(1, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocator.Get(), + nullptr, IID_PPV_ARGS(res.command_list.GetAddressOf())); + if (FAILED(hr)) + { + PanicAlert("Failed to create command list."); + return false; + } + + // Close the command list, since the first thing we do is reset them. 
+ hr = res.command_list->Close(); + CHECK(SUCCEEDED(hr), "Closing new command list failed"); + if (FAILED(hr)) + return false; + + if (!res.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + TEMPORARY_SLOTS) || + !res.sampler_allocator.Create(m_device.Get())) + { + return false; + } + } + + MoveToNextCommandList(); + return true; +} + +void DXContext::MoveToNextCommandList() +{ + m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + m_current_fence_value++; + + // We may have to wait if this command list hasn't finished on the GPU. + CommandListResources& res = m_command_lists[m_current_command_list]; + WaitForFence(res.ready_fence_value); + + // Begin command list. + res.command_allocator->Reset(); + res.command_list->Reset(res.command_allocator.Get(), nullptr); + res.descriptor_allocator.Reset(); + if (res.sampler_allocator.ShouldReset()) + res.sampler_allocator.Reset(); + m_gpu_descriptor_heaps[0] = res.descriptor_allocator.GetDescriptorHeap(); + m_gpu_descriptor_heaps[1] = res.sampler_allocator.GetDescriptorHeap(); + res.ready_fence_value = m_current_fence_value; +} + +void DXContext::ExecuteCommandList(bool wait_for_completion) +{ + CommandListResources& res = m_command_lists[m_current_command_list]; + + // Close and queue command list. + HRESULT hr = res.command_list->Close(); + CHECK(SUCCEEDED(hr), "Close command list"); + ID3D12CommandList* const execute_lists[] = {res.command_list.Get()}; + m_command_queue->ExecuteCommandLists(static_cast(ArraySize(execute_lists)), execute_lists); + + // Update fence when GPU has completed. 
+ hr = m_command_queue->Signal(m_fence.Get(), m_current_fence_value); + CHECK(SUCCEEDED(hr), "Signal fence"); + + MoveToNextCommandList(); + if (wait_for_completion) + WaitForFence(res.ready_fence_value); +} + +void DXContext::DeferResourceDestruction(ID3D12Resource* resource) +{ + resource->AddRef(); + m_command_lists[m_current_command_list].pending_resources.push_back(resource); +} + +void DXContext::DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index) +{ + m_command_lists[m_current_command_list].pending_descriptors.emplace_back(manager, index); +} + +void DXContext::ResetSamplerAllocators() +{ + for (CommandListResources& res : m_command_lists) + res.sampler_allocator.Reset(); +} + +void DXContext::RecreateGXRootSignature() +{ + m_gx_root_signature.Reset(); + if (!CreateGXRootSignature()) + PanicAlert("Failed to re-create GX root signature."); +} + +void DXContext::DestroyPendingResources(CommandListResources& cmdlist) +{ + for (const auto& dd : cmdlist.pending_descriptors) + dd.first.Free(dd.second); + cmdlist.pending_descriptors.clear(); + + for (ID3D12Resource* res : cmdlist.pending_resources) + res->Release(); + cmdlist.pending_resources.clear(); +} + +void DXContext::WaitForFence(u64 fence) +{ + if (m_completed_fence_value >= fence) + return; + + // Try non-blocking check. + m_completed_fence_value = m_fence->GetCompletedValue(); + if (m_completed_fence_value < fence) + { + // Fall back to event. + HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); + CHECK(SUCCEEDED(hr), "Set fence event on completion"); + WaitForSingleObject(m_fence_event, INFINITE); + m_completed_fence_value = m_fence->GetCompletedValue(); + } + + // Release resources for as many command lists which have completed. 
+ u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) + { + CommandListResources& res = m_command_lists[index]; + if (m_completed_fence_value < res.ready_fence_value) + break; + + DestroyPendingResources(res); + index = (index + 1) % NUM_COMMAND_LISTS; + } +} +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DXContext.h b/Source/Core/VideoBackends/D3D12/DXContext.h new file mode 100644 index 0000000000..4f1993d2b5 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXContext.h @@ -0,0 +1,191 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once +#include "Common/CommonTypes.h" +#include "VideoBackends/D3D12/Common.h" +#include "VideoBackends/D3D12/DescriptorAllocator.h" +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "VideoBackends/D3D12/StreamBuffer.h" + +#include +#include +#include + +struct IDXGIFactory2; + +namespace DX12 +{ +// Vertex/Pixel shader root parameters +enum ROOT_PARAMETER +{ + ROOT_PARAMETER_PS_CBV, + ROOT_PARAMETER_PS_SRV, + ROOT_PARAMETER_PS_SAMPLERS, + ROOT_PARAMETER_VS_CBV, + ROOT_PARAMETER_GS_CBV, + ROOT_PARAMETER_PS_UAV_OR_CBV2, + ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled + NUM_ROOT_PARAMETERS +}; +// Compute shader root parameters +enum CS_ROOT_PARAMETERS +{ + CS_ROOT_PARAMETER_CBV, + CS_ROOT_PARAMETER_SRV, + CS_ROOT_PARAMETER_SAMPLERS, + CS_ROOT_PARAMETER_UAV, + NUM_CS_ROOT_PARAMETERS, +}; + +class DXContext +{ +public: + ~DXContext(); + + // Returns a list of AA modes. + static std::vector GetAAModes(u32 adapter_index); + + // Creates new device and context. + static bool Create(u32 adapter_index, bool enable_debug_layer); + + // Destroys active context. 
+ static void Destroy(); + + IDXGIFactory2* GetDXGIFactory() const { return m_dxgi_factory.Get(); } + ID3D12Device* GetDevice() const { return m_device.Get(); } + ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); } + + // Returns the current command list, commands can be recorded directly. + ID3D12GraphicsCommandList* GetCommandList() const + { + return m_command_lists[m_current_command_list].command_list.Get(); + } + DescriptorAllocator* GetDescriptorAllocator() + { + return &m_command_lists[m_current_command_list].descriptor_allocator; + } + SamplerAllocator* GetSamplerAllocator() + { + return &m_command_lists[m_current_command_list].sampler_allocator; + } + + // Descriptor manager access. + DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; } + DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; } + DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; } + SamplerHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; } + ID3D12DescriptorHeap* const* GetGPUDescriptorHeaps() const + { + return m_gpu_descriptor_heaps.data(); + } + u32 GetGPUDescriptorHeapCount() const { return static_cast(m_gpu_descriptor_heaps.size()); } + const DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; } + + // Root signature access. + ID3D12RootSignature* GetGXRootSignature() const { return m_gx_root_signature.Get(); } + ID3D12RootSignature* GetUtilityRootSignature() const { return m_utility_root_signature.Get(); } + ID3D12RootSignature* GetComputeRootSignature() const { return m_compute_root_signature.Get(); } + + // Fence value for current command list. + u64 GetCurrentFenceValue() const { return m_current_fence_value; } + + // Last "completed" fence. + u64 GetCompletedFenceValue() const { return m_completed_fence_value; } + + // Texture streaming buffer for uploads. 
+ StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; } + + // Feature level to use when compiling shaders. + D3D_FEATURE_LEVEL GetFeatureLevel() const { return m_feature_level; } + + // Test for support for the specified texture format. + bool SupportsTextureFormat(DXGI_FORMAT format); + + // Creates command lists, global buffers and descriptor heaps. + bool CreateGlobalResources(); + + // Executes the current command list. + void ExecuteCommandList(bool wait_for_completion); + + // Waits for a specific fence. + void WaitForFence(u64 fence); + + // Defers destruction of a D3D resource (associates it with the current list). + void DeferResourceDestruction(ID3D12Resource* resource); + + // Defers destruction of a descriptor handle (associates it with the current list). + void DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index); + + // Clears all samplers from the per-frame allocators. + void ResetSamplerAllocators(); + + // Re-creates the root signature. Call when the host config changes (e.g. bbox/per-pixel shading). + void RecreateGXRootSignature(); + +private: + // Number of command lists. One is being built while the other(s) are executed. + static const u32 NUM_COMMAND_LISTS = 3; + + // Textures that don't fit into this buffer will be uploaded with a staging buffer. 
+ static const u32 TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; + + struct CommandListResources + { + ComPtr command_allocator; + ComPtr command_list; + DescriptorAllocator descriptor_allocator; + SamplerAllocator sampler_allocator; + std::vector pending_resources; + std::vector> pending_descriptors; + u64 ready_fence_value = 0; + }; + + DXContext(); + + bool CreateDXGIFactory(bool enable_debug_layer); + bool CreateDevice(u32 adapter_index, bool enable_debug_layer); + bool CreateCommandQueue(); + bool CreateFence(); + bool CreateDescriptorHeaps(); + bool CreateRootSignatures(); + bool CreateGXRootSignature(); + bool CreateUtilityRootSignature(); + bool CreateComputeRootSignature(); + bool CreateTextureUploadBuffer(); + bool CreateCommandLists(); + void MoveToNextCommandList(); + void DestroyPendingResources(CommandListResources& cmdlist); + + ComPtr m_dxgi_factory; + ComPtr m_debug_interface; + ComPtr m_device; + ComPtr m_command_queue; + + ComPtr m_fence = nullptr; + HANDLE m_fence_event = {}; + u32 m_current_fence_value = 0; + u64 m_completed_fence_value = 0; + + std::array m_command_lists; + u32 m_current_command_list = NUM_COMMAND_LISTS - 1; + + DescriptorHeapManager m_descriptor_heap_manager; + DescriptorHeapManager m_rtv_heap_manager; + DescriptorHeapManager m_dsv_heap_manager; + SamplerHeapManager m_sampler_heap_manager; + std::array m_gpu_descriptor_heaps = {}; + DescriptorHandle m_null_srv_descriptor; + D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; + + ComPtr m_gx_root_signature; + ComPtr m_utility_root_signature; + ComPtr m_compute_root_signature; + + StreamBuffer m_texture_upload_buffer; +}; + +extern std::unique_ptr g_dx_context; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DXPipeline.cpp b/Source/Core/VideoBackends/D3D12/DXPipeline.cpp new file mode 100644 index 0000000000..0c896bb425 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXPipeline.cpp @@ -0,0 +1,217 @@ +// Copyright 2019 Dolphin Emulator Project 
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Common/Assert.h"
+#include "Common/MsgHandler.h"
+
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/DXPipeline.h"
+#include "VideoBackends/D3D12/DXShader.h"
+#include "VideoBackends/D3D12/DXTexture.h"
+#include "VideoBackends/D3D12/DXVertexFormat.h"
+
+namespace DX12
+{
+// Stores the already-created pipeline state object together with the state it was built with.
+// The pipeline pointer is released in the destructor; the root signature is only stored.
+DXPipeline::DXPipeline(ID3D12PipelineState* pipeline, ID3D12RootSignature* root_signature,
+                       AbstractPipelineUsage usage, D3D12_PRIMITIVE_TOPOLOGY primitive_topology,
+                       bool use_integer_rtv)
+    : m_pipeline(pipeline), m_root_signature(root_signature), m_usage(usage),
+      m_primitive_topology(primitive_topology), m_use_integer_rtv(use_integer_rtv)
+{
+}
+
+// Drops the reference on the pipeline state object that was handed to the constructor.
+DXPipeline::~DXPipeline()
+{
+  m_pipeline->Release();
+}
+
+// Maps the abstract primitive type to the topology set on the command list at draw time.
+// Anything that is not points/lines/triangles is treated as a triangle strip.
+static D3D12_PRIMITIVE_TOPOLOGY GetD3DTopology(const RasterizationState& state)
+{
+  switch (state.primitive)
+  {
+  case PrimitiveType::Points:
+    return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+  case PrimitiveType::Lines:
+    return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
+  case PrimitiveType::Triangles:
+    return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+  case PrimitiveType::TriangleStrip:
+  default:
+    return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+  }
+}
+
+// Maps the abstract primitive type to the coarse topology *type* stored in the PSO description.
+// Triangle lists and strips share the TRIANGLE type.
+static D3D12_PRIMITIVE_TOPOLOGY_TYPE GetD3DTopologyType(const RasterizationState& state)
+{
+  switch (state.primitive)
+  {
+  case PrimitiveType::Points:
+    return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
+  case PrimitiveType::Lines:
+    return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
+  case PrimitiveType::Triangles:
+  case PrimitiveType::TriangleStrip:
+  default:
+    return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+  }
+}
+
+// Fills in the fill mode, cull mode and multisample flag of the rasterizer description.
+// All other fields of *desc are left untouched.
+static void GetD3DRasterizerDesc(D3D12_RASTERIZER_DESC* desc, const RasterizationState& rs_state,
+                                 const FramebufferState& fb_state)
+{
+  // No CULL_ALL here.
+  // The table is indexed by rs_state.cullmode; the last slot reuses CULL_MODE_FRONT.
+  static constexpr std::array<D3D12_CULL_MODE, 4> cull_modes = {
+      {D3D12_CULL_MODE_NONE, D3D12_CULL_MODE_BACK, D3D12_CULL_MODE_FRONT, D3D12_CULL_MODE_FRONT}};
+
+  desc->FillMode = D3D12_FILL_MODE_SOLID;
+  desc->CullMode = cull_modes[rs_state.cullmode];
+  desc->MultisampleEnable = fb_state.samples > 1;
+}
+
+// Fills in the depth test enable, comparison function and write mask of the depth description.
+static void GetD3DDepthDesc(D3D12_DEPTH_STENCIL_DESC* desc, const DepthState& state)
+{
+  // Less/greater are swapped due to inverted depth.
+  // The table is indexed by state.func.
+  static constexpr std::array<D3D12_COMPARISON_FUNC, 8> compare_funcs = {
+      {D3D12_COMPARISON_FUNC_NEVER, D3D12_COMPARISON_FUNC_GREATER, D3D12_COMPARISON_FUNC_EQUAL,
+       D3D12_COMPARISON_FUNC_GREATER_EQUAL, D3D12_COMPARISON_FUNC_LESS,
+       D3D12_COMPARISON_FUNC_NOT_EQUAL, D3D12_COMPARISON_FUNC_LESS_EQUAL,
+       D3D12_COMPARISON_FUNC_ALWAYS}};
+
+  desc->DepthEnable = state.testenable;
+  desc->DepthFunc = compare_funcs[state.func];
+  desc->DepthWriteMask =
+      state.updateenable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
+}
+
+// Fills in render target 0 of the blend description from the abstract blending state.
+// The write mask is built with |=, so the caller must pass a zero-initialised description
+// (DXPipeline::Create does, via `desc = {}`).
+static void GetD3DBlendDesc(D3D12_BLEND_DESC* desc, const BlendingState& state)
+{
+  // Factor tables are indexed by the src/dst factor fields of the blending state. The dual-source
+  // variants differ from the regular ones only in using SRC1_ALPHA instead of SRC_ALPHA.
+  static constexpr std::array<D3D12_BLEND, 8> src_dual_src_factors = {
+      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_DEST_COLOR, D3D12_BLEND_INV_DEST_COLOR,
+       D3D12_BLEND_SRC1_ALPHA, D3D12_BLEND_INV_SRC1_ALPHA, D3D12_BLEND_DEST_ALPHA,
+       D3D12_BLEND_INV_DEST_ALPHA}};
+  static constexpr std::array<D3D12_BLEND, 8> dst_dual_src_factors = {
+      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_SRC_COLOR, D3D12_BLEND_INV_SRC_COLOR,
+       D3D12_BLEND_SRC1_ALPHA, D3D12_BLEND_INV_SRC1_ALPHA, D3D12_BLEND_DEST_ALPHA,
+       D3D12_BLEND_INV_DEST_ALPHA}};
+  static constexpr std::array<D3D12_BLEND, 8> src_factors = {
+      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_DEST_COLOR, D3D12_BLEND_INV_DEST_COLOR,
+       D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_DEST_ALPHA,
+       D3D12_BLEND_INV_DEST_ALPHA}};
+
+  static constexpr std::array<D3D12_BLEND, 8> dst_factors = {
+      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_SRC_COLOR, D3D12_BLEND_INV_SRC_COLOR,
+       D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_DEST_ALPHA,
+       D3D12_BLEND_INV_DEST_ALPHA}};
+
+  // Indexed by state.logicmode.
+  static constexpr std::array<D3D12_LOGIC_OP, 16> logic_ops = {
+      {D3D12_LOGIC_OP_CLEAR, D3D12_LOGIC_OP_AND, D3D12_LOGIC_OP_AND_REVERSE, D3D12_LOGIC_OP_COPY,
+       D3D12_LOGIC_OP_AND_INVERTED, D3D12_LOGIC_OP_NOOP, D3D12_LOGIC_OP_XOR, D3D12_LOGIC_OP_OR,
+       D3D12_LOGIC_OP_NOR, D3D12_LOGIC_OP_EQUIV, D3D12_LOGIC_OP_INVERT, D3D12_LOGIC_OP_OR_REVERSE,
+       D3D12_LOGIC_OP_COPY_INVERTED, D3D12_LOGIC_OP_OR_INVERTED, D3D12_LOGIC_OP_NAND,
+       D3D12_LOGIC_OP_SET}};
+
+  desc->AlphaToCoverageEnable = FALSE;
+  desc->IndependentBlendEnable = FALSE;
+
+  D3D12_RENDER_TARGET_BLEND_DESC* rtblend = &desc->RenderTarget[0];
+  if (state.colorupdate)
+  {
+    rtblend->RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_RED |
+                                      D3D12_COLOR_WRITE_ENABLE_GREEN |
+                                      D3D12_COLOR_WRITE_ENABLE_BLUE;
+  }
+  if (state.alphaupdate)
+  {
+    rtblend->RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA;
+  }
+
+  // blend takes precedence over logic op
+  rtblend->BlendEnable = state.blendenable;
+  if (state.blendenable)
+  {
+    rtblend->BlendOp = state.subtract ? D3D12_BLEND_OP_REV_SUBTRACT : D3D12_BLEND_OP_ADD;
+    rtblend->BlendOpAlpha = state.subtractAlpha ? D3D12_BLEND_OP_REV_SUBTRACT : D3D12_BLEND_OP_ADD;
+    if (state.usedualsrc)
+    {
+      rtblend->SrcBlend = src_dual_src_factors[state.srcfactor];
+      rtblend->SrcBlendAlpha = src_dual_src_factors[state.srcfactoralpha];
+      rtblend->DestBlend = dst_dual_src_factors[state.dstfactor];
+      rtblend->DestBlendAlpha = dst_dual_src_factors[state.dstfactoralpha];
+    }
+    else
+    {
+      rtblend->SrcBlend = src_factors[state.srcfactor];
+      rtblend->SrcBlendAlpha = src_factors[state.srcfactoralpha];
+      rtblend->DestBlend = dst_factors[state.dstfactor];
+      rtblend->DestBlendAlpha = dst_factors[state.dstfactoralpha];
+    }
+  }
+  else
+  {
+    // Logic ops are only programmed when blending is off.
+    rtblend->LogicOpEnable = state.logicopenable;
+    if (state.logicopenable)
+      rtblend->LogicOp = logic_ops[state.logicmode];
+  }
+}
+
+// Translates an abstract pipeline config into a D3D12 graphics pipeline state object.
+// Returns nullptr if the usage has no matching root signature or if PSO creation fails.
+std::unique_ptr<DXPipeline> DXPipeline::Create(const AbstractPipelineConfig& config)
+{
+  DEBUG_ASSERT(config.vertex_shader && config.pixel_shader);
+
+  // Zero-initialised: the Get*Desc helpers below only fill in the fields they care about.
+  D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
+  switch (config.usage)
+  {
+  case AbstractPipelineUsage::GX:
+    desc.pRootSignature = g_dx_context->GetGXRootSignature();
+    break;
+  case AbstractPipelineUsage::Utility:
+    desc.pRootSignature = g_dx_context->GetUtilityRootSignature();
+    break;
+  default:
+    PanicAlert("Unknown pipeline layout.");
+    return nullptr;
+  }
+
+  if (config.vertex_shader)
+    desc.VS = static_cast<const DXShader*>(config.vertex_shader)->GetD3DByteCode();
+  if (config.geometry_shader)
+    desc.GS = static_cast<const DXShader*>(config.geometry_shader)->GetD3DByteCode();
+  if (config.pixel_shader)
+    desc.PS = static_cast<const DXShader*>(config.pixel_shader)->GetD3DByteCode();
+
+  GetD3DBlendDesc(&desc.BlendState, config.blending_state);
+  desc.SampleMask = 0xFFFFFFFF;
+  GetD3DRasterizerDesc(&desc.RasterizerState, config.rasterization_state, config.framebuffer_state);
+  GetD3DDepthDesc(&desc.DepthStencilState, config.depth_state);
+  if (config.vertex_format)
+    static_cast<const DXVertexFormat*>(config.vertex_format)->GetInputLayoutDesc(&desc.InputLayout);
+  // The 0xFFFF strip-cut index is enabled only for triangle strips.
+  desc.IBStripCutValue = config.rasterization_state.primitive == PrimitiveType::TriangleStrip ?
+                             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF :
+                             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+  desc.PrimitiveTopologyType = GetD3DTopologyType(config.rasterization_state);
+  if (config.framebuffer_state.color_texture_format != AbstractTextureFormat::Undefined)
+  {
+    desc.NumRenderTargets = 1;
+    // The RTV format depends on whether logic ops are enabled (integer vs. regular view).
+    desc.RTVFormats[0] = D3DCommon::GetRTVFormatForAbstractFormat(
+        config.framebuffer_state.color_texture_format, config.blending_state.logicopenable);
+  }
+  if (config.framebuffer_state.depth_texture_format != AbstractTextureFormat::Undefined)
+    desc.DSVFormat =
+        D3DCommon::GetDSVFormatForAbstractFormat(config.framebuffer_state.depth_texture_format);
+  desc.SampleDesc.Count = config.framebuffer_state.samples;
+  desc.NodeMask = 1;
+
+  ID3D12PipelineState* pso;
+  HRESULT hr = g_dx_context->GetDevice()->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));
+  CHECK(SUCCEEDED(hr), "Create PSO");
+  if (FAILED(hr))
+    return nullptr;
+
+  // Mirrors GetD3DBlendDesc: the logic op is only active when blending is disabled.
+  const bool use_integer_rtv =
+      !config.blending_state.blendenable && config.blending_state.logicopenable;
+  return std::make_unique<DXPipeline>(pso, desc.pRootSignature, config.usage,
+                                      GetD3DTopology(config.rasterization_state), use_integer_rtv);
+}
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/DXPipeline.h b/Source/Core/VideoBackends/D3D12/DXPipeline.h
new file mode 100644
index 0000000000..04608cae1f
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/DXPipeline.h
@@ -0,0 +1,38 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <d3d12.h>
+#include <memory>
+
+#include "VideoCommon/AbstractPipeline.h"
+
+namespace DX12
+{
+// D3D12 implementation of AbstractPipeline: a pipeline state object plus the root signature,
+// usage, primitive topology and integer-RTV flag it was created with.
+class DXPipeline final : public AbstractPipeline
+{
+public:
+  // Stores the arguments as-is. The destructor calls Release() on `pipeline`, so one reference
+  // to it is handed over to this object; `root_signature` is only stored.
+  DXPipeline(ID3D12PipelineState* pipeline, ID3D12RootSignature* root_signature,
+             AbstractPipelineUsage usage, D3D12_PRIMITIVE_TOPOLOGY primitive_topology,
+             bool use_integer_rtv);
+  ~DXPipeline() override;
+
+  // Builds a pipeline from the abstract config. Returns nullptr on failure.
+  static std::unique_ptr<DXPipeline> Create(const AbstractPipelineConfig& config);
+
+  // Plain accessors for the values passed to the constructor.
+  ID3D12PipelineState* GetPipeline() const { return m_pipeline; }
+  ID3D12RootSignature* GetRootSignature() const { return m_root_signature; }
+  AbstractPipelineUsage GetUsage() const { return m_usage; }
+  D3D12_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_primitive_topology; }
+  bool UseIntegerRTV() const { return m_use_integer_rtv; }
+
+private:
+  ID3D12PipelineState* m_pipeline;
+  ID3D12RootSignature* m_root_signature;
+  AbstractPipelineUsage m_usage;
+  D3D12_PRIMITIVE_TOPOLOGY m_primitive_topology;
+  bool m_use_integer_rtv;
+};
+
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/DXShader.cpp b/Source/Core/VideoBackends/D3D12/DXShader.cpp
new file mode 100644
index 0000000000..e82d2bfc3a
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/DXShader.cpp
@@ -0,0 +1,55 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "VideoBackends/D3D12/DXShader.h"
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3D12/DXContext.h"
+
+namespace DX12
+{
+// Private: instances are created through CreateFromBytecode()/CreateFromSource().
+DXShader::DXShader(ShaderStage stage, BinaryData bytecode)
+    : D3DCommon::Shader(stage, std::move(bytecode))
+{
+}
+
+DXShader::~DXShader() = default;
+
+// Wraps already-compiled bytecode. For compute shaders the compute pipeline state is created
+// immediately, and nullptr is returned if that fails.
+std::unique_ptr<DXShader> DXShader::CreateFromBytecode(ShaderStage stage, BinaryData bytecode)
+{
+  std::unique_ptr<DXShader> shader(new DXShader(stage, std::move(bytecode)));
+  if (stage == ShaderStage::Compute && !shader->CreateComputePipeline())
+    return nullptr;
+
+  return shader;
+}
+
+// Compiles `length` bytes of `source` for the context's feature level, then defers to
+// CreateFromBytecode(). Returns nullptr if compilation fails.
+std::unique_ptr<DXShader> DXShader::CreateFromSource(ShaderStage stage, const char* source,
+                                                     size_t length)
+{
+  BinaryData bytecode;
+  if (!CompileShader(g_dx_context->GetFeatureLevel(), &bytecode, stage, source, length))
+    return nullptr;
+
+  return CreateFromBytecode(stage, std::move(bytecode));
+}
+
+// Returns a view (pointer + size) of the stored bytecode; no copy is made, so the result must
+// not outlive this shader.
+D3D12_SHADER_BYTECODE DXShader::GetD3DByteCode() const
+{
+  return D3D12_SHADER_BYTECODE{m_bytecode.data(), m_bytecode.size()};
+}
+
+// Creates the compute pipeline state from this shader's bytecode and the context's compute
+// root signature. Returns false if the device call fails.
+bool DXShader::CreateComputePipeline()
+{
+  D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {};
+  desc.pRootSignature = g_dx_context->GetComputeRootSignature();
+  desc.CS = GetD3DByteCode();
+  desc.NodeMask = 1;
+
+  HRESULT hr = g_dx_context->GetDevice()->CreateComputePipelineState(
+      &desc, IID_PPV_ARGS(&m_compute_pipeline));
+  CHECK(SUCCEEDED(hr), "Creating compute pipeline failed");
+  return SUCCEEDED(hr);
+}
+
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/DXShader.h b/Source/Core/VideoBackends/D3D12/DXShader.h
new file mode 100644
index 0000000000..cfc9d466de
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/DXShader.h
@@ -0,0 +1,32 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+#include <memory>
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3DCommon/Shader.h"
+
+namespace DX12
+{
+// D3D12 shader object: compiled bytecode (held by the D3DCommon::Shader base) plus, for compute
+// shaders, the compute pipeline state built from it.
+class DXShader final : public D3DCommon::Shader
+{
+public:
+  ~DXShader() override;
+
+  // Compute pipeline state; only created for ShaderStage::Compute, otherwise null.
+  ID3D12PipelineState* GetComputePipeline() const { return m_compute_pipeline.Get(); }
+  // Pointer/size view of the stored bytecode, for use in pipeline state descriptions.
+  D3D12_SHADER_BYTECODE GetD3DByteCode() const;
+
+  // Factory functions; both return nullptr on failure.
+  static std::unique_ptr<DXShader> CreateFromBytecode(ShaderStage stage, BinaryData bytecode);
+  static std::unique_ptr<DXShader> CreateFromSource(ShaderStage stage, const char* source,
+                                                    size_t length);
+
+private:
+  DXShader(ShaderStage stage, BinaryData bytecode);
+
+  bool CreateComputePipeline();
+
+  ComPtr<ID3D12PipelineState> m_compute_pipeline;
+};
+
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/DXTexture.cpp b/Source/Core/VideoBackends/D3D12/DXTexture.cpp
new file mode 100644
index 0000000000..f27ba5ad54
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/DXTexture.cpp
@@ -0,0 +1,666 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+ +#include "VideoBackends/D3D12/DXTexture.h" +#include "Common/Align.h" +#include "Common/Assert.h" +#include "VideoBackends/D3D12/Common.h" +#include "VideoBackends/D3D12/DXContext.h" +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "VideoBackends/D3D12/Renderer.h" +#include "VideoBackends/D3D12/StreamBuffer.h" + +namespace DX12 +{ +static D3D12_BOX RectangleToBox(const MathUtil::Rectangle& rc) +{ + return D3D12_BOX{static_cast(rc.left), static_cast(rc.top), 0, + static_cast(rc.right), static_cast(rc.bottom), 1}; +} + +static ComPtr CreateTextureUploadBuffer(u32 buffer_size) +{ + const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_UPLOAD}; + const D3D12_RESOURCE_DESC desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + buffer_size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + ComPtr resource; + HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource( + &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&resource)); + CHECK(SUCCEEDED(hr), "Create texture upload buffer"); + return resource; +} + +DXTexture::DXTexture(const TextureConfig& config, ID3D12Resource* resource, + D3D12_RESOURCE_STATES state) + : AbstractTexture(config), m_resource(resource), m_state(state) +{ +} + +DXTexture::~DXTexture() +{ + if (m_uav_descriptor) + { + g_dx_context->DeferDescriptorDestruction(g_dx_context->GetDescriptorHeapManager(), + m_uav_descriptor.index); + } + + if (m_srv_descriptor) + { + g_dx_context->DeferDescriptorDestruction(g_dx_context->GetDescriptorHeapManager(), + m_srv_descriptor.index); + } + if (m_resource) + g_dx_context->DeferResourceDestruction(m_resource.Get()); +} + +std::unique_ptr DXTexture::Create(const TextureConfig& config) +{ + constexpr D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_DEFAULT}; + D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + 
D3D12_RESOURCE_FLAGS resource_flags = D3D12_RESOURCE_FLAG_NONE; + if (config.IsRenderTarget()) + { + if (IsDepthFormat(config.format)) + { + resource_state = D3D12_RESOURCE_STATE_DEPTH_WRITE; + resource_flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } + else + { + resource_state = D3D12_RESOURCE_STATE_RENDER_TARGET; + resource_flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + } + if (config.IsComputeImage()) + resource_flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + const D3D12_RESOURCE_DESC resource_desc = { + D3D12_RESOURCE_DIMENSION_TEXTURE2D, + 0, + config.width, + config.height, + static_cast(config.layers), + static_cast(config.levels), + D3DCommon::GetDXGIFormatForAbstractFormat(config.format, config.IsRenderTarget()), + {config.samples, 0}, + D3D12_TEXTURE_LAYOUT_UNKNOWN, + resource_flags}; + + D3D12_CLEAR_VALUE optimized_clear_value = {}; + if (config.IsRenderTarget()) + { + optimized_clear_value.Format = + IsDepthFormat(config.format) ? + D3DCommon::GetDSVFormatForAbstractFormat(config.format) : + D3DCommon::GetRTVFormatForAbstractFormat(config.format, false); + } + + ComPtr resource; + HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource( + &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, resource_state, + config.IsRenderTarget() ? 
&optimized_clear_value : nullptr, IID_PPV_ARGS(&resource)); + CHECK(SUCCEEDED(hr), "Create D3D12 texture resource"); + if (FAILED(hr)) + return nullptr; + + auto tex = std::unique_ptr(new DXTexture(config, resource.Get(), resource_state)); + if (!tex->CreateSRVDescriptor() || (config.IsComputeImage() && !tex->CreateUAVDescriptor())) + return nullptr; + + return tex; +} + +std::unique_ptr DXTexture::CreateAdopted(ID3D12Resource* resource) +{ + const D3D12_RESOURCE_DESC desc = resource->GetDesc(); + const AbstractTextureFormat format = D3DCommon::GetAbstractFormatForDXGIFormat(desc.Format); + if (desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D || + format == AbstractTextureFormat::Undefined) + { + PanicAlert("Unknown format for adopted texture"); + return nullptr; + } + + TextureConfig config(static_cast(desc.Width), desc.Height, desc.MipLevels, + desc.DepthOrArraySize, desc.SampleDesc.Count, format, 0); + if (desc.Flags & + (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + config.flags |= AbstractTextureFlag_RenderTarget; + } + if (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + config.flags |= AbstractTextureFlag_ComputeImage; + + auto tex = + std::unique_ptr(new DXTexture(config, resource, D3D12_RESOURCE_STATE_COMMON)); + if (!tex->CreateSRVDescriptor()) + return nullptr; + + return tex; +} + +bool DXTexture::CreateSRVDescriptor() +{ + if (!g_dx_context->GetDescriptorHeapManager().Allocate(&m_srv_descriptor)) + { + PanicAlert("Failed to allocate SRV descriptor"); + return false; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC desc = {D3DCommon::GetSRVFormatForAbstractFormat(m_config.format), + m_config.IsMultisampled() ? 
+ D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_SRV_DIMENSION_TEXTURE2DARRAY, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + if (m_config.IsMultisampled()) + { + desc.Texture2DMSArray.ArraySize = m_config.layers; + } + else + { + desc.Texture2DArray.MipLevels = m_config.levels; + desc.Texture2DArray.ArraySize = m_config.layers; + } + g_dx_context->GetDevice()->CreateShaderResourceView(m_resource.Get(), &desc, + m_srv_descriptor.cpu_handle); + return true; +} + +bool DXTexture::CreateUAVDescriptor() +{ + if (!g_dx_context->GetDescriptorHeapManager().Allocate(&m_uav_descriptor)) + { + PanicAlert("Failed to allocate UAV descriptor"); + return false; + } + + D3D12_UNORDERED_ACCESS_VIEW_DESC desc = { + D3DCommon::GetSRVFormatForAbstractFormat(m_config.format), + D3D12_UAV_DIMENSION_TEXTURE2DARRAY}; + desc.Texture2DArray.ArraySize = m_config.layers; + g_dx_context->GetDevice()->CreateUnorderedAccessView(m_resource.Get(), nullptr, &desc, + m_uav_descriptor.cpu_handle); + + return true; +} + +void DXTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, + size_t buffer_size) +{ + // Textures greater than 1024*1024 will be put in staging textures that are released after + // execution instead. A 2048x2048 texture is 16MB, and we'd only fit four of these in our + // streaming buffer and be blocking frequently. Games are unlikely to have textures this + // large anyway, so it's only really an issue for HD texture packs, and memory is not + // a limiting factor in these scenarios anyway. + constexpr u32 STAGING_BUFFER_UPLOAD_THRESHOLD = 1024 * 1024 * 4; + + // Determine the stride in the stream buffer. It must be aligned to 256 bytes. 
+ const u32 block_size = GetBlockSizeForFormat(GetFormat()); + const u32 num_rows = Common::AlignUp(height, block_size) / block_size; + const u32 source_stride = CalculateStrideForFormat(m_config.format, row_length); + const u32 upload_stride = Common::AlignUp(source_stride, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + const u32 upload_size = upload_stride * num_rows; + + // Both paths need us in COPY_DEST state, and avoids switching back and forth for mips. + TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + + ComPtr staging_buffer; + ID3D12Resource* upload_buffer_resource; + void* upload_buffer_ptr; + u32 upload_buffer_offset; + if (upload_size >= STAGING_BUFFER_UPLOAD_THRESHOLD) + { + const D3D12_RANGE read_range = {0, 0}; + staging_buffer = CreateTextureUploadBuffer(upload_size); + if (!staging_buffer || FAILED(staging_buffer->Map(0, &read_range, &upload_buffer_ptr))) + { + PanicAlert("Failed to allocate/map temporary texture upload buffer"); + return; + } + + // We defer releasing the buffer until after the command list with the copy has executed. + g_dx_context->DeferResourceDestruction(staging_buffer.Get()); + upload_buffer_resource = staging_buffer.Get(); + upload_buffer_offset = 0; + } + else + { + if (!g_dx_context->GetTextureUploadBuffer().ReserveMemory( + upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); + Renderer::GetInstance()->ExecuteCommandList(false); + if (!g_dx_context->GetTextureUploadBuffer().ReserveMemory( + upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + PanicAlert("Failed to allocate texture upload buffer"); + return; + } + } + + upload_buffer_resource = g_dx_context->GetTextureUploadBuffer().GetBuffer(); + upload_buffer_ptr = g_dx_context->GetTextureUploadBuffer().GetCurrentHostPointer(); + upload_buffer_offset = g_dx_context->GetTextureUploadBuffer().GetCurrentOffset(); + } + + // Copy in, slow path if the pitch differs. 
+ if (source_stride != upload_stride) + { + const u8* src_ptr = buffer; + const u32 copy_size = std::min(source_stride, upload_stride); + u8* dst_ptr = reinterpret_cast(upload_buffer_ptr); + for (u32 i = 0; i < num_rows; i++) + { + std::memcpy(dst_ptr, src_ptr, copy_size); + src_ptr += source_stride; + dst_ptr += upload_stride; + } + } + else + { + std::memcpy(upload_buffer_ptr, buffer, std::min(buffer_size, upload_size)); + } + + if (staging_buffer) + { + const D3D12_RANGE write_range = {0, std::min(buffer_size, upload_size)}; + staging_buffer->Unmap(0, &write_range); + } + else + { + g_dx_context->GetTextureUploadBuffer().CommitMemory(upload_size); + } + + // Issue copy from buffer->texture. + const u32 aligned_width = Common::AlignUp(width, block_size); + const u32 aligned_height = Common::AlignUp(height, block_size); + const D3D12_TEXTURE_COPY_LOCATION dst_loc = {m_resource.Get(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + {static_cast(CalcSubresource(level, 0))}}; + const D3D12_TEXTURE_COPY_LOCATION src_loc = { + upload_buffer_resource, + D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + {{upload_buffer_offset, D3DCommon::GetDXGIFormatForAbstractFormat(m_config.format, false), + aligned_width, aligned_height, 1, upload_stride}}}; + const D3D12_BOX src_box{0, 0, 0, aligned_width, aligned_height, 1}; + g_dx_context->GetCommandList()->CopyTextureRegion(&dst_loc, 0, 0, 0, &src_loc, &src_box); + + // Preemptively transition to shader read only after uploading the last mip level, as we're + // likely finished with writes to this texture for now. 
+ if (level == (m_config.levels - 1)) + TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); +} + +void DXTexture::CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) +{ + const DXTexture* src_dxtex = static_cast(src); + ASSERT(static_cast(src_rect.right) <= src->GetWidth() && + static_cast(src_rect.bottom) <= src->GetHeight() && src_layer <= src->GetLayers() && + src_level <= src->GetLevels() && static_cast(dst_rect.right) <= GetWidth() && + static_cast(dst_rect.bottom) <= GetHeight() && dst_layer <= GetLayers() && + dst_level <= GetLevels() && src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + const D3D12_TEXTURE_COPY_LOCATION dst_loc = { + m_resource.Get(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + {static_cast(CalcSubresource(dst_level, dst_layer))}}; + const D3D12_TEXTURE_COPY_LOCATION src_loc = { + src_dxtex->m_resource.Get(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + {static_cast(src_dxtex->CalcSubresource(src_level, src_layer))}}; + const D3D12_BOX src_box = RectangleToBox(src_rect); + const D3D12_RESOURCE_STATES old_src_state = src_dxtex->m_state; + src_dxtex->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + + g_dx_context->GetCommandList()->CopyTextureRegion(&dst_loc, dst_rect.left, dst_rect.top, 0, + &src_loc, &src_box); + + // Only restore the source layout. Destination is restored by FinishedRendering(). 
+ src_dxtex->TransitionToState(old_src_state); +} + +void DXTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, + u32 layer, u32 level) +{ + const DXTexture* src_dxtex = static_cast(src); + + D3D12_RESOURCE_STATES old_src_state = src_dxtex->m_state; + src_dxtex->TransitionToState(D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + TransitionToState(D3D12_RESOURCE_STATE_RESOLVE_DEST); + + g_dx_context->GetCommandList()->ResolveSubresource( + m_resource.Get(), CalcSubresource(level, layer), src_dxtex->m_resource.Get(), + src_dxtex->CalcSubresource(level, layer), + D3DCommon::GetDXGIFormatForAbstractFormat(m_config.format, false)); + + // Only restore the source layout. Destination is restored by FinishedRendering(). + src_dxtex->TransitionToState(old_src_state); +} + +void DXTexture::FinishedRendering() +{ + if (m_state != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); +} + +void DXTexture::TransitionToState(D3D12_RESOURCE_STATES state) const +{ + if (m_state == state) + return; + + ResourceBarrier(g_dx_context->GetCommandList(), m_resource.Get(), m_state, state); + m_state = state; +} + +void DXTexture::DestroyResource() +{ + if (m_uav_descriptor) + g_dx_context->GetDescriptorHeapManager().Free(m_uav_descriptor); + + if (m_srv_descriptor) + g_dx_context->GetDescriptorHeapManager().Free(m_srv_descriptor); + + m_resource.Reset(); +} + +DXFramebuffer::DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + u32 width, u32 height, u32 layers, u32 samples) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples) +{ +} + +DXFramebuffer::~DXFramebuffer() +{ + if (m_depth_attachment) + g_dx_context->DeferDescriptorDestruction(g_dx_context->GetDSVHeapManager(), + m_dsv_descriptor.index); + if (m_color_attachment) + { + if 
(m_int_rtv_descriptor) + { + g_dx_context->DeferDescriptorDestruction(g_dx_context->GetRTVHeapManager(), + m_int_rtv_descriptor.index); + } + g_dx_context->DeferDescriptorDestruction(g_dx_context->GetRTVHeapManager(), + m_rtv_descriptor.index); + } +} + +std::unique_ptr DXFramebuffer::Create(DXTexture* color_attachment, + DXTexture* depth_attachment) +{ + if (!ValidateConfig(color_attachment, depth_attachment)) + return nullptr; + + const AbstractTextureFormat color_format = + color_attachment ? color_attachment->GetFormat() : AbstractTextureFormat::Undefined; + const AbstractTextureFormat depth_format = + depth_attachment ? depth_attachment->GetFormat() : AbstractTextureFormat::Undefined; + const DXTexture* either_attachment = color_attachment ? color_attachment : depth_attachment; + const u32 width = either_attachment->GetWidth(); + const u32 height = either_attachment->GetHeight(); + const u32 layers = either_attachment->GetLayers(); + const u32 samples = either_attachment->GetSamples(); + + std::unique_ptr fb(new DXFramebuffer(color_attachment, depth_attachment, + color_format, depth_format, width, height, + layers, samples)); + if ((color_attachment && !fb->CreateRTVDescriptor()) || + (depth_attachment && !fb->CreateDSVDescriptor())) + { + return nullptr; + } + + return fb; +} + +bool DXFramebuffer::CreateRTVDescriptor() +{ + if (!g_dx_context->GetRTVHeapManager().Allocate(&m_rtv_descriptor)) + { + PanicAlert("Failed to allocate RTV descriptor"); + return false; + } + + const bool multisampled = m_samples > 1; + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { + D3DCommon::GetRTVFormatForAbstractFormat(m_color_format, false), + multisampled ? 
D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : D3D12_RTV_DIMENSION_TEXTURE2DARRAY}; + if (multisampled) + rtv_desc.Texture2DMSArray.ArraySize = m_layers; + else + rtv_desc.Texture2DArray.ArraySize = m_layers; + g_dx_context->GetDevice()->CreateRenderTargetView( + static_cast(m_color_attachment)->GetResource(), &rtv_desc, + m_rtv_descriptor.cpu_handle); + + DXGI_FORMAT int_format = D3DCommon::GetRTVFormatForAbstractFormat(m_color_format, true); + if (int_format != rtv_desc.Format) + { + if (!g_dx_context->GetRTVHeapManager().Allocate(&m_int_rtv_descriptor)) + return false; + + rtv_desc.Format = int_format; + g_dx_context->GetDevice()->CreateRenderTargetView( + static_cast(m_color_attachment)->GetResource(), &rtv_desc, + m_int_rtv_descriptor.cpu_handle); + } + + return true; +} + +bool DXFramebuffer::CreateDSVDescriptor() +{ + if (!g_dx_context->GetDSVHeapManager().Allocate(&m_dsv_descriptor)) + { + PanicAlert("Failed to allocate RTV descriptor"); + return false; + } + + const bool multisampled = m_samples > 1; + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { + D3DCommon::GetDSVFormatForAbstractFormat(m_depth_format), + multisampled ? 
D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : D3D12_DSV_DIMENSION_TEXTURE2DARRAY, + D3D12_DSV_FLAG_NONE}; + if (multisampled) + dsv_desc.Texture2DMSArray.ArraySize = m_layers; + else + dsv_desc.Texture2DArray.ArraySize = m_layers; + g_dx_context->GetDevice()->CreateDepthStencilView( + static_cast(m_depth_attachment)->GetResource(), &dsv_desc, + m_dsv_descriptor.cpu_handle); + return true; +} + +DXStagingTexture::DXStagingTexture(StagingTextureType type, const TextureConfig& config, + ID3D12Resource* resource, u32 stride, u32 buffer_size) + : AbstractStagingTexture(type, config), m_resource(resource), m_buffer_size(buffer_size) +{ + m_map_stride = stride; +} + +DXStagingTexture::~DXStagingTexture() +{ + g_dx_context->DeferResourceDestruction(m_resource.Get()); +} + +void DXStagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect) +{ + const DXTexture* src_tex = static_cast(src); + ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); + ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src_tex->GetWidth() && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= src_tex->GetHeight()); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); + + const D3D12_RESOURCE_STATES old_state = src_tex->GetState(); + src_tex->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + + // Can't copy while it's mapped like in Vulkan. + Unmap(); + + // Copy from VRAM -> host-visible memory. 
+ const D3D12_TEXTURE_COPY_LOCATION dst_loc = { + m_resource.Get(), + D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + {0, + {D3DCommon::GetDXGIFormatForAbstractFormat(m_config.format, false), m_config.width, + m_config.height, 1u, static_cast(m_map_stride)}}}; + const D3D12_TEXTURE_COPY_LOCATION src_loc = { + src_tex->GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + static_cast(src_tex->CalcSubresource(src_level, src_layer))}; + const D3D12_BOX src_box = RectangleToBox(src_rect); + g_dx_context->GetCommandList()->CopyTextureRegion(&dst_loc, dst_rect.left, dst_rect.top, 0, + &src_loc, &src_box); + + // Restore old source texture layout. + src_tex->TransitionToState(old_state); + + // Data is ready when the current command list is complete. + m_needs_flush = true; + m_completed_fence = g_dx_context->GetCurrentFenceValue(); +} + +void DXStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) +{ + const DXTexture* dst_tex = static_cast(dst); + ASSERT(m_type == StagingTextureType::Upload || m_type == StagingTextureType::Mutable); + ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst_tex->GetWidth() && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst_tex->GetHeight()); + + const D3D12_RESOURCE_STATES old_state = dst_tex->GetState(); + dst_tex->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + + // Can't copy while it's mapped like in Vulkan. + Unmap(); + + // Copy from VRAM -> host-visible memory. 
+ const D3D12_TEXTURE_COPY_LOCATION dst_loc = { + dst_tex->GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + static_cast(dst_tex->CalcSubresource(dst_level, dst_layer))}; + const D3D12_TEXTURE_COPY_LOCATION src_loc = { + m_resource.Get(), + D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + {0, + {D3DCommon::GetDXGIFormatForAbstractFormat(m_config.format, false), m_config.width, + m_config.height, 1u, static_cast(m_map_stride)}}}; + const D3D12_BOX src_box = RectangleToBox(src_rect); + g_dx_context->GetCommandList()->CopyTextureRegion(&dst_loc, dst_rect.left, dst_rect.top, 0, + &src_loc, &src_box); + + // Restore old source texture layout. + dst_tex->TransitionToState(old_state); + + // Data is ready when the current command list is complete. + m_needs_flush = true; + m_completed_fence = g_dx_context->GetCurrentFenceValue(); +} + +bool DXStagingTexture::Map() +{ + if (m_map_pointer) + return true; + + const D3D12_RANGE read_range = {0u, m_type == StagingTextureType::Upload ? 0u : m_buffer_size}; + HRESULT hr = m_resource->Map(0, &read_range, reinterpret_cast(&m_map_pointer)); + CHECK(SUCCEEDED(hr), "Map resource failed"); + if (FAILED(hr)) + return false; + + return true; +} + +void DXStagingTexture::Unmap() +{ + if (!m_map_pointer) + return; + + const D3D12_RANGE write_range = {0u, m_type != StagingTextureType::Upload ? 0 : m_buffer_size}; + m_resource->Unmap(0, &write_range); + m_map_pointer = nullptr; +} + +void DXStagingTexture::Flush() +{ + if (!m_needs_flush) + return; + + m_needs_flush = false; + + // If the completed fence is the same as the current command buffer fence, we need to execute + // the current list and wait for it to complete. This is the slowest path. Otherwise, if the + // command list with the copy has been submitted, we only need to wait for the fence. 
+ if (m_completed_fence == g_dx_context->GetCurrentFenceValue()) + Renderer::GetInstance()->ExecuteCommandList(true); + else + g_dx_context->WaitForFence(m_completed_fence); +} + +std::unique_ptr DXStagingTexture::Create(StagingTextureType type, + const TextureConfig& config) +{ + ASSERT(config.levels == 1 && config.layers == 1 && config.samples == 1); + + // Readback and mutable share the same heap type. + const bool is_upload = type == StagingTextureType::Upload; + const D3D12_HEAP_PROPERTIES heap_properties = {is_upload ? D3D12_HEAP_TYPE_UPLOAD : + D3D12_HEAP_TYPE_READBACK}; + + const u32 texel_size = AbstractTexture::GetTexelSizeForFormat(config.format); + const u32 stride = Common::AlignUp(config.width * texel_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 size = stride * config.height; + + const D3D12_RESOURCE_DESC desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + // Readback textures are stuck in COPY_DEST and are never GPU readable. + // Upload textures are stuck in GENERIC_READ, and are never CPU readable. + ComPtr resource; + HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource( + &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, + is_upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&resource)); + CHECK(SUCCEEDED(hr), "Create staging texture resource"); + if (FAILED(hr)) + return nullptr; + + return std::unique_ptr( + new DXStagingTexture(type, config, resource.Get(), stride, size)); +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DXTexture.h b/Source/Core/VideoBackends/D3D12/DXTexture.h new file mode 100644 index 0000000000..5faab6cf82 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXTexture.h @@ -0,0 +1,126 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "Common/CommonTypes.h" +#include "VideoBackends/D3D12/Common.h" +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" + +namespace DX12 +{ +class DXTexture final : public AbstractTexture +{ +public: + ~DXTexture(); + + static std::unique_ptr Create(const TextureConfig& config); + static std::unique_ptr CreateAdopted(ID3D12Resource* resource); + + void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, + size_t buffer_size) override; + void CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) override; + void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, + u32 layer, u32 level) override; + void FinishedRendering() override; + + ID3D12Resource* GetResource() const { return m_resource.Get(); } + const DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } + const DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; } + D3D12_RESOURCE_STATES GetState() const { return m_state; } + u32 CalcSubresource(u32 level, u32 layer) const { return level + layer * m_config.layers; } + + void TransitionToState(D3D12_RESOURCE_STATES state) const; + + // Destoys the resource backing this texture. The resource must not be in use by the GPU. 
+ void DestroyResource(); + +private: + DXTexture(const TextureConfig& config, ID3D12Resource* resource, D3D12_RESOURCE_STATES state); + + bool CreateSRVDescriptor(); + bool CreateUAVDescriptor(); + + ComPtr m_resource; + DescriptorHandle m_srv_descriptor = {}; + DescriptorHandle m_uav_descriptor = {}; + + mutable D3D12_RESOURCE_STATES m_state; +}; + +class DXFramebuffer final : public AbstractFramebuffer +{ +public: + ~DXFramebuffer() override; + + const DescriptorHandle& GetRTVDescriptor() const { return m_rtv_descriptor; } + const DescriptorHandle& GetIntRTVDescriptor() const { return m_int_rtv_descriptor; } + const DescriptorHandle& GetDSVDescriptor() const { return m_dsv_descriptor; } + + UINT GetRTVDescriptorCount() const { return m_color_attachment ? 1 : 0; } + const D3D12_CPU_DESCRIPTOR_HANDLE* GetRTVDescriptorArray() const + { + return m_color_attachment ? &m_rtv_descriptor.cpu_handle : nullptr; + } + const D3D12_CPU_DESCRIPTOR_HANDLE* GetIntRTVDescriptorArray() const + { + return m_color_attachment ? &m_int_rtv_descriptor.cpu_handle : nullptr; + } + const D3D12_CPU_DESCRIPTOR_HANDLE* GetDSVDescriptorArray() const + { + return m_depth_attachment ? 
&m_dsv_descriptor.cpu_handle : nullptr; + } + + static std::unique_ptr Create(DXTexture* color_attachment, + DXTexture* depth_attachment); + +private: + DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, + u32 height, u32 layers, u32 samples); + + bool CreateRTVDescriptor(); + bool CreateDSVDescriptor(); + + DescriptorHandle m_rtv_descriptor = {}; + DescriptorHandle m_int_rtv_descriptor = {}; + DescriptorHandle m_dsv_descriptor = {}; +}; + +class DXStagingTexture final : public AbstractStagingTexture +{ +public: + ~DXStagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + + static std::unique_ptr Create(StagingTextureType type, + const TextureConfig& config); + +private: + DXStagingTexture(StagingTextureType type, const TextureConfig& config, ID3D12Resource* resource, + u32 stride, u32 buffer_size); + + ComPtr m_resource; + u64 m_completed_fence = 0; + u32 m_buffer_size; +}; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DXVertexFormat.cpp b/Source/Core/VideoBackends/D3D12/DXVertexFormat.cpp new file mode 100644 index 0000000000..5d49810d19 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXVertexFormat.cpp @@ -0,0 +1,130 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoBackends/D3D12/DXVertexFormat.h" + +#include "Common/Assert.h" + +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace DX12 +{ +static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer) +{ + // NOTE: 3-component formats are not valid. + static const DXGI_FORMAT float_type_lookup[][4] = { + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE + {DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE + {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT + {DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + }; + + static const DXGI_FORMAT integer_type_lookup[][4] = { + {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE + {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE + {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT + {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + }; + + ASSERT(components > 0 && components <= 4); + return integer ? 
integer_type_lookup[t][components - 1] : float_type_lookup[t][components - 1]; +} + +DXVertexFormat::DXVertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl) +{ + MapAttributes(); +} + +void DXVertexFormat::GetInputLayoutDesc(D3D12_INPUT_LAYOUT_DESC* desc) const +{ + desc->pInputElementDescs = m_attribute_descriptions.data(); + desc->NumElements = m_num_attributes; +} + +void DXVertexFormat::AddAttribute(const char* semantic_name, u32 semantic_index, u32 slot, + DXGI_FORMAT format, u32 offset) +{ + ASSERT(m_num_attributes < MAX_VERTEX_ATTRIBUTES); + + auto* attr_desc = &m_attribute_descriptions[m_num_attributes]; + attr_desc->SemanticName = semantic_name; + attr_desc->SemanticIndex = semantic_index; + attr_desc->Format = format; + attr_desc->InputSlot = slot; + attr_desc->AlignedByteOffset = offset; + attr_desc->InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + attr_desc->InstanceDataStepRate = 0; + + m_num_attributes++; +} + +void DXVertexFormat::MapAttributes() +{ + m_num_attributes = 0; + + if (m_decl.position.enable) + { + AddAttribute( + "POSITION", 0, 0, + VarToDXGIFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer), + m_decl.position.offset); + } + + for (uint32_t i = 0; i < 3; i++) + { + if (m_decl.normals[i].enable) + { + AddAttribute("NORMAL", i, 0, + VarToDXGIFormat(m_decl.normals[i].type, m_decl.normals[i].components, + m_decl.normals[i].integer), + m_decl.normals[i].offset); + } + } + + for (uint32_t i = 0; i < 2; i++) + { + if (m_decl.colors[i].enable) + { + AddAttribute("COLOR", i, 0, + VarToDXGIFormat(m_decl.colors[i].type, m_decl.colors[i].components, + m_decl.colors[i].integer), + m_decl.colors[i].offset); + } + } + + for (uint32_t i = 0; i < 8; i++) + { + if (m_decl.texcoords[i].enable) + { + AddAttribute("TEXCOORD", i, 0, + VarToDXGIFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components, + m_decl.texcoords[i].integer), + m_decl.texcoords[i].offset); + } + } + + 
if (m_decl.posmtx.enable) + { + AddAttribute( + "BLENDINDICES", 0, 0, + VarToDXGIFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer), + m_decl.posmtx.offset); + } +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DXVertexFormat.h b/Source/Core/VideoBackends/D3D12/DXVertexFormat.h new file mode 100644 index 0000000000..02f27d544b --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DXVertexFormat.h @@ -0,0 +1,33 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" +#include "VideoCommon/NativeVertexFormat.h" + +namespace DX12 +{ +class DXVertexFormat : public NativeVertexFormat +{ +public: + static const u32 MAX_VERTEX_ATTRIBUTES = 16; + + DXVertexFormat(const PortableVertexDeclaration& vtx_decl); + + // Passed to pipeline state creation + void GetInputLayoutDesc(D3D12_INPUT_LAYOUT_DESC* desc) const; + +private: + void AddAttribute(const char* semantic_name, u32 semantic_index, u32 slot, DXGI_FORMAT format, + u32 offset); + void MapAttributes(); + + std::array m_attribute_descriptions = {}; + u32 m_num_attributes = 0; +}; +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/D3D12/DescriptorAllocator.cpp b/Source/Core/VideoBackends/D3D12/DescriptorAllocator.cpp new file mode 100644 index 0000000000..220099642c --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DescriptorAllocator.cpp @@ -0,0 +1,121 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoBackends/D3D12/DescriptorAllocator.h" +#include "VideoBackends/D3D12/DXContext.h" + +namespace DX12 +{ +DescriptorAllocator::DescriptorAllocator() = default; +DescriptorAllocator::~DescriptorAllocator() = default; + +bool DescriptorAllocator::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, + u32 num_descriptors) +{ + const D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE}; + HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap)); + CHECK(SUCCEEDED(hr), "Creating descriptor heap for linear allocator failed"); + if (FAILED(hr)) + return false; + + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + return true; +} + +bool DescriptorAllocator::Allocate(u32 num_handles, DescriptorHandle* out_base_handle) +{ + if ((m_current_offset + num_handles) > m_num_descriptors) + return false; + + out_base_handle->index = m_current_offset; + out_base_handle->cpu_handle.ptr = + m_heap_base_cpu.ptr + m_current_offset * m_descriptor_increment_size; + out_base_handle->gpu_handle.ptr = + m_heap_base_gpu.ptr + m_current_offset * m_descriptor_increment_size; + m_current_offset += num_handles; + return true; +} + +void DescriptorAllocator::Reset() +{ + m_current_offset = 0; +} + +bool operator==(const SamplerStateSet& lhs, const SamplerStateSet& rhs) +{ + // There shouldn't be any padding here, so this will be safe. 
+ return std::memcmp(lhs.states, rhs.states, sizeof(lhs.states)) == 0; +} + +bool operator!=(const SamplerStateSet& lhs, const SamplerStateSet& rhs) +{ + return std::memcmp(lhs.states, rhs.states, sizeof(lhs.states)) != 0; +} + +bool operator<(const SamplerStateSet& lhs, const SamplerStateSet& rhs) +{ + return std::memcmp(lhs.states, rhs.states, sizeof(lhs.states)) < 0; +} + +SamplerAllocator::SamplerAllocator() = default; +SamplerAllocator::~SamplerAllocator() = default; + +bool SamplerAllocator::Create(ID3D12Device* device) +{ + return DescriptorAllocator::Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); +} + +bool SamplerAllocator::GetGroupHandle(const SamplerStateSet& sss, + D3D12_GPU_DESCRIPTOR_HANDLE* handle) +{ + auto it = m_sampler_map.find(sss); + if (it != m_sampler_map.end()) + { + *handle = it->second; + return true; + } + + // Allocate a group of descriptors. + DescriptorHandle allocation; + if (!Allocate(SamplerStateSet::NUM_SAMPLERS_PER_GROUP, &allocation)) + return false; + + // Lookup sampler handles from global cache. + std::array source_handles; + for (u32 i = 0; i < SamplerStateSet::NUM_SAMPLERS_PER_GROUP; i++) + { + if (!g_dx_context->GetSamplerHeapManager().Lookup(sss.states[i], &source_handles[i])) + return false; + } + + // Copy samplers from the sampler heap. + static constexpr std::array source_sizes = { + {1, 1, 1, 1, 1, 1, 1, 1}}; + g_dx_context->GetDevice()->CopyDescriptors( + 1, &allocation.cpu_handle, &SamplerStateSet::NUM_SAMPLERS_PER_GROUP, + SamplerStateSet::NUM_SAMPLERS_PER_GROUP, source_handles.data(), source_sizes.data(), + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + *handle = allocation.gpu_handle; + m_sampler_map.emplace(sss, allocation.gpu_handle); + return true; +} + +bool SamplerAllocator::ShouldReset() const +{ + // We only reset the sampler heap if more than half of the descriptors are used. 
+ // This saves descriptor copying when there isn't a large number of sampler configs per frame. + return m_sampler_map.size() >= (D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE / 2); +} + +void SamplerAllocator::Reset() +{ + DescriptorAllocator::Reset(); + m_sampler_map.clear(); +} +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DescriptorAllocator.h b/Source/Core/VideoBackends/D3D12/DescriptorAllocator.h new file mode 100644 index 0000000000..4e66bec2b1 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DescriptorAllocator.h @@ -0,0 +1,61 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include "VideoBackends/D3D12/DescriptorHeapManager.h" + +namespace DX12 +{ +class DescriptorAllocator +{ +public: + DescriptorAllocator(); + ~DescriptorAllocator(); + + ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } + + bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors); + + bool Allocate(u32 num_handles, DescriptorHandle* out_base_handle); + void Reset(); + +protected: + ComPtr m_descriptor_heap; + u32 m_descriptor_increment_size = 0; + u32 m_num_descriptors = 0; + u32 m_current_offset = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; +}; + +struct SamplerStateSet final +{ + static const u32 NUM_SAMPLERS_PER_GROUP = 8; + SamplerState states[NUM_SAMPLERS_PER_GROUP]; +}; + +bool operator==(const SamplerStateSet& lhs, const SamplerStateSet& rhs); +bool operator!=(const SamplerStateSet& lhs, const SamplerStateSet& rhs); +bool operator<(const SamplerStateSet& lhs, const SamplerStateSet& rhs); + +class SamplerAllocator final : public DescriptorAllocator +{ +public: + SamplerAllocator(); + ~SamplerAllocator(); + + bool Create(ID3D12Device* device); + bool 
GetGroupHandle(const SamplerStateSet& sss, D3D12_GPU_DESCRIPTOR_HANDLE* handle); + bool ShouldReset() const; + void Reset(); + +private: + std::map m_sampler_map; +}; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp new file mode 100644 index 0000000000..2b3627c4f1 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp @@ -0,0 +1,188 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "VideoBackends/D3D12/DescriptorHeapManager.h" +#include "Common/Assert.h" +#include "VideoBackends/D3D12/DXContext.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ +DescriptorHeapManager::DescriptorHeapManager() = default; +DescriptorHeapManager::~DescriptorHeapManager() = default; + +bool DescriptorHeapManager::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, + u32 num_descriptors) +{ + D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), + D3D12_DESCRIPTOR_HEAP_FLAG_NONE}; + + HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap)); + CHECK(SUCCEEDED(hr), "Create descriptor heap"); + if (FAILED(hr)) + return false; + + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); + + // Set all slots to unallocated (1) + const u32 bitset_count = + num_descriptors / BITSET_SIZE + (((num_descriptors % BITSET_SIZE) != 0) ? 1 : 0); + m_free_slots.resize(bitset_count); + for (BitSetType& bs : m_free_slots) + bs.flip(); + + return true; +} + +bool DescriptorHeapManager::Allocate(DescriptorHandle* handle) +{ + // Start past the temporary slots, no point in searching those. 
+ for (u32 group = 0; group < m_free_slots.size(); group++) + { + BitSetType& bs = m_free_slots[group]; + if (bs.none()) + continue; + + u32 bit = 0; + for (; bit < BITSET_SIZE; bit++) + { + if (bs[bit]) + break; + } + + u32 index = group * BITSET_SIZE + bit; + bs[bit] = false; + + handle->index = index; + handle->cpu_handle.ptr = m_heap_base_cpu.ptr + index * m_descriptor_increment_size; + handle->gpu_handle.ptr = m_heap_base_gpu.ptr + index * m_descriptor_increment_size; + return true; + } + + PanicAlert("Out of fixed descriptors"); + return false; +} + +void DescriptorHeapManager::Free(u32 index) +{ + ASSERT(index < m_num_descriptors); + + u32 group = index / BITSET_SIZE; + u32 bit = index % BITSET_SIZE; + m_free_slots[group][bit] = true; +} + +void DescriptorHeapManager::Free(const DescriptorHandle& handle) +{ + Free(handle.index); +} + +SamplerHeapManager::SamplerHeapManager() = default; +SamplerHeapManager::~SamplerHeapManager() = default; + +static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& state) +{ + if (state.mipmap_filter == SamplerState::Filter::Linear) + { + if (state.min_filter == SamplerState::Filter::Linear) + { + desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + D3D12_FILTER_MIN_MAG_MIP_LINEAR : + D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; + } + else + { + desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR : + D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; + } + } + else + { + if (state.min_filter == SamplerState::Filter::Linear) + { + desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT : + D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + } + else + { + desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? 
+ D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT : + D3D12_FILTER_MIN_MAG_MIP_POINT; + } + } + + static constexpr std::array address_modes = { + {D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE_MIRROR}}; + desc->AddressU = address_modes[static_cast(state.wrap_u.Value())]; + desc->AddressV = address_modes[static_cast(state.wrap_v.Value())]; + desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc->MaxLOD = state.max_lod / 16.f; + desc->MinLOD = state.min_lod / 16.f; + desc->MipLODBias = static_cast(state.lod_bias) / 256.f; + desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + + if (state.anisotropic_filtering) + { + desc->Filter = D3D12_FILTER_ANISOTROPIC; + desc->MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy; + } +} + +bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle) +{ + const auto it = m_sampler_map.find(ss.hex); + if (it != m_sampler_map.end()) + { + *handle = it->second; + return true; + } + + if (m_current_offset == m_num_descriptors) + { + // We can clear at any time because the descriptors are copied prior to execution. + // It's still not free, since we have to recreate all our samplers again. 
+ WARN_LOG(VIDEO, "Out of samplers, resetting CPU heap"); + Clear(); + } + + D3D12_SAMPLER_DESC desc = {}; + GetD3DSamplerDesc(&desc, ss); + + const D3D12_CPU_DESCRIPTOR_HANDLE new_handle = {m_heap_base_cpu.ptr + + m_current_offset * m_descriptor_increment_size}; + g_dx_context->GetDevice()->CreateSampler(&desc, new_handle); + + m_sampler_map.emplace(ss.hex, new_handle); + m_current_offset++; + *handle = new_handle; + return true; +} + +void SamplerHeapManager::Clear() +{ + m_sampler_map.clear(); + m_current_offset = 0; +} + +bool SamplerHeapManager::Create(ID3D12Device* device, u32 num_descriptors) +{ + const D3D12_DESCRIPTOR_HEAP_DESC desc = {D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_descriptors}; + HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap)); + CHECK(SUCCEEDED(hr), "Failed to create sampler descriptor heap"); + if (FAILED(hr)) + return false; + + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = + device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + return true; +} +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h new file mode 100644 index 0000000000..0e6f68524f --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h @@ -0,0 +1,74 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "VideoBackends/D3D12/Common.h" +#include "VideoCommon/RenderState.h" + +namespace DX12 +{ +// This class provides an abstraction for D3D12 descriptor heaps. 
+struct DescriptorHandle final +{ + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; + u32 index; + + operator bool() const { return cpu_handle.ptr != 0; } +}; + +class DescriptorHeapManager final +{ +public: + DescriptorHeapManager(); + ~DescriptorHeapManager(); + + ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } + + bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors); + + bool Allocate(DescriptorHandle* handle); + void Free(const DescriptorHandle& handle); + void Free(u32 index); + +private: + ComPtr m_descriptor_heap; + u32 m_num_descriptors = 0; + u32 m_descriptor_increment_size = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; + + static constexpr u32 BITSET_SIZE = 1024; + using BitSetType = std::bitset; + std::vector m_free_slots = {}; +}; + +class SamplerHeapManager final +{ +public: + SamplerHeapManager(); + ~SamplerHeapManager(); + + ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + + bool Create(ID3D12Device* device, u32 num_descriptors); + bool Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle); + void Clear(); + +private: + ComPtr m_descriptor_heap; + u32 m_num_descriptors = 0; + u32 m_descriptor_increment_size = 0; + u32 m_current_offset = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu; + + std::unordered_map m_sampler_map; +}; +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp new file mode 100644 index 0000000000..98e24d934c --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp @@ -0,0 +1,235 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+
+#include
+
+#include "Common/Assert.h"
+#include "Common/Logging/Log.h"
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/PerfQuery.h"
+#include "VideoBackends/D3D12/Renderer.h"
+
+namespace DX12
+{
+PerfQuery::PerfQuery() = default;
+
+PerfQuery::~PerfQuery() = default;
+
+// Creates the occlusion query heap and the readback buffer the queries are resolved into.
+// Returns false if either D3D12 object could not be created.
+bool PerfQuery::Initialize()
+{
+  constexpr D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_OCCLUSION, PERF_QUERY_BUFFER_SIZE};
+  HRESULT hr = g_dx_context->GetDevice()->CreateQueryHeap(&desc, IID_PPV_ARGS(&m_query_heap));
+  CHECK(SUCCEEDED(hr), "Failed to create query heap");
+  if (FAILED(hr))
+    return false;
+
+  // One PerfQueryDataType slot per query, in a readback heap.
+  constexpr D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_READBACK};
+  constexpr D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
+                                                 0,
+                                                 PERF_QUERY_BUFFER_SIZE * sizeof(PerfQueryDataType),
+                                                 1,
+                                                 1,
+                                                 1,
+                                                 DXGI_FORMAT_UNKNOWN,
+                                                 {1, 0},
+                                                 D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+                                                 D3D12_RESOURCE_FLAG_NONE};
+  hr = g_dx_context->GetDevice()->CreateCommittedResource(
+      &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST,
+      nullptr, IID_PPV_ARGS(&m_query_readback_buffer));
+  CHECK(SUCCEEDED(hr), "Failed to create query buffer");
+  if (FAILED(hr))
+    return false;
+
+  return true;
+}
+
+// Begins an occlusion query in the next slot for the ZCOMP groups; other groups are ignored.
+void PerfQuery::EnableQuery(PerfQueryGroup type)
+{
+  // Block if there are no free slots.
+  // Otherwise, try to keep half of them available.
+  if (m_query_count > m_query_buffer.size() / 2)
+  {
+    const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
+    const bool blocking = m_query_count == PERF_QUERY_BUFFER_SIZE;
+    PartialFlush(do_resolve, blocking);
+  }
+
+  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
+  {
+    ActiveQuery& entry = m_query_buffer[m_query_next_pos];
+    ASSERT(!entry.has_value && !entry.resolved);
+    entry.has_value = true;
+
+    // Remember the group so the readback adds the result to the matching m_results entry.
+    entry.query_type = type;
+
+    g_dx_context->GetCommandList()->BeginQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
+                                               m_query_next_pos);
+  }
+}
+
+// Ends the query started by EnableQuery() and advances to the next slot.
+void PerfQuery::DisableQuery(PerfQueryGroup type)
+{
+  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
+  {
+    g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
+                                             m_query_next_pos);
+    m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
+    m_query_count++;
+    m_unresolved_queries++;
+  }
+}
+
+// Drops all pending queries and zeroes the accumulated results.
+void PerfQuery::ResetQuery()
+{
+  m_query_count = 0;
+  m_unresolved_queries = 0;
+  m_query_resolve_pos = 0;
+  m_query_readback_pos = 0;
+  m_query_next_pos = 0;
+  std::fill_n(m_results, ArraySize(m_results), 0);
+  for (auto& entry : m_query_buffer)
+  {
+    entry.fence_value = 0;
+    entry.resolved = false;
+    entry.has_value = false;
+  }
+}
+
+// Maps a PerfQueryType onto the accumulated per-group results. The sum is divided by 4.
+u32 PerfQuery::GetQueryResult(PerfQueryType type)
+{
+  u32 result = 0;
+  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
+    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
+    result = m_results[PQG_ZCOMP];
+  else if (type == PQ_BLEND_INPUT)
+    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+  else if (type == PQ_EFB_COPY_CLOCKS)
+    result = m_results[PQG_EFB_COPY_CLOCKS];
+
+  return result / 4;
+}
+
+// Keeps flushing with resolve and blocking enabled until no queries are outstanding.
+void PerfQuery::FlushResults()
+{
+  while (!IsFlushed())
+    PartialFlush(true, true);
+}
+
+bool PerfQuery::IsFlushed() const
+{
+  return m_query_count == 0;
+}
+
+// Records resolves for every unresolved query into the current command list.
+void PerfQuery::ResolveQueries()
+{
+  // Do we need to split the resolve as it's wrapping around?
+  if ((m_query_resolve_pos + m_unresolved_queries) > PERF_QUERY_BUFFER_SIZE)
+    ResolveQueries(PERF_QUERY_BUFFER_SIZE - m_query_resolve_pos);
+
+  ResolveQueries(m_unresolved_queries);
+}
+
+// Resolves query_count contiguous queries starting at m_query_resolve_pos (must not wrap).
+void PerfQuery::ResolveQueries(u32 query_count)
+{
+  DEBUG_ASSERT(m_unresolved_queries >= query_count &&
+               (m_query_resolve_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
+
+  // This runs on every command list submit; don't record an empty resolve when nothing is pending.
+  if (query_count == 0)
+    return;
+
+  g_dx_context->GetCommandList()->ResolveQueryData(
+      m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION, m_query_resolve_pos, query_count,
+      m_query_readback_buffer.Get(), m_query_resolve_pos * sizeof(PerfQueryDataType));
+
+  // Flag all queries as available, but with a fence that has to be completed first
+  for (u32 i = 0; i < query_count; i++)
+  {
+    ActiveQuery& entry = m_query_buffer[m_query_resolve_pos + i];
+    DEBUG_ASSERT(entry.has_value && !entry.resolved);
+    entry.fence_value = g_dx_context->GetCurrentFenceValue();
+    entry.resolved = true;
+  }
+  m_query_resolve_pos = (m_query_resolve_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
+  m_unresolved_queries -= query_count;
+}
+
+// Reads back every leading query that is resolved and whose fence value has been reached.
+void PerfQuery::ReadbackQueries()
+{
+  const u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();
+
+  // Need to save these since ProcessResults will modify them.
+  const u32 outstanding_queries = m_query_count;
+  u32 readback_count = 0;
+  for (u32 i = 0; i < outstanding_queries; i++)
+  {
+    u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
+    const ActiveQuery& entry = m_query_buffer[index];
+    if (!entry.resolved || entry.fence_value > completed_fence_counter)
+      break;
+
+    // If this wrapped around, we need to flush the entries before the end of the buffer.
+    if (index < m_query_readback_pos)
+    {
+      ReadbackQueries(readback_count);
+      DEBUG_ASSERT(m_query_readback_pos == 0);
+      readback_count = 0;
+    }
+
+    readback_count++;
+  }
+
+  if (readback_count > 0)
+    ReadbackQueries(readback_count);
+}
+
+// Maps the readback buffer, accumulates query_count results into m_results and retires the slots.
+void PerfQuery::ReadbackQueries(u32 query_count)
+{
+  // Should be at maximum query_count queries pending.
+  ASSERT(query_count <= m_query_count &&
+         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
+
+  const D3D12_RANGE read_range = {m_query_readback_pos * sizeof(PerfQueryDataType),
+                                  (m_query_readback_pos + query_count) * sizeof(PerfQueryDataType)};
+  u8* mapped_ptr;
+  HRESULT hr = m_query_readback_buffer->Map(0, &read_range, reinterpret_cast(&mapped_ptr));
+  CHECK(SUCCEEDED(hr), "Failed to map query readback buffer");
+  if (FAILED(hr))
+    return;
+
+  // Remove pending queries.
+  for (u32 i = 0; i < query_count; i++)
+  {
+    u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE;
+    ActiveQuery& entry = m_query_buffer[index];
+
+    // Should have a fence associated with it (waiting for a result).
+    ASSERT(entry.fence_value != 0);
+    entry.fence_value = 0;
+    entry.resolved = false;
+    entry.has_value = false;
+
+    // Grab result from readback buffer, it will already have been invalidated.
+    PerfQueryDataType result;
+    std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));
+
+    // NOTE: Reported pixel metrics should be referenced to native resolution
+    m_results[entry.query_type] +=
+        static_cast(static_cast(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
+                    EFB_HEIGHT / g_renderer->GetTargetHeight());
+  }
+
+  // Nothing was written through the mapping, so pass an empty written range.
+  constexpr D3D12_RANGE write_range = {0, 0};
+  m_query_readback_buffer->Unmap(0, &write_range);
+
+  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
+  m_query_count -= query_count;
+}
+
+// Optionally submits the current command list (which also resolves pending queries), then reads
+// back whatever results are available.
+void PerfQuery::PartialFlush(bool resolve, bool blocking)
+{
+  // Submit a command buffer in the background if the front query is not bound to one.
+  if ((resolve || blocking) && !m_query_buffer[m_query_resolve_pos].resolved)
+    Renderer::GetInstance()->ExecuteCommandList(blocking);
+
+  ReadbackQueries();
+}
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.h b/Source/Core/VideoBackends/D3D12/PerfQuery.h
new file mode 100644
index 0000000000..c21436652e
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/PerfQuery.h
@@ -0,0 +1,60 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoCommon/PerfQueryBase.h"
+
+namespace DX12
+{
+// D3D12 occlusion-query implementation of PerfQueryBase, backed by a ring of query slots.
+class PerfQuery final : public PerfQueryBase
+{
+public:
+  PerfQuery();
+  ~PerfQuery();
+
+  static PerfQuery* GetInstance() { return static_cast(g_perf_query.get()); }
+
+  bool Initialize();
+
+  // Records resolves for all unresolved queries; called before a command list is executed.
+  void ResolveQueries();
+
+  void EnableQuery(PerfQueryGroup type) override;
+  void DisableQuery(PerfQueryGroup type) override;
+  void ResetQuery() override;
+  u32 GetQueryResult(PerfQueryType type) override;
+  void FlushResults() override;
+  bool IsFlushed() const override;
+
+private:
+  struct ActiveQuery
+  {
+    u64 fence_value;
+    // Group the query was started for; used to index m_results on readback.
+    PerfQueryGroup query_type;
+    bool has_value;
+    bool resolved;
+  };
+
+  void ResolveQueries(u32 query_count);
+  void ReadbackQueries();
+  void ReadbackQueries(u32 query_count);
+
+  void PartialFlush(bool resolve, bool blocking);
+
+  // when testing in SMS: 64 was too small, 128 was ok
+  // TODO: This should be size_t, but the base class uses u32s
+  using PerfQueryDataType = u64;
+  static const u32 PERF_QUERY_BUFFER_SIZE = 512;
+  std::array m_query_buffer = {};
+  u32 m_unresolved_queries = 0;
+  u32 m_query_resolve_pos = 0;
+  u32 m_query_readback_pos = 0;
+  u32 m_query_next_pos = 0;
+
+  ComPtr m_query_heap;
+  ComPtr m_query_readback_buffer;
+};
+
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/Renderer.cpp b/Source/Core/VideoBackends/D3D12/Renderer.cpp
new file mode 100644
index
0000000000..49de5344e6
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/Renderer.cpp
@@ -0,0 +1,735 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Common/Logging/Log.h"
+
+#include "VideoBackends/D3D12/BoundingBox.h"
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/DXPipeline.h"
+#include "VideoBackends/D3D12/DXShader.h"
+#include "VideoBackends/D3D12/DXTexture.h"
+#include "VideoBackends/D3D12/DXVertexFormat.h"
+#include "VideoBackends/D3D12/DescriptorHeapManager.h"
+#include "VideoBackends/D3D12/PerfQuery.h"
+#include "VideoBackends/D3D12/Renderer.h"
+#include "VideoBackends/D3D12/SwapChain.h"
+#include "VideoCommon/VideoConfig.h"
+
+namespace DX12
+{
+// swap_chain may be null, in which case the base renderer gets a 0x0 backbuffer with an
+// undefined format (see IsHeadless()).
+Renderer::Renderer(std::unique_ptr swap_chain, float backbuffer_scale)
+    : ::Renderer(swap_chain ? swap_chain->GetWidth() : 0, swap_chain ? swap_chain->GetHeight() : 0,
+                 backbuffer_scale,
+                 swap_chain ? swap_chain->GetFormat() : AbstractTextureFormat::Undefined),
+      m_swap_chain(std::move(swap_chain))
+{
+  m_state.root_signature = g_dx_context->GetGXRootSignature();
+
+  // Textures must be populated with null descriptors, since we copy directly from this array.
+  for (u32 i = 0; i < MAX_TEXTURES; i++)
+  {
+    m_state.textures[i].ptr = g_dx_context->GetNullSRVDescriptor().cpu_handle.ptr;
+    m_state.samplers.states[i] = RenderState::GetPointSamplerState();
+  }
+}
+
+Renderer::~Renderer() = default;
+
+bool Renderer::IsHeadless() const
+{
+  return !m_swap_chain;
+}
+
+// Initializes the base renderer, then creates the bounding box and binds its UAV.
+bool Renderer::Initialize()
+{
+  if (!::Renderer::Initialize())
+    return false;
+
+  m_bounding_box = BoundingBox::Create();
+  if (!m_bounding_box)
+    return false;
+
+  SetPixelShaderUAV(m_bounding_box->GetGPUDescriptor().cpu_handle);
+  return true;
+}
+
+void Renderer::Shutdown()
+{
+  m_bounding_box.reset();
+  m_swap_chain.reset();
+
+  ::Renderer::Shutdown();
+}
+
+std::unique_ptr Renderer::CreateTexture(const TextureConfig& config)
+{
+  return DXTexture::Create(config);
+}
+
+std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type,
+                                              const TextureConfig& config)
+{
+  return DXStagingTexture::Create(type, config);
+}
+
+std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment,
+                                           AbstractTexture* depth_attachment)
+{
+  return DXFramebuffer::Create(static_cast(color_attachment),
+                               static_cast(depth_attachment));
+}
+
+std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage,
+                                                const char* source, size_t length)
+{
+  return DXShader::CreateFromSource(stage, source, length);
+}
+
+std::unique_ptr Renderer::CreateShaderFromBinary(ShaderStage stage,
+                                                const void* data, size_t length)
+{
+  return DXShader::CreateFromBytecode(stage, DXShader::CreateByteCode(data, length));
+}
+
+std::unique_ptr
+Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
+{
+  return std::make_unique(vtx_decl);
+}
+
+std::unique_ptr Renderer::CreatePipeline(const AbstractPipelineConfig& config)
+{
+  return DXPipeline::Create(config);
+}
+
+u16 Renderer::BBoxRead(int index)
+{
+  return static_cast(m_bounding_box->Get(index));
+}
+
+void Renderer::BBoxWrite(int index, u16 value)
+{
+  m_bounding_box->Set(index, value);
+}
+
+void Renderer::BBoxFlush()
+{
+  m_bounding_box->Flush();
+  m_bounding_box->Invalidate();
+}
+
+// Submits the current command list without waiting for it to complete.
+void Renderer::Flush()
+{
+  ExecuteCommandList(false);
+}
+
+// Submits the current command list and waits for it to complete.
+void Renderer::WaitForGPUIdle()
+{
+  ExecuteCommandList(true);
+}
+
+// Clears the given EFB rectangle. Color (when alpha is also cleared or the EFB has no alpha
+// channel) and depth are cleared directly; anything else goes to the base implementation.
+void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable,
+                           bool z_enable, u32 color, u32 z)
+{
+  // Use a fast path without the shader if both color/alpha are enabled.
+  const bool fast_color_clear = color_enable && (alpha_enable || !EFBHasAlphaChannel());
+  if (fast_color_clear || z_enable)
+  {
+    MathUtil::Rectangle native_rc = ConvertEFBRectangle(rc);
+    native_rc.ClampUL(0, 0, m_current_framebuffer->GetWidth(), m_current_framebuffer->GetHeight());
+    const D3D12_RECT d3d_clear_rc{native_rc.left, native_rc.top, native_rc.right, native_rc.bottom};
+
+    if (fast_color_clear)
+    {
+      static_cast(m_current_framebuffer->GetColorAttachment())
+          ->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      // Channels are taken from bits 16-23, 8-15, 0-7 and 24-31 of color, in that order.
+      const std::array clear_color = {
+          {static_cast((color >> 16) & 0xFF) / 255.0f,
+           static_cast((color >> 8) & 0xFF) / 255.0f,
+           static_cast((color >> 0) & 0xFF) / 255.0f,
+           static_cast((color >> 24) & 0xFF) / 255.0f}};
+      g_dx_context->GetCommandList()->ClearRenderTargetView(
+          static_cast(m_current_framebuffer)->GetRTVDescriptor().cpu_handle,
+          clear_color.data(), 1, &d3d_clear_rc);
+      color_enable = false;
+      alpha_enable = false;
+    }
+
+    if (z_enable)
+    {
+      static_cast(m_current_framebuffer->GetDepthAttachment())
+          ->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
+
+      // D3D does not support reversed depth ranges.
+      const float clear_depth = 1.0f - static_cast(z & 0xFFFFFF) / 16777216.0f;
+      g_dx_context->GetCommandList()->ClearDepthStencilView(
+          static_cast(m_current_framebuffer)->GetDSVDescriptor().cpu_handle,
+          D3D12_CLEAR_FLAG_DEPTH, clear_depth, 0, 1, &d3d_clear_rc);
+      z_enable = false;
+    }
+  }
+
+  // Anything left over, fall back to clear triangle.
+  if (color_enable || alpha_enable || z_enable)
+    ::Renderer::ClearScreen(rc, color_enable, alpha_enable, z_enable, color, z);
+}
+
+// Records the pipeline to use for subsequent draws and marks every piece of state that differs
+// from the previously tracked values as dirty.
+void Renderer::SetPipeline(const AbstractPipeline* pipeline)
+{
+  const DXPipeline* dx_pipeline = static_cast(pipeline);
+  if (m_current_pipeline == dx_pipeline)
+    return;
+
+  m_current_pipeline = dx_pipeline;
+  m_dirty_bits |= DirtyState_Pipeline;
+
+  if (dx_pipeline)
+  {
+    if (dx_pipeline->GetRootSignature() != m_state.root_signature)
+    {
+      // A new root signature requires all root parameters to be set again.
+      m_state.root_signature = dx_pipeline->GetRootSignature();
+      m_dirty_bits |= DirtyState_RootSignature | DirtyState_PS_CBV | DirtyState_VS_CBV |
+                      DirtyState_GS_CBV | DirtyState_SRV_Descriptor |
+                      DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor;
+    }
+    if (dx_pipeline->UseIntegerRTV() != m_state.using_integer_rtv)
+    {
+      m_state.using_integer_rtv = dx_pipeline->UseIntegerRTV();
+      m_dirty_bits |= DirtyState_Framebuffer;
+    }
+    if (dx_pipeline->GetPrimitiveTopology() != m_state.primitive_topology)
+    {
+      m_state.primitive_topology = dx_pipeline->GetPrimitiveTopology();
+      m_dirty_bits |= DirtyState_PrimitiveTopology;
+    }
+  }
+}
+
+// Transitions the attachments to writable states and sets fb as the render target immediately.
+void Renderer::BindFramebuffer(DXFramebuffer* fb)
+{
+  if (fb->HasColorBuffer())
+  {
+    static_cast(fb->GetColorAttachment())
+        ->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
+  }
+  if (fb->HasDepthBuffer())
+  {
+    static_cast(fb->GetDepthAttachment())
+        ->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
+  }
+
+  g_dx_context->GetCommandList()->OMSetRenderTargets(
+      fb->GetRTVDescriptorCount(),
+      m_state.using_integer_rtv ? fb->GetIntRTVDescriptorArray() : fb->GetRTVDescriptorArray(),
+      FALSE, fb->GetDSVDescriptorArray());
+  m_current_framebuffer = fb;
+  m_dirty_bits &= ~DirtyState_Framebuffer;
+}
+
+// Defers the actual bind until the next ApplyState().
+void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer)
+{
+  if (m_current_framebuffer == framebuffer)
+    return;
+
+  m_current_framebuffer = framebuffer;
+  m_dirty_bits |= DirtyState_Framebuffer;
+}
+
+// Binds the framebuffer immediately and discards the contents of its attachments.
+void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer)
+{
+  BindFramebuffer(static_cast(framebuffer));
+
+  static const D3D12_DISCARD_REGION dr = {0, nullptr, 0, 1};
+  if (framebuffer->HasColorBuffer())
+  {
+    g_dx_context->GetCommandList()->DiscardResource(
+        static_cast(framebuffer->GetColorAttachment())->GetResource(), &dr);
+  }
+  if (framebuffer->HasDepthBuffer())
+  {
+    g_dx_context->GetCommandList()->DiscardResource(
+        static_cast(framebuffer->GetDepthAttachment())->GetResource(), &dr);
+  }
+}
+
+// Binds the framebuffer immediately and clears its whole color and depth attachments.
+void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer,
+                                      const ClearColor& color_value, float depth_value)
+{
+  DXFramebuffer* dxfb = static_cast(framebuffer);
+  BindFramebuffer(dxfb);
+
+  if (framebuffer->HasColorBuffer())
+  {
+    g_dx_context->GetCommandList()->ClearRenderTargetView(dxfb->GetRTVDescriptor().cpu_handle,
+                                                          color_value.data(), 0, nullptr);
+  }
+  if (framebuffer->HasDepthBuffer())
+  {
+    g_dx_context->GetCommandList()->ClearDepthStencilView(
+        dxfb->GetDSVDescriptor().cpu_handle, D3D12_CLEAR_FLAG_DEPTH, depth_value, 0, 0, nullptr);
+  }
+}
+
+void Renderer::SetScissorRect(const MathUtil::Rectangle& rc)
+{
+  if (m_state.scissor.left == rc.left && m_state.scissor.right == rc.right &&
+      m_state.scissor.top == rc.top && m_state.scissor.bottom == rc.bottom)
+  {
+    return;
+  }
+
+  m_state.scissor.left = rc.left;
+  m_state.scissor.right = rc.right;
+  m_state.scissor.top = rc.top;
+  m_state.scissor.bottom = rc.bottom;
+  m_dirty_bits |= DirtyState_ScissorRect;
+}
+
+// Binds texture to the given slot. A null texture binds the null SRV descriptor, matching what
+// the constructor and UnbindTexture() store for empty slots.
+void Renderer::SetTexture(u32 index, const AbstractTexture* texture)
+{
+  const DXTexture* dxtex = static_cast(texture);
+
+  // Resolve the descriptor without dereferencing a null texture.
+  const D3D12_CPU_DESCRIPTOR_HANDLE handle =
+      dxtex ? dxtex->GetSRVDescriptor().cpu_handle : g_dx_context->GetNullSRVDescriptor().cpu_handle;
+  if (m_state.textures[index].ptr == handle.ptr)
+    return;
+
+  m_state.textures[index].ptr = handle.ptr;
+  if (dxtex)
+    dxtex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
+
+  m_dirty_bits |= DirtyState_Textures;
+}
+
+void Renderer::SetSamplerState(u32 index, const SamplerState& state)
+{
+  if (m_state.samplers.states[index] == state)
+    return;
+
+  m_state.samplers.states[index] = state;
+  m_dirty_bits |= DirtyState_Samplers;
+}
+
+// The read/write flags are not used by this backend.
+void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write)
+{
+  const DXTexture* dxtex = static_cast(texture);
+  if (m_state.compute_image_texture == dxtex)
+    return;
+
+  m_state.compute_image_texture = dxtex;
+  m_dirty_bits |= DirtyState_ComputeImageTexture;
+}
+
+// Replaces every binding of texture with the null SRV / a null compute image.
+void Renderer::UnbindTexture(const AbstractTexture* texture)
+{
+  const auto srv_shadow_descriptor =
+      static_cast(texture)->GetSRVDescriptor().cpu_handle;
+  for (u32 i = 0; i < MAX_TEXTURES; i++)
+  {
+    if (m_state.textures[i].ptr == srv_shadow_descriptor.ptr)
+    {
+      m_state.textures[i].ptr = g_dx_context->GetNullSRVDescriptor().cpu_handle.ptr;
+      m_dirty_bits |= DirtyState_Textures;
+    }
+  }
+  if (m_state.compute_image_texture == texture)
+  {
+    m_state.compute_image_texture = nullptr;
+    m_dirty_bits |= DirtyState_ComputeImageTexture;
+  }
+}
+
+void Renderer::SetViewport(float x, float y, float width, float height, float near_depth,
+                           float far_depth)
+{
+  if (m_state.viewport.TopLeftX == x && m_state.viewport.TopLeftY == y &&
+      m_state.viewport.Width == width && m_state.viewport.Height == height &&
+      near_depth == m_state.viewport.MinDepth && far_depth == m_state.viewport.MaxDepth)
+  {
+    return;
+  }
+
+  m_state.viewport.TopLeftX = x;
+  m_state.viewport.TopLeftY = y;
+  m_state.viewport.Width = width;
+  m_state.viewport.Height = height;
+  m_state.viewport.MinDepth = near_depth;
+  m_state.viewport.MaxDepth = far_depth;
+  m_dirty_bits |= DirtyState_Viewport;
+}
+
+// The draw is skipped when ApplyState() reports that no framebuffer or pipeline is bound.
+void Renderer::Draw(u32 base_vertex, u32 num_vertices)
+{
+  if (!ApplyState())
+    return;
+
+  g_dx_context->GetCommandList()->DrawInstanced(num_vertices, 1, base_vertex, 0);
+}
+
+void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
+{
+  if (!ApplyState())
+    return;
+
+  g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0);
+}
+
+// Binds the compute root parameters from the shared state and dispatches the shader.
+void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
+                                     u32 groups_z)
+{
+  SetRootSignatures();
+  SetDescriptorHeaps();
+  UpdateDescriptorTables();
+
+  // If the UAV descriptor could not be allocated, submit the command list and bind again.
+  if (m_dirty_bits & DirtyState_ComputeImageTexture && !UpdateComputeUAVDescriptorTable())
+  {
+    ExecuteCommandList(false);
+    SetRootSignatures();
+    SetDescriptorHeaps();
+    UpdateDescriptorTables();
+    UpdateComputeUAVDescriptorTable();
+  }
+
+  // Share graphics and compute state. No need to track now since dispatches are infrequent.
+  auto* const cmdlist = g_dx_context->GetCommandList();
+  cmdlist->SetPipelineState(static_cast(shader)->GetComputePipeline());
+  cmdlist->SetComputeRootConstantBufferView(CS_ROOT_PARAMETER_CBV, m_state.constant_buffers[0]);
+  cmdlist->SetComputeRootDescriptorTable(CS_ROOT_PARAMETER_SRV, m_state.srv_descriptor_base);
+  cmdlist->SetComputeRootDescriptorTable(CS_ROOT_PARAMETER_SAMPLERS,
+                                         m_state.sampler_descriptor_base);
+  cmdlist->SetComputeRootDescriptorTable(CS_ROOT_PARAMETER_UAV,
+                                         m_state.compute_uav_descriptor_base);
+  cmdlist->Dispatch(groups_x, groups_y, groups_z);
+
+  // Compute and graphics state share the same pipeline object? :(
+  m_dirty_bits |= DirtyState_Pipeline;
+}
+
+void Renderer::BindBackbuffer(const ClearColor& clear_color)
+{
+  CheckForSwapChainChanges();
+  SetAndClearFramebuffer(m_swap_chain->GetCurrentFramebuffer(), clear_color);
+}
+
+// Applies a pending surface change or resize to the swap chain and refreshes the backbuffer size.
+void Renderer::CheckForSwapChainChanges()
+{
+  const bool surface_changed = m_surface_changed.TestAndClear();
+  const bool surface_resized =
+      m_surface_resized.TestAndClear() || m_swap_chain->CheckForFullscreenChange();
+  if (!surface_changed && !surface_resized)
+    return;
+
+  // The swap chain could be in use from a previous frame.
+  WaitForGPUIdle();
+  if (surface_changed)
+  {
+    m_swap_chain->ChangeSurface(m_new_surface_handle);
+    m_new_surface_handle = nullptr;
+  }
+  else
+  {
+    m_swap_chain->ResizeSwapChain();
+  }
+
+  m_backbuffer_width = m_swap_chain->GetWidth();
+  m_backbuffer_height = m_swap_chain->GetHeight();
+}
+
+// Transitions the backbuffer to the present state, submits the command list and presents.
+void Renderer::PresentBackbuffer()
+{
+  m_current_framebuffer = nullptr;
+
+  m_swap_chain->GetCurrentTexture()->TransitionToState(D3D12_RESOURCE_STATE_PRESENT);
+  ExecuteCommandList(false);
+
+  m_swap_chain->Present();
+}
+
+void Renderer::OnConfigChanged(u32 bits)
+{
+  ::Renderer::OnConfigChanged(bits);
+
+  // For quad-buffered stereo we need to change the layer count, so recreate the swap chain.
+  if (m_swap_chain && bits & CONFIG_CHANGE_BIT_STEREO_MODE)
+  {
+    ExecuteCommandList(true);
+    m_swap_chain->SetStereo(SwapChain::WantsStereo());
+  }
+
+  // Wipe sampler cache if force texture filtering or anisotropy changes.
+  if (bits & (CONFIG_CHANGE_BIT_ANISOTROPY | CONFIG_CHANGE_BIT_FORCE_TEXTURE_FILTERING))
+  {
+    ExecuteCommandList(true);
+    g_dx_context->GetSamplerHeapManager().Clear();
+    g_dx_context->ResetSamplerAllocators();
+  }
+
+  // If the host config changed (e.g. bbox/per-pixel-shading), recreate the root signature.
+  if (bits & CONFIG_CHANGE_BIT_HOST_CONFIG)
+    g_dx_context->RecreateGXRootSignature();
+}
+
+// Resolves pending perf queries, submits the command list and marks all state dirty so it is
+// set again before the next draw.
+void Renderer::ExecuteCommandList(bool wait_for_completion)
+{
+  PerfQuery::GetInstance()->ResolveQueries();
+  g_dx_context->ExecuteCommandList(wait_for_completion);
+  m_dirty_bits = DirtyState_All;
+}
+
+void Renderer::SetConstantBuffer(u32 index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+  if (m_state.constant_buffers[index] == address)
+    return;
+
+  m_state.constant_buffers[index] = address;
+
+  // The PS/VS/GS CBV dirty bits are consecutive, so index selects the matching one.
+  m_dirty_bits |= DirtyState_PS_CBV << index;
+}
+
+void Renderer::SetTextureDescriptor(u32 index, D3D12_CPU_DESCRIPTOR_HANDLE handle)
+{
+  if (m_state.textures[index].ptr == handle.ptr)
+    return;
+
+  m_state.textures[index].ptr = handle.ptr;
+  m_dirty_bits |= DirtyState_Textures;
+}
+
+void Renderer::SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle)
+{
+  if (m_state.ps_uav.ptr == handle.ptr)
+    return;
+
+  m_state.ps_uav = handle;
+  m_dirty_bits |= DirtyState_PS_UAV;
+}
+
+void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size)
+{
+  if (m_state.vertex_buffer.BufferLocation == address &&
+      m_state.vertex_buffer.StrideInBytes == stride && m_state.vertex_buffer.SizeInBytes == size)
+  {
+    return;
+  }
+
+  m_state.vertex_buffer.BufferLocation = address;
+  m_state.vertex_buffer.StrideInBytes = stride;
+  m_state.vertex_buffer.SizeInBytes = size;
+  m_dirty_bits |= DirtyState_VertexBuffer;
+}
+
+void Renderer::SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format)
+{
+  if (m_state.index_buffer.BufferLocation == address && m_state.index_buffer.SizeInBytes == size &&
+      m_state.index_buffer.Format == format)
+  {
+    return;
+  }
+
+  m_state.index_buffer.BufferLocation = address;
+  m_state.index_buffer.SizeInBytes = size;
+  m_state.index_buffer.Format = format;
+  m_dirty_bits |= DirtyState_IndexBuffer;
+}
+
+// Records every piece of dirty state into the command list.
+// Returns false (recording nothing) when no framebuffer or pipeline is bound.
+bool Renderer::ApplyState()
+{
+  if (!m_current_framebuffer || !m_current_pipeline)
+    return false;
+
+  // Updating the descriptor tables can cause command list execution if no descriptors remain.
+  SetRootSignatures();
+  SetDescriptorHeaps();
+  UpdateDescriptorTables();
+
+  // Clear bits before actually changing state. Some state (e.g. cbuffers) can't be set
+  // if utility pipelines are bound.
+  const u32 dirty_bits = m_dirty_bits;
+  m_dirty_bits &= ~(
+      DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport | DirtyState_ScissorRect |
+      DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
+      DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
+      DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology);
+
+  auto* const cmdlist = g_dx_context->GetCommandList();
+  if (dirty_bits & DirtyState_Pipeline)
+    cmdlist->SetPipelineState(static_cast(m_current_pipeline)->GetPipeline());
+
+  if (dirty_bits & DirtyState_Framebuffer)
+    BindFramebuffer(static_cast(m_current_framebuffer));
+
+  if (dirty_bits & DirtyState_Viewport)
+    cmdlist->RSSetViewports(1, &m_state.viewport);
+
+  if (dirty_bits & DirtyState_ScissorRect)
+    cmdlist->RSSetScissorRects(1, &m_state.scissor);
+
+  if (dirty_bits & DirtyState_VertexBuffer)
+    cmdlist->IASetVertexBuffers(0, 1, &m_state.vertex_buffer);
+
+  if (dirty_bits & DirtyState_IndexBuffer)
+    cmdlist->IASetIndexBuffer(&m_state.index_buffer);
+
+  if (dirty_bits & DirtyState_PrimitiveTopology)
+    cmdlist->IASetPrimitiveTopology(m_state.primitive_topology);
+
+  if (dirty_bits & DirtyState_SRV_Descriptor)
+    cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_PS_SRV, m_state.srv_descriptor_base);
+
+  if (dirty_bits & DirtyState_Sampler_Descriptor)
+  {
+    cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_PS_SAMPLERS,
+                                            m_state.sampler_descriptor_base);
+  }
+
+  // The VS/GS constant buffers and the bbox UAV are only set for GX pipelines.
+  if (static_cast(m_current_pipeline)->GetUsage() == AbstractPipelineUsage::GX)
+  {
+    if (dirty_bits & DirtyState_VS_CBV)
+    {
+      cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_VS_CBV,
+                                                 m_state.constant_buffers[1]);
+
+      // With pixel lighting the vertex constants are also bound to the pixel shader. The root
+      // parameter used depends on whether bbox is enabled.
+      if (g_ActiveConfig.bEnablePixelLighting)
+      {
+        cmdlist->SetGraphicsRootConstantBufferView(
+            g_ActiveConfig.bBBoxEnable ? ROOT_PARAMETER_PS_CBV2 : ROOT_PARAMETER_PS_UAV_OR_CBV2,
+            m_state.constant_buffers[1]);
+      }
+    }
+
+    if (dirty_bits & DirtyState_GS_CBV)
+    {
+      cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_GS_CBV,
+                                                 m_state.constant_buffers[2]);
+    }
+
+    if (dirty_bits & DirtyState_UAV_Descriptor && g_ActiveConfig.bBBoxEnable)
+    {
+      cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_PS_UAV_OR_CBV2,
+                                              m_state.uav_descriptor_base);
+    }
+  }
+
+  if (dirty_bits & DirtyState_PS_CBV)
+  {
+    cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_PS_CBV, m_state.constant_buffers[0]);
+  }
+
+  return true;
+}
+
+// Sets the graphics and/or compute root signature if flagged dirty.
+void Renderer::SetRootSignatures()
+{
+  const u32 dirty_bits = m_dirty_bits;
+  if (dirty_bits & DirtyState_RootSignature)
+    g_dx_context->GetCommandList()->SetGraphicsRootSignature(m_state.root_signature);
+  if (dirty_bits & DirtyState_ComputeRootSignature)
+  {
+    g_dx_context->GetCommandList()->SetComputeRootSignature(
+        g_dx_context->GetComputeRootSignature());
+  }
+  m_dirty_bits &= ~(DirtyState_RootSignature | DirtyState_ComputeRootSignature);
+}
+
+void Renderer::SetDescriptorHeaps()
+{
+  if (m_dirty_bits & DirtyState_DescriptorHeaps)
+  {
+    g_dx_context->GetCommandList()->SetDescriptorHeaps(g_dx_context->GetGPUDescriptorHeapCount(),
+                                                       g_dx_context->GetGPUDescriptorHeaps());
+    m_dirty_bits &= ~DirtyState_DescriptorHeaps;
+  }
+}
+
+// Rebuilds the dirty SRV/sampler/UAV descriptor tables. If any allocation fails, the command
+// list is submitted and all three tables are rebuilt.
+void Renderer::UpdateDescriptorTables()
+{
+  // Samplers force a full sync because any of the samplers could be in use.
+  const bool texture_update_failed =
+      (m_dirty_bits & DirtyState_Textures) && !UpdateSRVDescriptorTable();
+  const bool sampler_update_failed =
+      (m_dirty_bits & DirtyState_Samplers) && !UpdateSamplerDescriptorTable();
+  const bool uav_update_failed = (m_dirty_bits & DirtyState_PS_UAV) && !UpdateUAVDescriptorTable();
+  if (texture_update_failed || sampler_update_failed || uav_update_failed)
+  {
+    // UAV descriptors come from the same allocator as SRVs, so report both as "descriptors".
+    WARN_LOG(VIDEO, "Executing command list while waiting for temporary %s",
+             (texture_update_failed || uav_update_failed) ? "descriptors" : "samplers");
+    ExecuteCommandList(false);
+    SetRootSignatures();
+    SetDescriptorHeaps();
+    UpdateSRVDescriptorTable();
+    UpdateSamplerDescriptorTable();
+    UpdateUAVDescriptorTable();
+  }
+}
+
+// Copies the MAX_TEXTURES tracked SRVs into a freshly allocated contiguous descriptor range.
+// Returns false if the range could not be allocated.
+bool Renderer::UpdateSRVDescriptorTable()
+{
+  static constexpr std::array src_sizes = {1, 1, 1, 1, 1, 1, 1, 1};
+  DescriptorHandle dst_base_handle;
+  const UINT dst_handle_sizes = 8;
+  if (!g_dx_context->GetDescriptorAllocator()->Allocate(MAX_TEXTURES, &dst_base_handle))
+    return false;
+
+  g_dx_context->GetDevice()->CopyDescriptors(
+      1, &dst_base_handle.cpu_handle, &dst_handle_sizes, MAX_TEXTURES, m_state.textures.data(),
+      src_sizes.data(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+  m_state.srv_descriptor_base = dst_base_handle.gpu_handle;
+  m_dirty_bits = (m_dirty_bits & ~DirtyState_Textures) | DirtyState_SRV_Descriptor;
+  return true;
+}
+
+// Looks up the descriptor group for the current sampler states.
+// On failure the sampler allocators are reset and false is returned.
+bool Renderer::UpdateSamplerDescriptorTable()
+{
+  if (!g_dx_context->GetSamplerAllocator()->GetGroupHandle(m_state.samplers,
+                                                           &m_state.sampler_descriptor_base))
+  {
+    g_dx_context->ResetSamplerAllocators();
+    return false;
+  }
+
+  m_dirty_bits = (m_dirty_bits & ~DirtyState_Samplers) | DirtyState_Sampler_Descriptor;
+  return true;
+}
+
+// Copies the pixel shader UAV into a freshly allocated descriptor.
+// Returns false if the descriptor could not be allocated.
+bool Renderer::UpdateUAVDescriptorTable()
+{
+  // We can skip writing the UAV descriptor if bbox isn't enabled, since it's not used otherwise.
+  if (!g_ActiveConfig.bBBoxEnable)
+    return true;
+
+  DescriptorHandle handle;
+  if (!g_dx_context->GetDescriptorAllocator()->Allocate(1, &handle))
+    return false;
+
+  g_dx_context->GetDevice()->CopyDescriptorsSimple(1, handle.cpu_handle, m_state.ps_uav,
+                                                   D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+  m_state.uav_descriptor_base = handle.gpu_handle;
+  m_dirty_bits = (m_dirty_bits & ~DirtyState_PS_UAV) | DirtyState_UAV_Descriptor;
+  return true;
+}
+
+// Writes the compute image UAV (or a null UAV when no image is bound) into a freshly allocated
+// descriptor. Returns false if the descriptor could not be allocated.
+bool Renderer::UpdateComputeUAVDescriptorTable()
+{
+  DescriptorHandle handle;
+  if (!g_dx_context->GetDescriptorAllocator()->Allocate(1, &handle))
+    return false;
+
+  if (m_state.compute_image_texture)
+  {
+    g_dx_context->GetDevice()->CopyDescriptorsSimple(
+        1, handle.cpu_handle, m_state.compute_image_texture->GetUAVDescriptor().cpu_handle,
+        D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+  }
+  else
+  {
+    constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {};
+    g_dx_context->GetDevice()->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc,
+                                                         handle.cpu_handle);
+  }
+
+  m_dirty_bits &= ~DirtyState_ComputeImageTexture;
+  m_state.compute_uav_descriptor_base = handle.gpu_handle;
+  return true;
+}
+
+}  // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/Renderer.h b/Source/Core/VideoBackends/D3D12/Renderer.h
new file mode 100644
index 0000000000..60e0000cff
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/Renderer.h
@@ -0,0 +1,172 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+#include
+#include "VideoBackends/D3D12/DescriptorHeapManager.h"
+#include "VideoCommon/RenderBase.h"
+
+namespace DX12
+{
+class BoundingBox;
+class DXFramebuffer;
+class DXTexture;
+class DXShader;
+class DXPipeline;
+class SwapChain;
+
+// D3D12 implementation of the common ::Renderer interface.
+//
+// Besides the overridden ::Renderer entry points, this class exposes a few D3D12-specific
+// setters (constant buffers, texture descriptors, pixel shader UAV, vertex/index buffers) and
+// caches the bound state in m_state. m_dirty_bits records which parts of that state are marked
+// dirty; it starts out as DirtyState_All.
+class Renderer final : public ::Renderer
+{
+public:
+  Renderer(std::unique_ptr swap_chain, float backbuffer_scale);
+  ~Renderer() override;
+
+  // Returns the global renderer (g_renderer) cast to the D3D12 renderer type.
+  static Renderer* GetInstance() { return static_cast(g_renderer.get()); }
+
+  bool IsHeadless() const override;
+
+  bool Initialize() override;
+  void Shutdown() override;
+
+  std::unique_ptr CreateTexture(const TextureConfig& config) override;
+  std::unique_ptr
+  CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
+  std::unique_ptr
+  CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;
+
+  std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source,
+                                         size_t length) override;
+  std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data,
+                                         size_t length) override;
+  std::unique_ptr
+  CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
+  std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override;
+
+  u16 BBoxRead(int index) override;
+  void BBoxWrite(int index, u16 value) override;
+  void BBoxFlush() override;
+
+  void Flush() override;
+  void WaitForGPUIdle() override;
+
+  void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable,
+                   u32 color, u32 z) override;
+
+  void SetPipeline(const AbstractPipeline* pipeline) override;
+  void SetFramebuffer(AbstractFramebuffer* framebuffer) override;
+  void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override;
+  void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {},
+                              float depth_value = 0.0f) override;
+  void SetScissorRect(const MathUtil::Rectangle& rc) override;
+  void SetTexture(u32 index, const AbstractTexture* texture) override;
+  void SetSamplerState(u32 index, const SamplerState& state) override;
+  void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override;
+  void UnbindTexture(const AbstractTexture* texture) override;
+  void SetViewport(float x, float y, float width, float height, float near_depth,
+                   float far_depth) override;
+  void Draw(u32 base_vertex, u32 num_vertices) override;
+  void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
+  void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
+                             u32 groups_z) override;
+  void BindBackbuffer(const ClearColor& clear_color = {}) override;
+  void PresentBackbuffer() override;
+
+  // Completes the current render pass, executes the command buffer, and restores state ready for
+  // next render. Use when you want to kick the current buffer to make room for new data.
+  void ExecuteCommandList(bool wait_for_completion);
+
+  // Setting constant buffer handles.
+  void SetConstantBuffer(u32 index, D3D12_GPU_VIRTUAL_ADDRESS address);
+
+  // Setting textures via descriptor handles. This is assumed to be in the shadow heap.
+  void SetTextureDescriptor(u32 index, D3D12_CPU_DESCRIPTOR_HANDLE handle);
+
+  // Pixel shader UAV.
+  void SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
+
+  // Graphics vertex/index buffer binding.
+  void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size);
+  void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format);
+
+protected:
+  void OnConfigChanged(u32 bits) override;
+
+private:
+  static const u32 MAX_TEXTURES = 8;
+  static const u32 NUM_CONSTANT_BUFFERS = 3;
+
+  // Dirty bits. Each value is a distinct bit so they can be combined in m_dirty_bits;
+  // DirtyState_All is the union of every bit declared here.
+  enum DirtyStates
+  {
+    DirtyState_Framebuffer = (1 << 0),
+    DirtyState_Pipeline = (1 << 1),
+    DirtyState_Textures = (1 << 2),
+    DirtyState_Samplers = (1 << 3),
+    DirtyState_Viewport = (1 << 4),
+    DirtyState_ScissorRect = (1 << 5),
+    DirtyState_ComputeImageTexture = (1 << 6),
+    DirtyState_PS_UAV = (1 << 7),
+    DirtyState_PS_CBV = (1 << 8),
+    DirtyState_VS_CBV = (1 << 9),
+    DirtyState_GS_CBV = (1 << 10),
+    DirtyState_SRV_Descriptor = (1 << 11),
+    DirtyState_Sampler_Descriptor = (1 << 12),
+    DirtyState_UAV_Descriptor = (1 << 13),
+    DirtyState_VertexBuffer = (1 << 14),
+    DirtyState_IndexBuffer = (1 << 15),
+    DirtyState_PrimitiveTopology = (1 << 16),
+    DirtyState_RootSignature = (1 << 17),
+    DirtyState_ComputeRootSignature = (1 << 18),
+    DirtyState_DescriptorHeaps = (1 << 19),
+
+    DirtyState_All =
+        DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Textures | DirtyState_Samplers |
+        DirtyState_Viewport | DirtyState_ScissorRect | DirtyState_ComputeImageTexture |
+        DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
+        DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
+        DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology |
+        DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps
+  };
+
+  void CheckForSwapChainChanges();
+
+  // Binds all dirty state
+  bool ApplyState();
+  void BindFramebuffer(DXFramebuffer* fb);
+  void SetRootSignatures();
+  void SetDescriptorHeaps();
+  void UpdateDescriptorTables();
+  bool UpdateSRVDescriptorTable();
+  bool UpdateUAVDescriptorTable();
+  bool UpdateComputeUAVDescriptorTable();
+  bool UpdateSamplerDescriptorTable();
+
+  // Owned objects
+  std::unique_ptr m_swap_chain;
+  std::unique_ptr m_bounding_box;
+
+  // Current state
+  struct
+  {
+    ID3D12RootSignature* root_signature = nullptr;
+    DXShader* compute_shader = nullptr;
+    std::array constant_buffers = {};
+    std::array textures = {};
+    D3D12_CPU_DESCRIPTOR_HANDLE ps_uav = {};
+    SamplerStateSet samplers = {};
+    const DXTexture* compute_image_texture = nullptr;
+    D3D12_VIEWPORT viewport = {};
+    D3D12_RECT scissor = {};
+    D3D12_GPU_DESCRIPTOR_HANDLE srv_descriptor_base = {};
+    D3D12_GPU_DESCRIPTOR_HANDLE sampler_descriptor_base = {};
+    D3D12_GPU_DESCRIPTOR_HANDLE uav_descriptor_base = {};
+    D3D12_GPU_DESCRIPTOR_HANDLE compute_uav_descriptor_base = {};
+    D3D12_VERTEX_BUFFER_VIEW vertex_buffer = {};
+    D3D12_INDEX_BUFFER_VIEW index_buffer = {};
+    D3D12_PRIMITIVE_TOPOLOGY primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+    bool using_integer_rtv = false;
+  } m_state;
+  u32 m_dirty_bits = DirtyState_All;
+};
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/StreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/StreamBuffer.cpp
new file mode 100644
index 0000000000..a3702a7256
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/StreamBuffer.cpp
@@ -0,0 +1,249 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "VideoBackends/D3D12/StreamBuffer.h"
+
+#include
+#include
+
+#include "Common/Align.h"
+#include "Common/Assert.h"
+#include "Common/MsgHandler.h"
+
+#include "VideoBackends/D3D12/DXContext.h"
+
+namespace DX12
+{
+StreamBuffer::StreamBuffer() = default;
+
+// Unmaps the buffer if it was mapped and releases the underlying resource.
+StreamBuffer::~StreamBuffer()
+{
+  if (m_host_pointer)
+  {
+    // Report the whole buffer as potentially written when unmapping.
+    const D3D12_RANGE written_range = {0, m_size};
+    m_buffer->Unmap(0, &written_range);
+  }
+
+  // These get destroyed at shutdown anyway, so no need to defer destruction.
+  if (m_buffer)
+    m_buffer->Release();
+}
+
+// Creates a `size`-byte buffer in an upload heap, maps it, and resets all allocation tracking.
+// The mapping is kept until the destructor runs. Returns false if resource creation or mapping
+// fails.
+bool StreamBuffer::AllocateBuffer(u32 size)
+{
+  static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_UPLOAD};
+  const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
+                                             0,
+                                             size,
+                                             1,
+                                             1,
+                                             1,
+                                             DXGI_FORMAT_UNKNOWN,
+                                             {1, 0},
+                                             D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+                                             D3D12_RESOURCE_FLAG_NONE};
+
+  HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource(
+      &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
+      nullptr, IID_PPV_ARGS(&m_buffer));
+  CHECK(SUCCEEDED(hr), "Allocate buffer");
+  if (FAILED(hr))
+    return false;
+
+  // An empty read range is passed to Map.
+  static const D3D12_RANGE read_range = {};
+  hr = m_buffer->Map(0, &read_range, reinterpret_cast(&m_host_pointer));
+  CHECK(SUCCEEDED(hr), "Map buffer");
+  if (FAILED(hr))
+    return false;
+
+  m_size = size;
+  m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
+  m_current_offset = 0;
+  m_current_gpu_position = 0;
+  m_tracked_fences.clear();
+  return true;
+}
+
+// Tries to make `num_bytes` available at the current offset, aligned to `alignment`.
+//
+// The search is budgeted with num_bytes + alignment so that aligning the offset upwards can
+// never push the allocation out of the region that was checked. On success m_current_offset is
+// positioned (and aligned) at the start of the reservation and the size is remembered for
+// CommitMemory(). Returns false when the request cannot be satisfied, including when no
+// suitable fence can be waited on; callers in this patch respond by executing the command list
+// and retrying.
+bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
+{
+  const u32 required_bytes = num_bytes + alignment;
+
+  // Check for sane allocations
+  if (required_bytes > m_size)
+  {
+    PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer",
+               static_cast(num_bytes), static_cast(m_size));
+
+    return false;
+  }
+
+  // Is the GPU behind or up to date with our current offset?
+  UpdateCurrentFencePosition();
+  if (m_current_offset >= m_current_gpu_position)
+  {
+    const u32 remaining_bytes = m_size - m_current_offset;
+    if (required_bytes <= remaining_bytes)
+    {
+      // Place at the current position, after the GPU position.
+      m_current_offset = Common::AlignUp(m_current_offset, alignment);
+      m_last_allocation_size = num_bytes;
+      return true;
+    }
+
+    // Check for space at the start of the buffer
+    // We use < here because we don't want to have the case of m_current_offset ==
+    // m_current_gpu_position. That would mean the code above would assume the
+    // GPU has caught up to us, which it hasn't.
+    if (required_bytes < m_current_gpu_position)
+    {
+      // Reset offset to zero, since we're allocating behind the gpu now
+      m_current_offset = 0;
+      m_last_allocation_size = num_bytes;
+      return true;
+    }
+  }
+  else
+  {
+    // We have from m_current_offset..m_current_gpu_position space to use.
+    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
+    if (required_bytes < remaining_bytes)
+    {
+      // Place at the current position, since this is still behind the GPU.
+      m_current_offset = Common::AlignUp(m_current_offset, alignment);
+      m_last_allocation_size = num_bytes;
+      return true;
+    }
+  }
+
+  // Can we find a fence to wait on that will give us enough memory?
+  if (WaitForClearSpace(required_bytes))
+  {
+    m_current_offset = Common::AlignUp(m_current_offset, alignment);
+    m_last_allocation_size = num_bytes;
+    return true;
+  }
+
+  // We tried everything we could, and still couldn't get anything. This means that too much space
+  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
+  // only option is to execute it, and wait until it's done.
+  return false;
+}
+
+// Consumes `final_num_bytes` of the last reservation by advancing the current offset. The
+// amount must not exceed what was reserved, nor run past the end of the buffer (both asserted).
+void StreamBuffer::CommitMemory(u32 final_num_bytes)
+{
+  ASSERT((m_current_offset + final_num_bytes) <= m_size);
+  ASSERT(final_num_bytes <= m_last_allocation_size);
+  m_current_offset += final_num_bytes;
+}
+
+// Records the current write offset against the context's current fence value, so that once
+// that fence completes the GPU position can be advanced to this offset.
+void StreamBuffer::UpdateCurrentFencePosition()
+{
+  // Don't create a tracking entry if the GPU is caught up with the buffer.
+  if (m_current_offset == m_current_gpu_position)
+    return;
+
+  // Has the offset changed since the last fence?
+  const u64 fence = g_dx_context->GetCurrentFenceValue();
+  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
+  {
+    // Still haven't executed a command buffer, so just update the offset.
+    m_tracked_fences.back().second = m_current_offset;
+    return;
+  }
+
+  // A new fence value: retire completed entries first, then start tracking this one.
+  UpdateGPUPosition();
+  m_tracked_fences.emplace_back(fence, m_current_offset);
+}
+
+// Advances m_current_gpu_position past every tracked fence that has already completed and
+// drops those entries from the front of the tracking list.
+void StreamBuffer::UpdateGPUPosition()
+{
+  auto start = m_tracked_fences.begin();
+  auto end = start;
+
+  const u64 completed_counter = g_dx_context->GetCompletedFenceValue();
+  while (end != m_tracked_fences.end() && completed_counter >= end->first)
+  {
+    m_current_gpu_position = end->second;
+    ++end;
+  }
+
+  if (start != end)
+    m_tracked_fences.erase(start, end);
+}
+
+// Looks for the earliest tracked fence whose completion would free at least `num_bytes`, waits
+// for it, and repositions the offset/GPU position accordingly.
+//
+// Returns false without waiting when no tracked fence would free enough space, or when the
+// chosen fence belongs to the command list that has not been executed yet.
+bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
+{
+  u32 new_offset = 0;
+  u32 new_gpu_position = 0;
+
+  auto iter = m_tracked_fences.begin();
+  for (; iter != m_tracked_fences.end(); iter++)
+  {
+    // Would this fence bring us in line with the GPU?
+    // This is the "last resort" case, where a command buffer execution has been forced
+    // after no additional data has been written to it, so we can assume that after the
+    // fence has been signaled the entire buffer is now consumed.
+    u32 gpu_position = iter->second;
+    if (m_current_offset == gpu_position)
+    {
+      new_offset = 0;
+      new_gpu_position = 0;
+      break;
+    }
+
+    // Assuming that we wait for this fence, are we allocating in front of the GPU?
+    if (m_current_offset > gpu_position)
+    {
+      // This would suggest the GPU has now followed us and wrapped around, so we have from
+      // m_current_offset..m_size free, as well as 0..gpu_position.
+      const u32 remaining_space_after_offset = m_size - m_current_offset;
+      if (remaining_space_after_offset >= num_bytes)
+      {
+        // Switch to allocating in front of the GPU, using the remainder of the buffer.
+        new_offset = m_current_offset;
+        new_gpu_position = gpu_position;
+        break;
+      }
+
+      // We can wrap around to the start, behind the GPU, if there is enough space.
+      // We use > here because otherwise we'd end up lining up with the GPU, and then the
+      // allocator would assume that the GPU has consumed what we just wrote.
+      if (gpu_position > num_bytes)
+      {
+        new_offset = 0;
+        new_gpu_position = gpu_position;
+        break;
+      }
+    }
+    else
+    {
+      // We're currently allocating behind the GPU. This would give us between the current
+      // offset and the GPU position worth of space to work with. Again, > because we can't
+      // align the GPU position with the buffer offset.
+      u32 available_space_inbetween = gpu_position - m_current_offset;
+      if (available_space_inbetween > num_bytes)
+      {
+        // Leave the offset as-is, but update the GPU position.
+        new_offset = m_current_offset;
+        new_gpu_position = gpu_position;
+        break;
+      }
+    }
+  }
+
+  // Did any fences satisfy this condition?
+  // Has the command buffer been executed yet? If not, the caller should execute it.
+  if (iter == m_tracked_fences.end() || iter->first == g_dx_context->GetCurrentFenceValue())
+    return false;
+
+  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
+  g_dx_context->WaitForFence(iter->first);
+  // Drop everything up to and including the waited fence; in the "caught up" case the whole
+  // list is discarded because the entire buffer is considered consumed.
+  m_tracked_fences.erase(m_tracked_fences.begin(),
+                         m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
+  m_current_offset = new_offset;
+  m_current_gpu_position = new_gpu_position;
+  return true;
+}
+
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/StreamBuffer.h b/Source/Core/VideoBackends/D3D12/StreamBuffer.h
new file mode 100644
index 0000000000..0f17bd35ac
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/StreamBuffer.h
@@ -0,0 +1,56 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+
+#include "Common/CommonTypes.h"
+#include "VideoBackends/D3D12/Common.h"
+
+namespace DX12
+{
+// A buffer that is mapped by AllocateBuffer() and handed out piecewise: ReserveMemory()
+// positions the current offset for a new allocation, the caller writes through
+// GetCurrentHostPointer(), and CommitMemory() advances the offset. Fence values are tracked
+// against buffer offsets so space is only reused once the GPU has moved past it.
+class StreamBuffer
+{
+public:
+  StreamBuffer();
+  ~StreamBuffer();
+
+  // The destructor unmaps and releases the raw resource pointer held below, so a copy would
+  // release the same resource twice. Copying is therefore disabled.
+  StreamBuffer(const StreamBuffer&) = delete;
+  StreamBuffer& operator=(const StreamBuffer&) = delete;
+
+  // Creates and maps the underlying buffer. Returns false on failure.
+  bool AllocateBuffer(u32 size);
+
+  ID3D12Resource* GetBuffer() const { return m_buffer; }
+  // GPU virtual address / host pointer of the start of the buffer.
+  D3D12_GPU_VIRTUAL_ADDRESS GetGPUPointer() const { return m_gpu_pointer; }
+  u8* GetHostPointer() const { return m_host_pointer; }
+  // The same addresses, offset by the current write position.
+  u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; }
+  D3D12_GPU_VIRTUAL_ADDRESS GetCurrentGPUPointer() const
+  {
+    return m_gpu_pointer + m_current_offset;
+  }
+  u32 GetSize() const { return m_size; }
+  u32 GetCurrentOffset() const { return m_current_offset; }
+  // Reserves num_bytes at an offset aligned to `alignment`. Returns false if space could not be
+  // found, in which case nothing has been reserved.
+  bool ReserveMemory(u32 num_bytes, u32 alignment);
+  // Advances the write position by final_num_bytes, which must not exceed the last reservation.
+  void CommitMemory(u32 final_num_bytes);
+
+private:
+  void UpdateCurrentFencePosition();
+  void UpdateGPUPosition();
+
+  // Waits for as many fences as needed to allocate num_bytes bytes from the buffer.
+  bool WaitForClearSpace(u32 num_bytes);
+
+  u32 m_size = 0;
+  u32 m_current_offset = 0;
+  u32 m_current_gpu_position = 0;
+  u32 m_last_allocation_size = 0;
+
+  // Owned; released in the destructor.
+  ID3D12Resource* m_buffer = nullptr;
+  D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
+  u8* m_host_pointer = nullptr;
+
+  // List of fences and the corresponding positions in the buffer
+  std::deque> m_tracked_fences;
+};
+
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/SwapChain.cpp b/Source/Core/VideoBackends/D3D12/SwapChain.cpp
new file mode 100644
index 0000000000..6601004e16
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/SwapChain.cpp
@@ -0,0 +1,76 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "VideoBackends/D3D12/SwapChain.h"
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/DXTexture.h"
+
+namespace DX12
+{
+SwapChain::SwapChain(const WindowSystemInfo& wsi, IDXGIFactory2* dxgi_factory,
+                     ID3D12CommandQueue* d3d_command_queue)
+    : D3DCommon::SwapChain(wsi, dxgi_factory, d3d_command_queue)
+{
+}
+
+SwapChain::~SwapChain() = default;
+
+// Builds a swap chain for the given window using the global context's DXGI factory and command
+// queue. Returns nullptr when the underlying swap chain cannot be created.
+std::unique_ptr SwapChain::Create(const WindowSystemInfo& wsi)
+{
+  std::unique_ptr swap_chain = std::make_unique(
+      wsi, g_dx_context->GetDXGIFactory(), g_dx_context->GetCommandQueue());
+  if (!swap_chain->CreateSwapChain(WantsStereo()))
+    return nullptr;
+
+  return swap_chain;
+}
+
+// Wraps every swap chain back buffer in a DXTexture plus a colour-only DXFramebuffer.
+// Returns false as soon as any buffer cannot be fetched or wrapped.
+bool SwapChain::CreateSwapChainBuffers()
+{
+  for (u32 i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++)
+  {
+    ComPtr resource;
+    HRESULT hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(&resource));
+    CHECK(SUCCEEDED(hr), "Get swap chain buffer");
+    // Don't go on to adopt a resource that was never returned.
+    if (FAILED(hr))
+      return false;
+
+    BufferResources buffer;
+    buffer.texture = DXTexture::CreateAdopted(resource.Get());
+    CHECK(buffer.texture, "Create swap chain buffer texture");
+    if (!buffer.texture)
+      return false;
+
+    buffer.framebuffer = DXFramebuffer::Create(buffer.texture.get(), nullptr);
+    CHECK(buffer.framebuffer, "Create swap chain buffer framebuffer");
+    if (!buffer.framebuffer)
+      return false;
+
+    m_buffers.push_back(std::move(buffer));
+  }
+
+  m_current_buffer = 0;
+  return true;
+}
+
+void SwapChain::DestroySwapChainBuffers()
+{
+  // Swap chain textures must be released before it can be resized, therefore we need to destroy all
+  // of them immediately, and not place them onto the deferred destruction queue.
+  for (BufferResources& res : m_buffers)
+  {
+    res.framebuffer.reset();
+    res.texture->DestroyResource();
+    // NOTE(review): release() gives up ownership without deleting the DXTexture wrapper, so the
+    // wrapper object itself is never freed. Presumably this avoids running the destructor after
+    // DestroyResource() - confirm against DXTexture before switching to reset().
+    res.texture.release();
+  }
+  m_buffers.clear();
+}
+
+// Presents through the base class and, on success, advances to the next back buffer.
+bool SwapChain::Present()
+{
+  if (!D3DCommon::SwapChain::Present())
+    return false;
+
+  m_current_buffer = (m_current_buffer + 1) % SWAP_CHAIN_BUFFER_COUNT;
+  return true;
+}
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/SwapChain.h b/Source/Core/VideoBackends/D3D12/SwapChain.h
new file mode 100644
index 0000000000..8291c32c7b
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/SwapChain.h
@@ -0,0 +1,55 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include "Common/CommonTypes.h"
+#include "Common/WindowSystemInfo.h"
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3DCommon/SwapChain.h"
+#include "VideoCommon/TextureConfig.h"
+
+namespace DX12
+{
+class DXTexture;
+class DXFramebuffer;
+
+// D3D12 swap chain: extends the shared D3DCommon swap chain with a texture and framebuffer
+// wrapper for each back buffer, and tracks which back buffer is current.
+class SwapChain : public D3DCommon::SwapChain
+{
+public:
+  SwapChain(const WindowSystemInfo& wsi, IDXGIFactory2* dxgi_factory,
+            ID3D12CommandQueue* d3d_command_queue);
+  ~SwapChain();
+
+  // Returns nullptr on failure.
+  static std::unique_ptr Create(const WindowSystemInfo& wsi);
+
+  bool Present() override;
+
+  // Wrappers for the back buffer selected by m_current_buffer.
+  DXTexture* GetCurrentTexture() const { return m_buffers[m_current_buffer].texture.get(); }
+  DXFramebuffer* GetCurrentFramebuffer() const
+  {
+    return m_buffers[m_current_buffer].framebuffer.get();
+  }
+
+protected:
+  bool CreateSwapChainBuffers() override;
+  void DestroySwapChainBuffers() override;
+
+private:
+  // Per-back-buffer wrappers; the framebuffer refers to the texture in the same entry.
+  struct BufferResources
+  {
+    std::unique_ptr texture;
+    std::unique_ptr framebuffer;
+  };
+
+  std::vector m_buffers;
+  u32 m_current_buffer = 0;
+};
+
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/VertexManager.cpp b/Source/Core/VideoBackends/D3D12/VertexManager.cpp
new file mode 100644
index 0000000000..de5facc7a8
--- /dev/null
+++ 
b/Source/Core/VideoBackends/D3D12/VertexManager.cpp
@@ -0,0 +1,321 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "VideoBackends/D3D12/VertexManager.h"
+
+#include "Common/Align.h"
+#include "Common/CommonTypes.h"
+#include "Common/Logging/Log.h"
+#include "Common/MsgHandler.h"
+
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/Renderer.h"
+#include "VideoBackends/D3D12/StreamBuffer.h"
+
+#include "VideoCommon/GeometryShaderManager.h"
+#include "VideoCommon/IndexGenerator.h"
+#include "VideoCommon/PixelShaderManager.h"
+#include "VideoCommon/Statistics.h"
+#include "VideoCommon/VertexLoaderManager.h"
+#include "VideoCommon/VertexShaderManager.h"
+#include "VideoCommon/VideoConfig.h"
+
+namespace DX12
+{
+VertexManager::VertexManager() = default;
+
+VertexManager::~VertexManager() = default;
+
+// Allocates the four streaming buffers, creates one shader resource view over the texel buffer
+// for each texel buffer format, and uploads an initial copy of all shader constants.
+// Returns false if a buffer or descriptor cannot be allocated.
+bool VertexManager::Initialize()
+{
+  if (!m_vertex_stream_buffer.AllocateBuffer(VERTEX_STREAM_BUFFER_SIZE) ||
+      !m_index_stream_buffer.AllocateBuffer(INDEX_STREAM_BUFFER_SIZE) ||
+      !m_uniform_stream_buffer.AllocateBuffer(UNIFORM_STREAM_BUFFER_SIZE) ||
+      !m_texel_stream_buffer.AllocateBuffer(TEXEL_STREAM_BUFFER_SIZE))
+  {
+    PanicAlert("Failed to allocate streaming buffers");
+    return false;
+  }
+
+  static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS>
+      format_mapping = {{
+          {TEXEL_BUFFER_FORMAT_R8_UINT, DXGI_FORMAT_R8_UINT},
+          {TEXEL_BUFFER_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT},
+          {TEXEL_BUFFER_FORMAT_RGBA8_UINT, DXGI_FORMAT_R8G8B8A8_UINT},
+          {TEXEL_BUFFER_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_UINT},
+      }};
+  for (const auto& it : format_mapping)
+  {
+    DescriptorHandle& dh = m_texel_buffer_views[it.first];
+    if (!g_dx_context->GetDescriptorHeapManager().Allocate(&dh))
+    {
+      PanicAlert("Failed to allocate descriptor for texel buffer");
+      return false;
+    }
+
+    // Each view spans the whole texel buffer, counted in elements of this format.
+    D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {it.second, D3D12_SRV_DIMENSION_BUFFER,
+                                                D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
+    srv_desc.Buffer.NumElements =
+        m_texel_stream_buffer.GetSize() / GetTexelBufferElementSize(it.first);
+    g_dx_context->GetDevice()->CreateShaderResourceView(m_texel_stream_buffer.GetBuffer(),
+                                                        &srv_desc, dh.cpu_handle);
+  }
+
+  UploadAllConstants();
+  return true;
+}
+
+// Reserves worst-case space for the next batch in the vertex and index stream buffers and
+// points the base-class write cursors at it. If either reservation fails, the current command
+// list is executed and the failed reservation is retried once.
+void VertexManager::ResetBuffer(u32 vertex_stride)
+{
+  // Attempt to allocate from buffers
+  bool has_vbuffer_allocation = m_vertex_stream_buffer.ReserveMemory(MAXVBUFFERSIZE, vertex_stride);
+  bool has_ibuffer_allocation =
+      m_index_stream_buffer.ReserveMemory(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16));
+  if (!has_vbuffer_allocation || !has_ibuffer_allocation)
+  {
+    // Flush any pending commands first, so that we can wait on the fences
+    WARN_LOG(VIDEO, "Executing command list while waiting for space in vertex/index buffer");
+    Renderer::GetInstance()->ExecuteCommandList(false);
+
+    // Attempt to allocate again, this may cause a fence wait
+    if (!has_vbuffer_allocation)
+      has_vbuffer_allocation = m_vertex_stream_buffer.ReserveMemory(MAXVBUFFERSIZE, vertex_stride);
+    if (!has_ibuffer_allocation)
+      has_ibuffer_allocation =
+          m_index_stream_buffer.ReserveMemory(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16));
+
+    // If we still failed, that means the allocation was too large and will never succeed, so panic
+    if (!has_vbuffer_allocation || !has_ibuffer_allocation)
+      PanicAlert("Failed to allocate space in streaming buffers for pending draw");
+  }
+
+  // Update pointers
+  m_base_buffer_pointer = m_vertex_stream_buffer.GetHostPointer();
+  m_end_buffer_pointer = m_vertex_stream_buffer.GetCurrentHostPointer() + MAXVBUFFERSIZE;
+  m_cur_buffer_pointer = m_vertex_stream_buffer.GetCurrentHostPointer();
+  IndexGenerator::Start(reinterpret_cast(m_index_stream_buffer.GetCurrentHostPointer()));
+}
+
+// Commits the vertex/index data written since ResetBuffer(), reports where the batch starts
+// (in vertices and in indices, measured from the start of each buffer), and binds both buffers.
+void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
+                                 u32* out_base_vertex, u32* out_base_index)
+{
+  const u32 vertex_data_size = num_vertices * vertex_stride;
+  const u32 index_data_size = num_indices * sizeof(u16);
+
+  // Offsets must be read before CommitMemory() advances them. A zero stride would divide by
+  // zero, so the base vertex is reported as 0 in that case.
+  *out_base_vertex =
+      vertex_stride > 0 ? (m_vertex_stream_buffer.GetCurrentOffset() / vertex_stride) : 0;
+  *out_base_index = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
+
+  m_vertex_stream_buffer.CommitMemory(vertex_data_size);
+  m_index_stream_buffer.CommitMemory(index_data_size);
+
+  ADDSTAT(stats.thisFrame.bytesVertexStreamed, static_cast(vertex_data_size));
+  ADDSTAT(stats.thisFrame.bytesIndexStreamed, static_cast(index_data_size));
+
+  Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), vertex_stride,
+                                           m_vertex_stream_buffer.GetSize());
+  Renderer::GetInstance()->SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(),
+                                          m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
+}
+
+// Uploads whichever of the vertex/geometry/pixel constant blocks are currently dirty.
+void VertexManager::UploadUniforms()
+{
+  UpdateVertexShaderConstants();
+  UpdateGeometryShaderConstants();
+  UpdatePixelShaderConstants();
+}
+
+// Streams the vertex shader constants into the uniform buffer and binds them to slot 1.
+// Does nothing when they are clean, or when ReserveConstantStorage() returned false (in which
+// case it has already re-uploaded every stage).
+void VertexManager::UpdateVertexShaderConstants()
+{
+  if (!VertexShaderManager::dirty || !ReserveConstantStorage())
+    return;
+
+  Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &VertexShaderManager::constants,
+              sizeof(VertexShaderConstants));
+  m_uniform_stream_buffer.CommitMemory(sizeof(VertexShaderConstants));
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants));
+  VertexShaderManager::dirty = false;
+}
+
+// Same as UpdateVertexShaderConstants(), for the geometry shader constants (slot 2).
+void VertexManager::UpdateGeometryShaderConstants()
+{
+  if (!GeometryShaderManager::dirty || !ReserveConstantStorage())
+    return;
+
+  Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &GeometryShaderManager::constants,
+              sizeof(GeometryShaderConstants));
+  m_uniform_stream_buffer.CommitMemory(sizeof(GeometryShaderConstants));
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants));
+  GeometryShaderManager::dirty = false;
+}
+
+// Same as UpdateVertexShaderConstants(), for the pixel shader constants (slot 0).
+void VertexManager::UpdatePixelShaderConstants()
+{
+  if (!PixelShaderManager::dirty || !ReserveConstantStorage())
+    return;
+
+  Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &PixelShaderManager::constants,
+              sizeof(PixelShaderConstants));
+  m_uniform_stream_buffer.CommitMemory(sizeof(PixelShaderConstants));
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
+  PixelShaderManager::dirty = false;
+}
+
+// Reserves room for the largest of the three constant blocks. Returns true when the caller
+// should go ahead and write its block. Returns false when the buffer was full: the command list
+// has then been executed and every stage's constants re-uploaded, so the caller has nothing
+// left to do.
+bool VertexManager::ReserveConstantStorage()
+{
+  static constexpr u32 reserve_size =
+      static_cast(std::max({sizeof(PixelShaderConstants), sizeof(VertexShaderConstants),
+                            sizeof(GeometryShaderConstants)}));
+  if (m_uniform_stream_buffer.ReserveMemory(reserve_size,
+                                            D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
+  {
+    return true;
+  }
+
+  // The only places that call constant updates are safe to have state restored.
+  WARN_LOG(VIDEO, "Executing command list while waiting for space in uniform buffer");
+  Renderer::GetInstance()->ExecuteCommandList(false);
+
+  // Since we are on a new command buffer, all constants have been invalidated, and we need
+  // to reupload them. We may as well do this now, since we're issuing a draw anyway.
+  UploadAllConstants();
+  return false;
+}
+
+// Uploads the pixel, vertex and geometry constants as one reservation, each block starting at
+// a constant-buffer-aligned offset, rebinds all three slots and clears all three dirty flags.
+void VertexManager::UploadAllConstants()
+{
+  // We are free to re-use parts of the buffer now since we're uploading all constants.
+  const u32 pixel_constants_offset = 0;
+  const u32 vertex_constants_offset =
+      Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants),
+                      D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+  const u32 geometry_constants_offset =
+      Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants),
+                      D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+  const u32 allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants);
+
+  // Allocate everything at once.
+  // We should only be here if the buffer was full and a command buffer was submitted anyway.
+  if (!m_uniform_stream_buffer.ReserveMemory(allocation_size,
+                                             D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
+  {
+    PanicAlert("Failed to allocate space for constants in streaming buffer");
+    return;
+  }
+
+  // Update bindings
+  Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer() +
+                                                    pixel_constants_offset);
+  Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer() +
+                                                    vertex_constants_offset);
+  Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer() +
+                                                    geometry_constants_offset);
+
+  // Copy the actual data in
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + pixel_constants_offset,
+              &PixelShaderManager::constants, sizeof(PixelShaderConstants));
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + vertex_constants_offset,
+              &VertexShaderManager::constants, sizeof(VertexShaderConstants));
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + geometry_constants_offset,
+              &GeometryShaderManager::constants, sizeof(GeometryShaderConstants));
+
+  // Finally, flush buffer memory after copying
+  m_uniform_stream_buffer.CommitMemory(allocation_size);
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, allocation_size);
+
+  // Clear dirty flags
+  VertexShaderManager::dirty = false;
+  GeometryShaderManager::dirty = false;
+  PixelShaderManager::dirty = false;
+}
+
+// Uploads a caller-supplied uniform block and binds it to all three constant buffer slots.
+// The regular shader constants are invalidated first so they are re-uploaded afterwards.
+void VertexManager::UploadUtilityUniforms(const void* data, u32 data_size)
+{
+  InvalidateConstants();
+  if (!m_uniform_stream_buffer.ReserveMemory(data_size,
+                                             D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
+  {
+    WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in uniform buffer");
+    Renderer::GetInstance()->ExecuteCommandList(false);
+
+    // Executing the command list does not reserve anything by itself: the reservation has to be
+    // repeated so the write offset is aligned and points at memory that is free to overwrite.
+    if (!m_uniform_stream_buffer.ReserveMemory(data_size,
+                                               D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
+    {
+      PanicAlert("Failed to allocate %u bytes from uniform buffer", data_size);
+      return;
+    }
+  }
+
+  Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer());
+  std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size);
+  m_uniform_stream_buffer.CommitMemory(data_size);
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
+}
+
+// Copies `data` into the texel stream buffer and binds the view for `format` to texture slot 0.
+// *out_offset receives the start of the data, in elements of `format`. Returns false if the
+// data can never fit or space could not be obtained even after executing the command list.
+bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
+                                      u32* out_offset)
+{
+  if (data_size > m_texel_stream_buffer.GetSize())
+    return false;
+
+  const u32 elem_size = GetTexelBufferElementSize(format);
+  if (!m_texel_stream_buffer.ReserveMemory(data_size, elem_size))
+  {
+    // Try submitting cmdbuffer.
+    WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer");
+    Renderer::GetInstance()->ExecuteCommandList(false);
+    if (!m_texel_stream_buffer.ReserveMemory(data_size, elem_size))
+    {
+      PanicAlert("Failed to allocate %u bytes from texel buffer", data_size);
+      return false;
+    }
+  }
+
+  std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer(), data, data_size);
+  *out_offset = static_cast(m_texel_stream_buffer.GetCurrentOffset()) / elem_size;
+  m_texel_stream_buffer.CommitMemory(data_size);
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
+  Renderer::GetInstance()->SetTextureDescriptor(0, m_texel_buffer_views[format].cpu_handle);
+  return true;
+}
+
+// As above, but also uploads a palette directly after the data (aligned to the palette element
+// size) and binds its view to texture slot 1. *out_palette_offset is expressed in elements of
+// `palette_format`.
+bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
+                                      u32* out_offset, const void* palette_data, u32 palette_size,
+                                      TexelBufferFormat palette_format, u32* out_palette_offset)
+{
+  const u32 elem_size = GetTexelBufferElementSize(format);
+  const u32 palette_elem_size = GetTexelBufferElementSize(palette_format);
+  // The extra palette element covers the padding needed to align the palette start.
+  const u32 reserve_size = data_size + palette_size + palette_elem_size;
+  if (reserve_size > m_texel_stream_buffer.GetSize())
+    return false;
+
+  if (!m_texel_stream_buffer.ReserveMemory(reserve_size, elem_size))
+  {
+    // Try submitting cmdbuffer.
+    WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer");
+    Renderer::GetInstance()->ExecuteCommandList(false);
+    if (!m_texel_stream_buffer.ReserveMemory(reserve_size, elem_size))
+    {
+      PanicAlert("Failed to allocate %u bytes from texel buffer", reserve_size);
+      return false;
+    }
+  }
+
+  const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size);
+  std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer(), data, data_size);
+  std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer() + palette_byte_offset, palette_data,
+              palette_size);
+  *out_offset = static_cast(m_texel_stream_buffer.GetCurrentOffset()) / elem_size;
+  *out_palette_offset =
+      (static_cast(m_texel_stream_buffer.GetCurrentOffset()) + palette_byte_offset) /
+      palette_elem_size;
+
+  m_texel_stream_buffer.CommitMemory(palette_byte_offset + palette_size);
+  ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size);
+  Renderer::GetInstance()->SetTextureDescriptor(0, m_texel_buffer_views[format].cpu_handle);
+  Renderer::GetInstance()->SetTextureDescriptor(1, m_texel_buffer_views[palette_format].cpu_handle);
+  return true;
+}
+
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/VertexManager.h b/Source/Core/VideoBackends/D3D12/VertexManager.h
new file mode 100644
index 0000000000..c609b49bd0
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/VertexManager.h
@@ -0,0 +1,52 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include "VideoBackends/D3D12/DescriptorHeapManager.h"
+#include "VideoBackends/D3D12/StreamBuffer.h"
+#include "VideoCommon/VertexManagerBase.h"
+
+namespace DX12
+{
+// D3D12 vertex manager: holds separate stream buffers for vertex, index, uniform and texel
+// data, plus an array of descriptor handles used as views over the texel buffer.
+class VertexManager final : public VertexManagerBase
+{
+public:
+  VertexManager();
+  ~VertexManager();
+
+  bool Initialize() override;
+
+  void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
+  bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
+                         u32* out_offset) override;
+  bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset,
+                         const void* palette_data, u32 palette_size,
+                         TexelBufferFormat palette_format, u32* out_palette_offset) override;
+
+protected:
+  void ResetBuffer(u32 vertex_stride) override;
+  void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
+                    u32* out_base_index) override;
+  void UploadUniforms() override;
+
+  void UpdateVertexShaderConstants();
+  void UpdateGeometryShaderConstants();
+  void UpdatePixelShaderConstants();
+
+  // Allocates storage in the uniform buffer of the specified size. If this storage cannot be
+  // allocated immediately, the current command buffer will be submitted and all stage's
+  // constants will be re-uploaded. false will be returned in this case, otherwise true.
+  bool ReserveConstantStorage();
+  void UploadAllConstants();
+
+  StreamBuffer m_vertex_stream_buffer;
+  StreamBuffer m_index_stream_buffer;
+  StreamBuffer m_uniform_stream_buffer;
+  StreamBuffer m_texel_stream_buffer;
+  std::array m_texel_buffer_views = {};
+};
+
+} // namespace DX12
diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp
new file mode 100644
index 0000000000..0ba778471b
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp
@@ -0,0 +1,166 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include
+
+#include "Common/CommonTypes.h"
+#include "Common/StringUtil.h"
+
+#include "Core/ConfigManager.h"
+
+#include "VideoBackends/D3D12/Common.h"
+#include "VideoBackends/D3D12/DXContext.h"
+#include "VideoBackends/D3D12/PerfQuery.h"
+#include "VideoBackends/D3D12/Renderer.h"
+#include "VideoBackends/D3D12/SwapChain.h"
+#include "VideoBackends/D3D12/VertexManager.h"
+#include "VideoBackends/D3D12/VideoBackend.h"
+
+#include "VideoCommon/FramebufferManager.h"
+#include "VideoCommon/ShaderCache.h"
+#include "VideoCommon/TextureCacheBase.h"
+#include "VideoCommon/VideoCommon.h"
+#include "VideoCommon/VideoConfig.h"
+
+namespace DX12
+{
+// Short backend name ("D3D12").
+std::string VideoBackend::GetName() const
+{
+  return "D3D12";
+}
+
+// Longer backend name ("Direct3D 12").
+std::string VideoBackend::GetDisplayName() const
+{
+  return "Direct3D 12";
+}
+
+// Fills in the backend capability info. The D3D libraries are loaded only for the duration of
+// the query and unloaded again afterwards; if they cannot be loaded the info is left untouched.
+void VideoBackend::InitBackendInfo()
+{
+  if (!D3DCommon::LoadLibraries())
+    return;
+
+  FillBackendInfo();
+  D3DCommon::UnloadLibraries();
+}
+
+void VideoBackend::FillBackendInfo()
+{
+  g_Config.backend_info.api_type = APIType::D3D;
+  g_Config.backend_info.bUsesLowerLeftOrigin = false;
+  g_Config.backend_info.bSupportsExclusiveFullscreen = true;
+  g_Config.backend_info.bSupportsDualSourceBlend = true;
+  g_Config.backend_info.bSupportsPrimitiveRestart = true;
+  g_Config.backend_info.bSupportsOversizedViewports = false;
+  g_Config.backend_info.bSupportsGeometryShaders = true;
+  g_Config.backend_info.bSupports3DVision = false;
+  g_Config.backend_info.bSupportsEarlyZ = true;
+  g_Config.backend_info.bSupportsBindingLayout = false;
+  g_Config.backend_info.bSupportsBBox = true;
+  g_Config.backend_info.bSupportsGSInstancing = true;
+  g_Config.backend_info.bSupportsPaletteConversion = true;
+  g_Config.backend_info.bSupportsPostProcessing = true;
+  g_Config.backend_info.bSupportsClipControl = true;
+  g_Config.backend_info.bSupportsSSAA = true;
+  g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
+  g_Config.backend_info.bSupportsDepthClamp = true;
+  g_Config.backend_info.bSupportsReversedDepthRange = false;
+  g_Config.backend_info.bSupportsComputeShaders = true;
+  g_Config.backend_info.bSupportsLogicOp = true;
+  g_Config.backend_info.bSupportsMultithreading = true;
+  g_Config.backend_info.bSupportsGPUTextureDecoding = true;
+  g_Config.backend_info.bSupportsST3CTextures = false;
+  g_Config.backend_info.bSupportsCopyToVram = true;
+  g_Config.backend_info.bSupportsBitfield = false;
+  g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
+  g_Config.backend_info.bSupportsBPTCTextures = false;
+  g_Config.backend_info.bSupportsFramebufferFetch = false;
+  g_Config.backend_info.bSupportsBackgroundCompiling = true;
+  g_Config.backend_info.bSupportsLargePoints = false;
+  g_Config.backend_info.bSupportsPartialDepthCopies = false;
+  g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames();
+  g_Config.backend_info.AAModes = DXContext::GetAAModes(g_Config.iAdapter);
+
+  // We can only check texture support once we have a device.
+ if (g_dx_context) + { + g_Config.backend_info.bSupportsST3CTextures = + g_dx_context->SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) && + g_dx_context->SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) && + g_dx_context->SupportsTextureFormat(DXGI_FORMAT_BC3_UNORM); + g_Config.backend_info.bSupportsBPTCTextures = + g_dx_context->SupportsTextureFormat(DXGI_FORMAT_BC7_UNORM); + } +} + +bool VideoBackend::Initialize(const WindowSystemInfo& wsi) +{ + if (!DXContext::Create(g_Config.iAdapter, g_Config.bEnableValidationLayer)) + { + PanicAlert("Failed to create D3D12 context"); + return false; + } + + FillBackendInfo(); + InitializeShared(); + + if (!g_dx_context->CreateGlobalResources()) + { + PanicAlert("Failed to create D3D12 global resources"); + DXContext::Destroy(); + ShutdownShared(); + return false; + } + + std::unique_ptr swap_chain; + if (wsi.render_surface && !(swap_chain = SwapChain::Create(wsi))) + { + PanicAlertT("Failed to create D3D swap chain"); + DXContext::Destroy(); + ShutdownShared(); + return false; + } + + // Create main wrapper instances. + g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); + g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_perf_query = std::make_unique(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize() || !PerfQuery::GetInstance()->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + return true; +} + +void VideoBackend::Shutdown() +{ + // Keep the debug runtime happy... 
+ if (g_renderer) + Renderer::GetInstance()->ExecuteCommandList(true); + + if (g_shader_cache) + g_shader_cache->Shutdown(); + + if (g_renderer) + g_renderer->Shutdown(); + + g_perf_query.reset(); + g_texture_cache.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); + g_vertex_manager.reset(); + g_renderer.reset(); + DXContext::Destroy(); + ShutdownShared(); +} +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.h b/Source/Core/VideoBackends/D3D12/VideoBackend.h new file mode 100644 index 0000000000..f3fe4c4f89 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.h @@ -0,0 +1,25 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include "VideoCommon/VideoBackendBase.h" + +namespace DX12 +{ +class VideoBackend final : public VideoBackendBase +{ +public: + bool Initialize(const WindowSystemInfo& wsi) override; + void Shutdown() override; + + std::string GetName() const override; + std::string GetDisplayName() const override; + void InitBackendInfo() override; + +private: + void FillBackendInfo(); +}; +} diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index f310f68920..d09b32cf82 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -21,6 +21,7 @@ // TODO: ugly #ifdef _WIN32 #include "VideoBackends/D3D/VideoBackend.h" +#include "VideoBackends/D3D12/VideoBackend.h" #endif #include "VideoBackends/Null/VideoBackend.h" #include "VideoBackends/OGL/VideoBackend.h" @@ -184,6 +185,7 @@ void VideoBackendBase::PopulateList() g_available_video_backends.push_back(std::make_unique()); #ifdef _WIN32 g_available_video_backends.push_back(std::make_unique()); + g_available_video_backends.push_back(std::make_unique()); #endif g_available_video_backends.push_back(std::make_unique()); 
g_available_video_backends.push_back(std::make_unique()); diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj index 7c013e3007..016d1dda62 100644 --- a/Source/UnitTests/UnitTests.vcxproj +++ b/Source/UnitTests/UnitTests.vcxproj @@ -84,6 +84,9 @@ {604c8368-f34a-4d55-82c8-cc92a0c13254} + + {570215b7-e32f-4438-95ae-c8d955f9fca3} + diff --git a/Source/dolphin-emu.sln b/Source/dolphin-emu.sln index 0e4ef7ca99..f855ef883d 100644 --- a/Source/dolphin-emu.sln +++ b/Source/dolphin-emu.sln @@ -93,6 +93,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imgui", "..\Externals\imgui EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UpdaterCommon", "Core\UpdaterCommon\UpdaterCommon.vcxproj", "{B001D13E-7EAB-4689-842D-801E5ACFFAC5}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D3D12", "Core\VideoBackends\D3D12\D3D12.vcxproj", "{570215B7-E32F-4438-95AE-C8D955F9FCA3}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D3DCommon", "Core\VideoBackends\D3DCommon\D3DCommon.vcxproj", "{DEA96CF2-F237-4A1A-B32F-C916769EFB50}" EndProject Global @@ -355,6 +357,12 @@ Global {B001D13E-7EAB-4689-842D-801E5ACFFAC5}.Release|x64.ActiveCfg = Release|x64 {B001D13E-7EAB-4689-842D-801E5ACFFAC5}.Release|x64.Build.0 = Release|x64 {B001D13E-7EAB-4689-842D-801E5ACFFAC5}.Release|x86.ActiveCfg = Release|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.ActiveCfg = Debug|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.Build.0 = Debug|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x86.ActiveCfg = Debug|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.ActiveCfg = Release|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.Build.0 = Release|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x86.ActiveCfg = Release|x64 {DEA96CF2-F237-4A1A-B32F-C916769EFB50}.Debug|x64.ActiveCfg = Debug|x64 {DEA96CF2-F237-4A1A-B32F-C916769EFB50}.Debug|x64.Build.0 = Debug|x64 
{DEA96CF2-F237-4A1A-B32F-C916769EFB50}.Debug|x86.ActiveCfg = Debug|x64 @@ -402,6 +410,7 @@ Global {4482FD2A-EC43-3FFB-AC20-2E5C54B05EAD} = {87ADDFF9-5768-4DA2-A33B-2477593D6677} {23114507-079A-4418-9707-CFA81A03CA99} = {87ADDFF9-5768-4DA2-A33B-2477593D6677} {4C3B2264-EA73-4A7B-9CFE-65B0FD635EBB} = {87ADDFF9-5768-4DA2-A33B-2477593D6677} + {570215B7-E32F-4438-95AE-C8D955F9FCA3} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} {DEA96CF2-F237-4A1A-B32F-C916769EFB50} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution