diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.cpp b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp index d847924cf2..b9ee9b4929 100644 --- a/Source/Core/VideoBackends/D3D12/BoundingBox.cpp +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp @@ -2,43 +2,151 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include + #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" #include "VideoBackends/D3D12/BoundingBox.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/Render.h" #include "VideoCommon/VideoConfig.h" -// D3D12TODO: Support bounding box behavior. namespace DX12 { -ID3D11UnorderedAccessView* BBox::GetUAV() -{ - // D3D12TODO: Implement this; - return nullptr; -} +constexpr size_t BBOX_BUFFER_SIZE = sizeof(int) * 4; +constexpr size_t BBOX_STREAM_BUFFER_SIZE = BBOX_BUFFER_SIZE * 128; + +static ID3D12Resource* s_bbox_buffer; +static ID3D12Resource* s_bbox_staging_buffer; +static void* s_bbox_staging_buffer_map; +static std::unique_ptr s_bbox_stream_buffer; +static D3D12_GPU_DESCRIPTOR_HANDLE s_bbox_descriptor_handle; void BBox::Init() { - if (g_ActiveConfig.backend_info.bSupportsBBox) - { - // D3D12TODO: Implement this; - } + CD3DX12_RESOURCE_DESC buffer_desc(CD3DX12_RESOURCE_DESC::Buffer(BBOX_BUFFER_SIZE, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, 0)); + CD3DX12_RESOURCE_DESC staging_buffer_desc(CD3DX12_RESOURCE_DESC::Buffer(BBOX_BUFFER_SIZE, D3D12_RESOURCE_FLAG_NONE, 0)); + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &buffer_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + nullptr, + IID_PPV_ARGS(&s_bbox_buffer))); + + 
CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &staging_buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&s_bbox_staging_buffer))); + + s_bbox_stream_buffer = std::make_unique(BBOX_STREAM_BUFFER_SIZE, BBOX_STREAM_BUFFER_SIZE, nullptr); + + // D3D12 root signature UAV must be raw or structured buffers, not typed. Since we used a typed buffer, + // we have to use a descriptor table. Luckily, we only have to allocate this once, and it never changes. + D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor_handle; + if (!D3D::gpu_descriptor_heap_mgr->Allocate(&cpu_descriptor_handle, &s_bbox_descriptor_handle, nullptr, false)) + PanicAlert("Failed to create bounding box UAV descriptor"); + + D3D12_UNORDERED_ACCESS_VIEW_DESC view_desc = { DXGI_FORMAT_R32_SINT, D3D12_UAV_DIMENSION_BUFFER }; + view_desc.Buffer.FirstElement = 0; + view_desc.Buffer.NumElements = 4; + view_desc.Buffer.StructureByteStride = 0; + view_desc.Buffer.CounterOffsetInBytes = 0; + view_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + D3D::device12->CreateUnorderedAccessView(s_bbox_buffer, nullptr, &view_desc, cpu_descriptor_handle); + + Bind(); +} + +void BBox::Bind() +{ + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_UAV, s_bbox_descriptor_handle); +} + +void BBox::Invalidate() +{ + if (!s_bbox_staging_buffer_map) + return; + + D3D12_RANGE write_range = {}; + s_bbox_staging_buffer->Unmap(0, &write_range); + s_bbox_staging_buffer_map = nullptr; } void BBox::Shutdown() { - // D3D12TODO: Implement this; + Invalidate(); + + if (s_bbox_buffer) + { + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_bbox_buffer); + s_bbox_buffer = nullptr; + } + + if (s_bbox_staging_buffer) + { + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_bbox_staging_buffer); + s_bbox_staging_buffer = nullptr; + } + + s_bbox_stream_buffer.reset(); } void 
BBox::Set(int index, int value) { - // D3D12TODO: Implement this; + // If the buffer is currently mapped, compare the value, and update the staging buffer. + if (s_bbox_staging_buffer_map) + { + int current_value; + memcpy(¤t_value, reinterpret_cast(s_bbox_staging_buffer_map) + (index * sizeof(int)), sizeof(int)); + if (current_value == value) + { + // Value hasn't changed. So skip updating completely. + return; + } + + memcpy(reinterpret_cast(s_bbox_staging_buffer_map) + (index * sizeof(int)), &value, sizeof(int)); + } + + s_bbox_stream_buffer->AllocateSpaceInBuffer(sizeof(int), sizeof(int)); + + // Allocate temporary bytes in upload buffer, then copy to real buffer. + memcpy(s_bbox_stream_buffer->GetCPUAddressOfCurrentAllocation(), &value, sizeof(int)); + D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0); + D3D::current_command_list->CopyBufferRegion(s_bbox_buffer, index * sizeof(int), s_bbox_stream_buffer->GetBuffer(), s_bbox_stream_buffer->GetOffsetOfCurrentAllocation(), sizeof(int)); + D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0); } int BBox::Get(int index) { - // D3D12TODO: Implement this; - return 0; + if (!s_bbox_staging_buffer_map) + { + D3D::command_list_mgr->CPUAccessNotify(); + + // Copy from real buffer to staging buffer, then block until we have the results. 
+ D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE, 0); + D3D::current_command_list->CopyBufferRegion(s_bbox_staging_buffer, 0, s_bbox_buffer, 0, BBOX_BUFFER_SIZE); + D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0); + + D3D::command_list_mgr->ExecuteQueuedWork(true); + + D3D12_RANGE read_range = { 0, BBOX_BUFFER_SIZE }; + CheckHR(s_bbox_staging_buffer->Map(0, &read_range, &s_bbox_staging_buffer_map)); + } + + int value; + memcpy(&value, &reinterpret_cast(s_bbox_staging_buffer_map)[index], sizeof(int)); + return value; } }; diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.h b/Source/Core/VideoBackends/D3D12/BoundingBox.h index 05126810db..5a762e7b59 100644 --- a/Source/Core/VideoBackends/D3D12/BoundingBox.h +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.h @@ -11,8 +11,9 @@ namespace DX12 class BBox { public: - static ID3D11UnorderedAccessView* GetUAV(); static void Init(); + static void Bind(); + static void Invalidate(); static void Shutdown(); static void Set(int index, int value); diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj index d0f3787378..ff026f977c 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj @@ -67,7 +67,6 @@ - @@ -91,7 +90,6 @@ - diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters index 83038e5a7b..f9b7d8e624 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters @@ -39,9 +39,6 @@ Render - - Render - Render @@ -105,9 +102,6 @@ Render - - Render - Render diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index 637b11b059..dc32f782e9 100644 --- 
a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -14,6 +14,7 @@ #include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" #include "VideoBackends/D3D12/D3DState.h" #include "VideoBackends/D3D12/D3DTexture.h" +#include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/VideoConfig.h" static const unsigned int SWAP_CHAIN_BUFFER_COUNT = 4; @@ -72,18 +73,12 @@ static LARGE_INTEGER s_qpc_frequency; static ID3D12DebugDevice* s_debug_device12 = nullptr; -static D3D_FEATURE_LEVEL s_feat_level; static D3DTexture2D* s_backbuf[SWAP_CHAIN_BUFFER_COUNT]; static unsigned int s_current_back_buf = 0; static unsigned int s_xres = 0; static unsigned int s_yres = 0; static bool s_frame_in_progress = false; -static std::vector s_aa_modes; // supported AA modes of the current adapter -static const D3D_FEATURE_LEVEL s_supported_feature_levels[] = { - D3D_FEATURE_LEVEL_11_0 -}; - HRESULT LoadDXGI() { if (s_dxgi_dll_ref++ > 0) @@ -233,108 +228,29 @@ void UnloadD3DCompiler() d3d_reflect = nullptr; } -bool AlertUserIfSelectedAdapterDoesNotSupportD3D12() -{ - HRESULT hr = LoadDXGI(); - if (SUCCEEDED(hr)) - { - hr = LoadD3D(); - } - - if (FAILED(hr)) - { - // LoadDXGI / LoadD3D display a specific error message, - // no need to do that here. 
- return false; - } - - IDXGIFactory* factory = nullptr; - IDXGIAdapter* adapter = nullptr; - ID3D12Device* device = nullptr; - - if (SUCCEEDED(hr)) - { - hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); - } - - if (SUCCEEDED(hr)) - { - hr = factory->EnumAdapters(g_ActiveConfig.iAdapter, &adapter); - } - - if (SUCCEEDED(hr)) - { - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device)); - - SAFE_RELEASE(device); - SAFE_RELEASE(adapter); - SAFE_RELEASE(factory); - - if (FAILED(hr)) - { - UnloadD3D(); - UnloadDXGI(); - MessageBoxA(nullptr, "Failed to create a D3D12 device on the selected adapter.\n\nPlease make sure it supports Direct3D 12, and that your graphics drivers are up-to-date.", "Critical error", MB_OK | MB_ICONERROR); - return false; - } - - // If succeeded, leave DXGI and D3D libraries loaded since we'll use them in Create(). - return true; - } - - // DXGI failed to create factory/enumerate adapter. This should be very uncommon. - MessageBoxA(nullptr, "Failed to create enumerate selected adapter. 
Please select a different graphics adapter.", "Critical error", MB_OK | MB_ICONERROR); - SAFE_RELEASE(adapter); - SAFE_RELEASE(factory); - - UnloadD3D(); - UnloadDXGI(); - return false; -} - -std::vector EnumAAModes(IDXGIAdapter* adapter) +std::vector EnumAAModes(ID3D12Device* device) { std::vector aa_modes; - bool d3d12_supported = AlertUserIfSelectedAdapterDoesNotSupportD3D12(); - - if (!d3d12_supported) - return aa_modes; - - ID3D12Device* device12 = nullptr; - d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - - if (device12) + for (int samples = 0; samples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; ++samples) { - for (int samples = 0; samples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; ++samples) - { - D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS multisample_quality_levels = {}; - multisample_quality_levels.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - multisample_quality_levels.SampleCount = samples; + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS multisample_quality_levels = {}; + multisample_quality_levels.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + multisample_quality_levels.SampleCount = samples; - device12->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &multisample_quality_levels, sizeof(multisample_quality_levels)); + device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &multisample_quality_levels, sizeof(multisample_quality_levels)); - DXGI_SAMPLE_DESC desc; - desc.Count = samples; - desc.Quality = 0; + DXGI_SAMPLE_DESC desc; + desc.Count = samples; + desc.Quality = 0; - if (multisample_quality_levels.NumQualityLevels > 0) - { - aa_modes.push_back(desc); - } - } - - device12->Release(); + if (multisample_quality_levels.NumQualityLevels > 0) + aa_modes.push_back(desc); } return aa_modes; } -D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter) -{ - return D3D_FEATURE_LEVEL_11_0; -} - HRESULT Create(HWND wnd) { hWnd = wnd; @@ -346,17 +262,21 @@ HRESULT Create(HWND wnd) s_yres = client.bottom - client.top; hr = 
LoadDXGI(); - if (SUCCEEDED(hr)) - hr = LoadD3D(); - - if (SUCCEEDED(hr)) - hr = LoadD3DCompiler(); + if (FAILED(hr)) + return hr; + hr = LoadD3D(); if (FAILED(hr)) { UnloadDXGI(); + return hr; + } + + hr = LoadD3DCompiler(); + if (FAILED(hr)) + { UnloadD3D(); - UnloadD3DCompiler(); + UnloadDXGI(); return hr; } @@ -364,7 +284,13 @@ HRESULT Create(HWND wnd) IDXGIAdapter* adapter; hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) + { MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + UnloadD3DCompiler(); + UnloadD3D(); + UnloadDXGI(); + return hr; + } hr = factory->EnumAdapters(g_ActiveConfig.iAdapter, &adapter); if (FAILED(hr)) @@ -372,20 +298,13 @@ HRESULT Create(HWND wnd) // try using the first one hr = factory->EnumAdapters(0, &adapter); if (FAILED(hr)) + { MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } - - // get supported AA modes - s_aa_modes = EnumAAModes(adapter); - - if (std::find_if( - s_aa_modes.begin(), - s_aa_modes.end(), - [](const DXGI_SAMPLE_DESC& desc) {return desc.Count == g_Config.iMultisamples; } - ) == s_aa_modes.end()) - { - g_Config.iMultisamples = 1; - UpdateActiveConfig(); + UnloadD3DCompiler(); + UnloadD3D(); + UnloadDXGI(); + return hr; + } } DXGI_SWAP_CHAIN_DESC swap_chain_desc = {}; @@ -405,74 +324,70 @@ HRESULT Create(HWND wnd) #if defined(_DEBUG) || defined(DEBUGFAST) || defined(USE_D3D12_DEBUG_LAYER) // Enabling the debug layer will fail if the Graphics Tools feature is not installed. 
+ ID3D12Debug* debug_controller; + hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); if (SUCCEEDED(hr)) { - ID3D12Debug* debug_controller; - hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); - if (SUCCEEDED(hr)) - { - debug_controller->EnableDebugLayer(); - debug_controller->Release(); - } - else - { - MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics Tools feature is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } + debug_controller->EnableDebugLayer(); + debug_controller->Release(); + } + else + { + MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics Tools feature is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); } #endif - if (SUCCEEDED(hr)) - { - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - s_feat_level = D3D_FEATURE_LEVEL_11_0; - } - - if (SUCCEEDED(hr)) - { - D3D12_COMMAND_QUEUE_DESC command_queue_desc = { - D3D12_COMMAND_LIST_TYPE_DIRECT, // D3D12_COMMAND_LIST_TYPE Type; - 0, // INT Priority; - D3D12_COMMAND_QUEUE_FLAG_NONE, // D3D12_COMMAND_QUEUE_FLAG Flags; - 0 // UINT NodeMask; - }; - - CheckHR(device12->CreateCommandQueue(&command_queue_desc, IID_PPV_ARGS(&command_queue))); - - IDXGIFactory* factory = nullptr; - adapter->GetParent(IID_PPV_ARGS(&factory)); - - CheckHR(factory->CreateSwapChain(command_queue, &swap_chain_desc, &s_swap_chain)); - - s_current_back_buf = 0; - - factory->Release(); - } - - if (SUCCEEDED(hr)) - { - // Query the monitor refresh rate, to ensure proper Present throttling behavior. - DEVMODE dev_mode; - memset(&dev_mode, 0, sizeof(DEVMODE)); - dev_mode.dmSize = sizeof(DEVMODE); - dev_mode.dmDriverExtra = 0; - - if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode) == 0) - { - // If EnumDisplaySettings fails, assume monitor refresh rate of 60 Hz. 
- s_monitor_refresh_rate = 60; - } - else - { - s_monitor_refresh_rate = dev_mode.dmDisplayFrequency; - } - } - + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); if (FAILED(hr)) { MessageBox(wnd, _T("Failed to initialize Direct3D.\nMake sure your video card supports Direct3D 12 and your drivers are up-to-date."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - SAFE_RELEASE(s_swap_chain); - return E_FAIL; + adapter->Release(); + UnloadD3DCompiler(); + UnloadD3D(); + UnloadDXGI(); + return hr; + } + + // Ensure that the chosen AA mode is supported by the device. + std::vector aa_modes = EnumAAModes(device12); + if (std::find_if( + aa_modes.begin(), + aa_modes.end(), + [](const DXGI_SAMPLE_DESC& desc) {return desc.Count == g_Config.iMultisamples; } + ) == aa_modes.end()) + { + g_Config.iMultisamples = 1; + UpdateActiveConfig(); + } + + D3D12_COMMAND_QUEUE_DESC command_queue_desc = { + D3D12_COMMAND_LIST_TYPE_DIRECT, // D3D12_COMMAND_LIST_TYPE Type; + 0, // INT Priority; + D3D12_COMMAND_QUEUE_FLAG_NONE, // D3D12_COMMAND_QUEUE_FLAG Flags; + 0 // UINT NodeMask; + }; + + CheckHR(device12->CreateCommandQueue(&command_queue_desc, IID_PPV_ARGS(&command_queue))); + + CheckHR(factory->CreateSwapChain(command_queue, &swap_chain_desc, &s_swap_chain)); + + s_current_back_buf = 0; + + // Query the monitor refresh rate, to ensure proper Present throttling behavior. + DEVMODE dev_mode; + memset(&dev_mode, 0, sizeof(DEVMODE)); + dev_mode.dmSize = sizeof(DEVMODE); + dev_mode.dmDriverExtra = 0; + + if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode) == 0) + { + // If EnumDisplaySettings fails, assume monitor refresh rate of 60 Hz. + s_monitor_refresh_rate = 60; + } + else + { + s_monitor_refresh_rate = dev_mode.dmDisplayFrequency; } ID3D12InfoQueue* info_queue = nullptr; @@ -485,8 +400,7 @@ HRESULT Create(HWND wnd) D3D12_MESSAGE_ID id_list[] = { D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET, // Benign. 
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, // Benign. - D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, // Benign. - D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // Benign. + D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH // Benign. }; filter.DenyList.NumIDs = ARRAYSIZE(id_list); filter.DenyList.pIDList = id_list; @@ -505,9 +419,6 @@ HRESULT Create(HWND wnd) if (FAILED(hr)) MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - SAFE_RELEASE(factory); - SAFE_RELEASE(adapter) - CreateDescriptorHeaps(); CreateRootSignatures(); @@ -528,7 +439,7 @@ HRESULT Create(HWND wnd) CHECK(SUCCEEDED(hr), "Retrieve back buffer texture"); s_backbuf[i] = new D3DTexture2D(buf12, - D3D11_BIND_RENDER_TARGET, + TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -545,6 +456,14 @@ HRESULT Create(HWND wnd) QueryPerformanceFrequency(&s_qpc_frequency); + // Render the device name. + DXGI_ADAPTER_DESC adapter_desc; + CheckHR(adapter->GetDesc(&adapter_desc)); + OSD::AddMessage(StringFromFormat("Using D3D Adapter: %s.", UTF16ToUTF8(adapter_desc.Description).c_str())); + + SAFE_RELEASE(factory); + SAFE_RELEASE(adapter); + return S_OK; } @@ -637,7 +556,15 @@ void CreateRootSignatures() D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND // UINT OffsetInDescriptorsFromTableStart; }; - D3D12_ROOT_PARAMETER root_parameters[6]; + D3D12_DESCRIPTOR_RANGE desc_range_uav = { + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, // D3D12_DESCRIPTOR_RANGE_TYPE RangeType; + 1, // UINT NumDescriptors; + 2, // UINT BaseShaderRegister; + 0, // UINT RegisterSpace; + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND // UINT OffsetInDescriptorsFromTableStart; + }; + + D3D12_ROOT_PARAMETER root_parameters[NUM_GRAPHICS_ROOT_PARAMETERS]; root_parameters[DESCRIPTOR_TABLE_PS_SRV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; root_parameters[DESCRIPTOR_TABLE_PS_SRV].DescriptorTable.NumDescriptorRanges = 1; @@ -669,7 
+596,10 @@ void CreateRootSignatures() root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].Descriptor.ShaderRegister = 1; root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - // D3D12TODO: Add bounding box UAV to root signature. + root_parameters[DESCRIPTOR_TABLE_PS_UAV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameters[DESCRIPTOR_TABLE_PS_UAV].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[DESCRIPTOR_TABLE_PS_UAV].DescriptorTable.pDescriptorRanges = &desc_range_uav; + root_parameters[DESCRIPTOR_TABLE_PS_UAV].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {}; root_signature_desc.pParameters = root_parameters; @@ -746,9 +676,9 @@ void Close() current_command_list = nullptr; // unload DLLs - UnloadDXGI(); UnloadD3DCompiler(); UnloadD3D(); + UnloadDXGI(); } const std::string VertexShaderVersionString() @@ -784,7 +714,7 @@ unsigned int GetBackBufferHeight() // Returns the maximum width/height of a texture. 
unsigned int GetMaxTextureSize() { - return D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; } void Reset() @@ -819,7 +749,7 @@ void Reset() CHECK(SUCCEEDED(hr), "Retrieve back buffer texture"); s_backbuf[i] = new D3DTexture2D(buf12, - D3D11_BIND_RENDER_TARGET, + TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.h b/Source/Core/VideoBackends/D3D12/D3DBase.h index 21cd9650ea..4016a039b1 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.h +++ b/Source/Core/VideoBackends/D3D12/D3DBase.h @@ -9,7 +9,6 @@ #pragma once -#include #include #include #include @@ -44,17 +43,21 @@ class D3DCommandListManager; class D3DDescriptorHeapManager; class D3DTexture2D; +enum GRAPHICS_ROOT_PARAMETER : u32 +{ + DESCRIPTOR_TABLE_PS_SRV, + DESCRIPTOR_TABLE_PS_SAMPLER, + DESCRIPTOR_TABLE_GS_CBV, + DESCRIPTOR_TABLE_VS_CBV, + DESCRIPTOR_TABLE_PS_CBVONE, + DESCRIPTOR_TABLE_PS_CBVTWO, + DESCRIPTOR_TABLE_PS_UAV, + NUM_GRAPHICS_ROOT_PARAMETERS +}; + namespace D3D { -#define DESCRIPTOR_TABLE_PS_SRV 0 -#define DESCRIPTOR_TABLE_PS_SAMPLER 1 -#define DESCRIPTOR_TABLE_GS_CBV 2 -#define DESCRIPTOR_TABLE_VS_CBV 3 -// #define DESCRIPTOR_TABLE_PS_UAV 4 -#define DESCRIPTOR_TABLE_PS_CBVONE 4 -#define DESCRIPTOR_TABLE_PS_CBVTWO 5 - HRESULT LoadDXGI(); HRESULT LoadD3D(); HRESULT LoadD3DCompiler(); @@ -62,10 +65,7 @@ void UnloadDXGI(); void UnloadD3D(); void UnloadD3DCompiler(); -D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter); -std::vector EnumAAModes(IDXGIAdapter* adapter); - -bool AlertUserIfSelectedAdapterDoesNotSupportD3D12(); +std::vector EnumAAModes(ID3D12Device* device); HRESULT Create(HWND wnd); @@ -146,6 +146,8 @@ using CREATEDXGIFACTORY = HRESULT(WINAPI*)(REFIID, void**); extern CREATEDXGIFACTORY create_dxgi_factory; using D3D12CREATEDEVICE = HRESULT(WINAPI*)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**); +extern D3D12CREATEDEVICE d3d12_create_device; + 
using D3D12SERIALIZEROOTSIGNATURE = HRESULT(WINAPI*)(const D3D12_ROOT_SIGNATURE_DESC* pRootSignature, D3D_ROOT_SIGNATURE_VERSION Version, ID3DBlob** ppBlob, ID3DBlob** ppErrorBlob); using D3D12GETDEBUGINTERFACE = HRESULT(WINAPI*)(REFIID riid, void** ppvDebug); diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h index f404a8a0ac..2629cd2333 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h @@ -90,8 +90,6 @@ private: ID3D12GraphicsCommandList* m_backing_command_list; ID3D12QueuedCommandList* m_queued_command_list; - ID3D12RootSignature* m_default_root_signature; - UINT m_current_deferred_destruction_list; std::array, 2> m_deferred_destruction_lists; std::array m_deferred_destruction_list_fences; diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp index c7b5f659cb..8a84a0c42b 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp @@ -274,6 +274,45 @@ void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* break; } + case D3DQueueItemType::BeginQuery: + { + command_list->BeginQuery( + reinterpret_cast(item)->BeginQuery.pQueryHeap, + reinterpret_cast(item)->BeginQuery.Type, + reinterpret_cast(item)->BeginQuery.Index + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::EndQuery: + { + command_list->EndQuery( + reinterpret_cast(item)->EndQuery.pQueryHeap, + reinterpret_cast(item)->EndQuery.Type, + reinterpret_cast(item)->EndQuery.Index + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ResolveQueryData: + { + command_list->ResolveQueryData( + reinterpret_cast(item)->ResolveQueryData.pQueryHeap, + reinterpret_cast(item)->ResolveQueryData.Type, + 
reinterpret_cast(item)->ResolveQueryData.StartElement, + reinterpret_cast(item)->ResolveQueryData.ElementCount, + reinterpret_cast(item)->ResolveQueryData.pDestinationBuffer, + reinterpret_cast(item)->ResolveQueryData.AlignedDestinationBufferOffset + ); + + item += BufferOffsetForQueueItemType(); + break; + } + case D3DQueueItemType::CloseCommandList: { CheckHR(command_list->Close()); @@ -804,7 +843,7 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveSubresource( } void STDMETHODCALLTYPE ID3D12QueuedCommandList::IASetPrimitiveTopology( - _In_ D3D11_PRIMITIVE_TOPOLOGY PrimitiveTopology + _In_ D3D12_PRIMITIVE_TOPOLOGY PrimitiveTopology ) { // No ignored parameters, no assumptions to DEBUGCHECK. @@ -818,7 +857,7 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::IASetPrimitiveTopology( } void STDMETHODCALLTYPE ID3D12QueuedCommandList::RSSetViewports( - _In_range_(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, _In_reads_(Count) const D3D12_VIEWPORT* pViewports ) { @@ -838,7 +877,7 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::RSSetViewports( } void STDMETHODCALLTYPE ID3D12QueuedCommandList::RSSetScissorRects( - _In_range_(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, _In_reads_(Count) const D3D12_RECT* pRects ) { @@ -916,8 +955,14 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::BeginQuery( _In_ UINT Index ) { - // Function not implemented yet. 
- DEBUGCHECK(0, "Function not implemented yet."); + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::BeginQuery; + reinterpret_cast(m_queue_array_back)->BeginQuery.pQueryHeap = pQueryHeap; + reinterpret_cast(m_queue_array_back)->BeginQuery.Type = Type; + reinterpret_cast(m_queue_array_back)->BeginQuery.Index = Index; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); } void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndQuery( @@ -926,8 +971,14 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndQuery( _In_ UINT Index ) { - // Function not implemented yet. - DEBUGCHECK(0, "Function not implemented yet."); + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::EndQuery; + reinterpret_cast(m_queue_array_back)->EndQuery.pQueryHeap = pQueryHeap; + reinterpret_cast(m_queue_array_back)->EndQuery.Type = Type; + reinterpret_cast(m_queue_array_back)->EndQuery.Index = Index; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); } void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveQueryData( @@ -939,8 +990,17 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveQueryData( _In_ UINT64 AlignedDestinationBufferOffset ) { - // Function not implemented yet. 
- DEBUGCHECK(0, "Function not implemented yet."); + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ResolveQueryData; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.pQueryHeap = pQueryHeap; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.Type = Type; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.StartElement = StartElement; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.ElementCount = ElementCount; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.pDestinationBuffer = pDestinationBuffer; + reinterpret_cast(m_queue_array_back)->ResolveQueryData.AlignedDestinationBufferOffset = AlignedDestinationBufferOffset; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); } void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPredication( @@ -1199,7 +1259,7 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearDepthStencilView( _In_reads_opt_(NumRects) const D3D12_RECT* pRect ) { - DEBUGCHECK(ClearFlags == D3D11_CLEAR_DEPTH, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(ClearFlags == D3D12_CLEAR_FLAG_DEPTH, "Error: Invalid assumption in ID3D12QueuedCommandList."); DEBUGCHECK(Depth == 0.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); DEBUGCHECK(Stencil == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); DEBUGCHECK(pRect == nullptr, "Error: Invalid assumption in ID3D12QueuedCommandList."); diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h index 2933b2fc72..2c9d80febe 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h @@ -35,6 +35,9 @@ enum D3DQueueItemType SetDescriptorHeaps, ResourceBarrier, ResolveSubresource, + BeginQuery, + EndQuery, + ResolveQueryData, ExecuteCommandList, CloseCommandList, Present, @@ -170,6 +173,30 @@ struct ResolveSubresourceArguments DXGI_FORMAT Format; }; +struct 
BeginQueryArguments +{ + ID3D12QueryHeap* pQueryHeap; + D3D12_QUERY_TYPE Type; + UINT Index; +}; + +struct EndQueryArguments +{ + ID3D12QueryHeap* pQueryHeap; + D3D12_QUERY_TYPE Type; + UINT Index; +}; + +struct ResolveQueryDataArguments +{ + ID3D12QueryHeap* pQueryHeap; + D3D12_QUERY_TYPE Type; + UINT StartElement; + UINT ElementCount; + ID3D12Resource* pDestinationBuffer; + UINT64 AlignedDestinationBufferOffset; +}; + struct CloseCommandListArguments { }; @@ -239,6 +266,9 @@ struct D3DQueueItem SetDescriptorHeapsArguments SetDescriptorHeaps; ResourceBarrierArguments ResourceBarrier; ResolveSubresourceArguments ResolveSubresource; + BeginQueryArguments BeginQuery; + EndQueryArguments EndQuery; + ResolveQueryDataArguments ResolveQueryData; CloseCommandListArguments CloseCommandList; ExecuteCommandListArguments ExecuteCommandList; PresentArguments Present; diff --git a/Source/Core/VideoBackends/D3D12/D3DShader.cpp b/Source/Core/VideoBackends/D3D12/D3DShader.cpp index 1b0eae1681..d99a02c5ac 100644 --- a/Source/Core/VideoBackends/D3D12/D3DShader.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DShader.cpp @@ -19,7 +19,7 @@ namespace DX12 namespace D3D { -bool CompileShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines, std::string shader_version_string) +bool CompileShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines, const std::string& shader_version_string) { ID3D10Blob* shader_buffer = nullptr; ID3D10Blob* error_buffer = nullptr; @@ -34,21 +34,26 @@ bool CompileShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MA if (error_buffer) { - INFO_LOG(VIDEO, "Shader compiler messages:\n%s\n", + WARN_LOG(VIDEO, "Warning generated when compiling %s shader:\n%s\n", + shader_version_string.c_str(), static_cast(error_buffer->GetBufferPointer())); } if (FAILED(hr)) { static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), 
shader_version_string, num_failures++); + std::string filename = StringFromFormat("%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), shader_version_string.c_str(), num_failures++); std::ofstream file; OpenFStream(file, filename, std::ios_base::out); file << code; + file << std::endl << "Errors:" << std::endl; + file << static_cast(error_buffer->GetBufferPointer()); file.close(); PanicAlert("Failed to compile shader: %s\nDebug info (%s):\n%s", - filename.c_str(), shader_version_string, static_cast(error_buffer->GetBufferPointer())); + filename.c_str(), + shader_version_string.c_str(), + static_cast(error_buffer->GetBufferPointer())); *blob = nullptr; error_buffer->Release(); diff --git a/Source/Core/VideoBackends/D3D12/D3DState.cpp b/Source/Core/VideoBackends/D3D12/D3DState.cpp index 9da11541cc..ffe1e573e5 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp @@ -343,8 +343,8 @@ inline D3D12_DEPTH_STENCIL_DESC StateCache::GetDesc12(ZMode state) D3D12_DEPTH_STENCIL_DESC depthdc; depthdc.StencilEnable = FALSE; - depthdc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; - depthdc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; + depthdc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + depthdc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; depthdc.FrontFace = defaultStencilOp; diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp index 22c83a3654..cfb67209c8 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp @@ -29,7 +29,8 @@ D3DStreamBuffer::~D3DStreamBuffer() { D3D::command_list_mgr->RemoveQueueFenceCallback(this); - m_buffer->Unmap(0, nullptr); + D3D12_RANGE write_range = { 0, m_buffer_size }; + 
m_buffer->Unmap(0, &write_range); D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer); } @@ -43,7 +44,7 @@ bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t align { CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer is greater than max allowed size of backing buffer."); - if (alignment) + if (alignment && m_buffer_offset > 0) { size_t padding = m_buffer_offset % alignment; @@ -94,7 +95,8 @@ void D3DStreamBuffer::AllocateBuffer(size_t size) // First, put existing buffer (if it exists) in deferred destruction list. if (m_buffer) { - m_buffer->Unmap(0, nullptr); + D3D12_RANGE write_range = { 0, m_buffer_size }; + m_buffer->Unmap(0, &write_range); D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer); m_buffer = nullptr; } @@ -110,7 +112,8 @@ void D3DStreamBuffer::AllocateBuffer(size_t size) ) ); - CheckHR(m_buffer->Map(0, nullptr, &m_buffer_cpu_address)); + D3D12_RANGE read_range = {}; + CheckHR(m_buffer->Map(0, &read_range, &m_buffer_cpu_address)); m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress(); m_buffer_size = size; diff --git a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp index f913715e75..9749a36a76 100644 --- a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp @@ -21,6 +21,9 @@ namespace DX12 namespace D3D { +constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 4 * 1024 * 1024; +constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; + static std::unique_ptr s_texture_upload_stream_buffer; void CleanupPersistentD3DTextureResources() @@ -32,16 +35,35 @@ void ReplaceRGBATexture2D(ID3D12Resource* texture12, const u8* buffer, unsigned { const unsigned int upload_size = AlignValue(src_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * height; - if (!s_texture_upload_stream_buffer) - { - s_texture_upload_stream_buffer = 
std::make_unique(4 * 1024 * 1024, 64 * 1024 * 1024, nullptr); - } + ID3D12Resource* upload_buffer = nullptr; + size_t upload_buffer_offset = 0; + u8* dest_data = nullptr; - bool current_command_list_executed = s_texture_upload_stream_buffer->AllocateSpaceInBuffer(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); - if (current_command_list_executed) + if (upload_size > MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) { - g_renderer->SetViewport(); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + // If the texture is too large to fit in the upload buffer, create a temporary buffer instead. + // This will only be the case for large (e.g. 8192x8192) textures from custom texture packs. + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(upload_size), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&upload_buffer))); + + D3D12_RANGE read_range = {}; + CheckHR(upload_buffer->Map(0, &read_range, reinterpret_cast(&dest_data))); + } + else + { + if (!s_texture_upload_stream_buffer) + s_texture_upload_stream_buffer = std::make_unique(INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE, MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE, nullptr); + + s_texture_upload_stream_buffer->AllocateSpaceInBuffer(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + + upload_buffer = s_texture_upload_stream_buffer->GetBuffer(); + upload_buffer_offset = s_texture_upload_stream_buffer->GetOffsetOfCurrentAllocation(); + dest_data = reinterpret_cast(s_texture_upload_stream_buffer->GetCPUAddressOfCurrentAllocation()); } ResourceBarrier(current_command_list, texture12, current_resource_state, D3D12_RESOURCE_STATE_COPY_DEST, level); @@ -51,9 +73,8 @@ void ReplaceRGBATexture2D(ID3D12Resource* texture12, const u8* buffer, unsigned u64 upload_row_size_in_bytes = 0; u64 upload_total_bytes = 0; - 
D3D::device12->GetCopyableFootprints(&texture12->GetDesc(), level, 1, s_texture_upload_stream_buffer->GetOffsetOfCurrentAllocation(), &upload_footprint, &upload_rows, &upload_row_size_in_bytes, &upload_total_bytes); + D3D::device12->GetCopyableFootprints(&texture12->GetDesc(), level, 1, upload_buffer_offset, &upload_footprint, &upload_rows, &upload_row_size_in_bytes, &upload_total_bytes); - u8* dest_data = reinterpret_cast(s_texture_upload_stream_buffer->GetCPUAddressOfCurrentAllocation()); const u8* src_data = reinterpret_cast(buffer); for (u32 y = 0; y < upload_rows; ++y) { @@ -64,14 +85,26 @@ void ReplaceRGBATexture2D(ID3D12Resource* texture12, const u8* buffer, unsigned ); } - D3D::current_command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(texture12, level), 0, 0, 0, &CD3DX12_TEXTURE_COPY_LOCATION(s_texture_upload_stream_buffer->GetBuffer(), upload_footprint), nullptr); + D3D::current_command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(texture12, level), 0, 0, 0, &CD3DX12_TEXTURE_COPY_LOCATION(upload_buffer, upload_footprint), nullptr); ResourceBarrier(D3D::current_command_list, texture12, D3D12_RESOURCE_STATE_COPY_DEST, current_resource_state, level); + + // Release temporary buffer after commands complete. + // We block here because otherwise if there was a large number of texture uploads, we may run out of memory. 
+ if (!s_texture_upload_stream_buffer || upload_buffer != s_texture_upload_stream_buffer->GetBuffer()) + { + D3D12_RANGE write_range = { 0, upload_size }; + upload_buffer->Unmap(0, &write_range); + + D3D::command_list_mgr->ExecuteQueuedWork(true); + + upload_buffer->Release(); + } } } // namespace -D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, D3D11_USAGE usage, DXGI_FORMAT fmt, unsigned int levels, unsigned int slices, D3D12_SUBRESOURCE_DATA* data) +D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, u32 bind, DXGI_FORMAT fmt, unsigned int levels, unsigned int slices, D3D12_SUBRESOURCE_DATA* data) { ID3D12Resource* texture12 = nullptr; @@ -86,7 +119,7 @@ D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D1 D3D12_CLEAR_VALUE optimized_clear_value = {}; optimized_clear_value.Format = fmt; - if (bind & D3D11_BIND_RENDER_TARGET) + if (bind & TEXTURE_BIND_FLAG_RENDER_TARGET) { texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; optimized_clear_value.Color[0] = 0.0f; @@ -95,7 +128,7 @@ D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D1 optimized_clear_value.Color[3] = 1.0f; } - if (bind & D3D11_BIND_DEPTH_STENCIL) + if (bind & TEXTURE_BIND_FLAG_DEPTH_STENCIL) { texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; optimized_clear_value.DepthStencil.Depth = 0.0f; @@ -181,7 +214,7 @@ D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetRTV12() const return m_rtv12; } -D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, +D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, u32 bind, DXGI_FORMAT srv_format, DXGI_FORMAT dsv_format, DXGI_FORMAT rtv_format, bool multisampled, D3D12_RESOURCE_STATES resource_state) : m_tex12(texptr), m_resource_state(resource_state), m_multisampled(multisampled) { @@ -189,7 +222,7 @@ D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, D3D12_DSV_DIMENSION dsv_dim12 = 
multisampled ? D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : D3D12_DSV_DIMENSION_TEXTURE2DARRAY; D3D12_RTV_DIMENSION rtv_dim12 = multisampled ? D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : D3D12_RTV_DIMENSION_TEXTURE2DARRAY; - if (bind & D3D11_BIND_SHADER_RESOURCE) + if (bind & TEXTURE_BIND_FLAG_SHADER_RESOURCE) { D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { srv_format, // DXGI_FORMAT Format @@ -216,7 +249,7 @@ D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, D3D::device12->CreateShaderResourceView(m_tex12, &srv_desc, m_srv12_gpu_cpu_shadow); } - if (bind & D3D11_BIND_DEPTH_STENCIL) + if (bind & TEXTURE_BIND_FLAG_DEPTH_STENCIL) { D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { dsv_format, // DXGI_FORMAT Format @@ -233,7 +266,7 @@ D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, D3D::device12->CreateDepthStencilView(m_tex12, &dsv_desc, m_dsv12); } - if (bind & D3D11_BIND_RENDER_TARGET) + if (bind & TEXTURE_BIND_FLAG_RENDER_TARGET) { D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { rtv_format, // DXGI_FORMAT Format diff --git a/Source/Core/VideoBackends/D3D12/D3DTexture.h b/Source/Core/VideoBackends/D3D12/D3DTexture.h index 08c1f38a1d..8f58d84692 100644 --- a/Source/Core/VideoBackends/D3D12/D3DTexture.h +++ b/Source/Core/VideoBackends/D3D12/D3DTexture.h @@ -5,11 +5,17 @@ #pragma once #include -#include namespace DX12 { +enum TEXTURE_BIND_FLAG : u32 +{ + TEXTURE_BIND_FLAG_SHADER_RESOURCE = (1 << 0), + TEXTURE_BIND_FLAG_RENDER_TARGET = (1 << 1), + TEXTURE_BIND_FLAG_DEPTH_STENCIL = (1 << 2) +}; + namespace D3D { void ReplaceRGBATexture2D(ID3D12Resource* pTexture, const u8* buffer, unsigned int width, unsigned int height, unsigned int src_pitch, unsigned int level, D3D12_RESOURCE_STATES current_resource_state = D3D12_RESOURCE_STATE_COMMON); @@ -24,8 +30,8 @@ public: // either create an ID3D12Resource object, pass it to the constructor and specify what views to create // or let the texture automatically be created by D3DTexture2D::Create - 
D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false, D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COMMON); - static D3DTexture2D* Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, D3D11_USAGE usage, DXGI_FORMAT, unsigned int levels = 1, unsigned int slices = 1, D3D12_SUBRESOURCE_DATA* data = nullptr); + D3DTexture2D(ID3D12Resource* texptr, u32 bind, DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false, D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COMMON); + static D3DTexture2D* Create(unsigned int width, unsigned int height, u32 bind, DXGI_FORMAT fmt, unsigned int levels = 1, unsigned int slices = 1, D3D12_SUBRESOURCE_DATA* data = nullptr); void TransitionToResourceState(ID3D12GraphicsCommandList* command_list, D3D12_RESOURCE_STATES state_after); // reference counting, use AddRef() when creating a new reference and Release() it when you don't need it anymore diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index 360cd04df9..bf58b084b3 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -26,11 +26,6 @@ namespace DX12 namespace D3D { -unsigned int AlignValue(unsigned int value, unsigned int alignment) -{ - return (value + (alignment - 1)) & ~(alignment - 1); -} - void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource) { if (state_before == state_after) @@ -104,7 +99,6 @@ private: CD3DFont font; static std::unique_ptr util_vbuf_stq; -static std::unique_ptr util_vbuf_cq; static std::unique_ptr util_vbuf_clearq; 
static std::unique_ptr util_vbuf_efbpokequads; @@ -502,12 +496,6 @@ struct float u1, v1, u2, v2, S, G; } tex_quad_data; -struct -{ - float x1, y1, x2, y2, z; - u32 col; -} draw_quad_data; - struct { u32 col; @@ -516,13 +504,11 @@ struct // ring buffer offsets static size_t stq_offset; -static size_t cq_offset; static size_t clearq_offset; void InitUtils() { util_vbuf_stq = std::make_unique(0x10000); - util_vbuf_cq = std::make_unique(0x10000); util_vbuf_clearq = std::make_unique(0x10000); util_vbuf_efbpokequads = std::make_unique(0x100000); @@ -560,7 +546,6 @@ void InitUtils() // cached data used to avoid unnecessarily reloading the vertex buffers memset(&tex_quad_data, 0, sizeof(tex_quad_data)); - memset(&draw_quad_data, 0, sizeof(draw_quad_data)); memset(&clear_quad_data, 0, sizeof(clear_quad_data)); font.Init(); @@ -571,7 +556,6 @@ void ShutdownUtils() font.Shutdown(); util_vbuf_stq.reset(); - util_vbuf_cq.reset(); util_vbuf_clearq.reset(); util_vbuf_efbpokequads.reset(); } @@ -588,7 +572,7 @@ void SetLinearCopySampler() D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); } -void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth, float max_depth) +void SetViewportAndScissor(int top_left_x, int top_left_y, int width, int height, float min_depth, float max_depth) { D3D12_VIEWPORT viewport = { static_cast(top_left_x), @@ -707,89 +691,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture, D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); D3D::current_command_list->DrawInstanced(4, 1, static_cast(stq_offset), 0); - - g_renderer->RestoreAPIState(); -} - -// Fills a certain area of the current render target with the specified color -// destination coordinates normalized to (-1;1) -void DrawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled) -{ - ColVertex 
coords[4] = { - { x1, y2, z, Color }, - { x2, y2, z, Color }, - { x1, y1, z, Color }, - { x2, y1, z, Color }, - }; - - if (draw_quad_data.x1 != x1 || draw_quad_data.y1 != y1 || - draw_quad_data.x2 != x2 || draw_quad_data.y2 != y2 || - draw_quad_data.col != Color || draw_quad_data.z != z) - { - cq_offset = util_vbuf_cq->AppendData(coords, sizeof(coords), sizeof(ColVertex)); - - draw_quad_data.x1 = x1; - draw_quad_data.y1 = y1; - draw_quad_data.x2 = x2; - draw_quad_data.y2 = y2; - draw_quad_data.col = Color; - draw_quad_data.z = z; - } - - D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - - D3D12_VERTEX_BUFFER_VIEW vb_view = { - util_vbuf_cq->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; - static_cast(util_vbuf_cq->GetSize()), // UINT SizeInBytes; This is the size of the entire buffer, not just the size of the vertex data for one draw call, since the offsetting is done in the draw call itself. 
- sizeof(ColVertex) // UINT StrideInBytes; - }; - - D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); - D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); - - D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { - default_root_signature, // ID3D12RootSignature *pRootSignature; - StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS; - StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS; - {}, // D3D12_SHADER_BYTECODE DS; - {}, // D3D12_SHADER_BYTECODE HS; - StaticShaderCache::GetClearGeometryShader(), // D3D12_SHADER_BYTECODE GS; - {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput - *blend_desc, // D3D12_BLEND_DESC BlendState; - UINT_MAX, // UINT SampleMask; - Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState - *depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState - StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout - D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties - D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType - 1, // UINT NumRenderTargets - { DXGI_FORMAT_R8G8B8A8_UNORM }, // DXGI_FORMAT RTVFormats[8] - DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat - { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc - }; - - if (rt_multisampled) - { - pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; - } - - ID3D12PipelineState* pso = nullptr; - CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso)); - - D3D::current_command_list->SetPipelineState(pso); - D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - - // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. - // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid - // dirtying state. 
- - // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 - D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - - D3D::current_command_list->DrawInstanced(4, 1, static_cast(cq_offset), 0); - - g_renderer->RestoreAPIState(); } void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled) @@ -856,8 +757,6 @@ void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); D3D::current_command_list->DrawInstanced(4, 1, static_cast(clearq_offset), 0); - - g_renderer->RestoreAPIState(); } static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col) @@ -933,6 +832,7 @@ void DrawEFBPokeQuads(EFBAccessType type, // Corresponding dirty flags set outside loop. D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer); D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); D3D12_VERTEX_BUFFER_VIEW vb_view = { util_vbuf_efbpokequads->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; @@ -946,9 +846,6 @@ void DrawEFBPokeQuads(EFBAccessType type, D3D::current_command_list->SetPipelineState(pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - // Disable scissor testing. 
- D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - // generate quads for each efb point ColVertex* base_vertex_ptr = reinterpret_cast(buffer_ptr); for (size_t i = 0; i < points_to_draw; i++) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index add8516be1..6f4500c71e 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include @@ -22,7 +21,11 @@ extern StateCache gx_state_cache; namespace D3D { -unsigned int AlignValue(unsigned int value, unsigned int alignment); +constexpr unsigned int AlignValue(unsigned int value, unsigned int alignment) +{ + return (value + (alignment - 1)) & ~(alignment - 1); +} + void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource); // Font creation flags @@ -74,7 +77,7 @@ void ShutdownUtils(); void SetPointCopySampler(); void SetLinearCopySampler(); -void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH); +void SetViewportAndScissor(int top_left_x, int top_left_y, int width, int height, float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH); void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* source, @@ -92,7 +95,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture, ); void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); -void DrawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp 
b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index a3c8064318..f7cfb2b9a7 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -15,8 +15,6 @@ namespace DX12 { -static XFBEncoder s_xfbEncoder; - FramebufferManager::Efb FramebufferManager::m_efb; unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; @@ -42,7 +40,12 @@ D3DTexture2D*& FramebufferManager::GetResolvedEFBColorTexture() for (int i = 0; i < m_efb.slices; i++) { - D3D::current_command_list->ResolveSubresource(m_efb.resolved_color_tex->GetTex12(), D3D11CalcSubresource(0, i, 1), m_efb.color_tex->GetTex12(), D3D11CalcSubresource(0, i, 1), DXGI_FORMAT_R8G8B8A8_UNORM); + D3D::current_command_list->ResolveSubresource( + m_efb.resolved_color_tex->GetTex12(), + D3D12CalcSubresource(0, i, 0, 1, m_efb.slices), + m_efb.color_tex->GetTex12(), + D3D12CalcSubresource(0, i, 0, 1, m_efb.slices), + DXGI_FORMAT_R8G8B8A8_UNORM); } m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); @@ -91,13 +94,13 @@ FramebufferManager::FramebufferManager() texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)); - m_efb.color_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + m_efb.color_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM, 
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); // Temporary EFB color texture - used in ReinterpretPixelData texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12))); - m_efb.color_temp_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + m_efb.color_temp_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture"); @@ -105,7 +108,7 @@ FramebufferManager::FramebufferManager() texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12))); - m_efb.depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + m_efb.depth_tex = new D3DTexture2D(buf12, 
TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_DEPTH_STENCIL, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture"); @@ -115,14 +118,14 @@ FramebufferManager::FramebufferManager() texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB color resolve texture (size: %dx%d)", m_target_width, m_target_height); - m_efb.resolved_color_tex = new D3DTexture2D(buf12, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + m_efb.resolved_color_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(), "EFB color resolve texture shader resource view"); texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + 
m_efb.resolved_depth_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(), "EFB depth resolve texture shader resource view"); } @@ -133,14 +136,10 @@ FramebufferManager::FramebufferManager() } InitializeEFBAccessCopies(); - - s_xfbEncoder.Init(); } FramebufferManager::~FramebufferManager() { - s_xfbEncoder.Shutdown(); - DestroyEFBAccessCopies(); SAFE_RELEASE(m_efb.color_tex); @@ -153,14 +152,16 @@ FramebufferManager::~FramebufferManager() void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float gamma) { u8* dst = Memory::GetPointer(xfbAddr); - s_xfbEncoder.Encode(dst, fbStride/2, fbHeight, sourceRc, gamma); + D3DTexture2D* src_texture = GetResolvedEFBColorTexture(); + TargetRectangle scaled_rect = g_renderer->ConvertEFBRectangle(sourceRc); + g_xfb_encoder->EncodeTextureToRam(dst, fbStride, fbHeight, src_texture, scaled_rect, m_target_width, m_target_height, gamma); } std::unique_ptr FramebufferManager::CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) { return std::make_unique(D3DTexture2D::Create(target_width, target_height, - (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET|D3D11_BIND_SHADER_RESOURCE), - D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, layers), layers); + TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, + DXGI_FORMAT_R8G8B8A8_UNORM, 1, layers), layers); } void FramebufferManager::GetTargetSize(unsigned int* width, unsigned int* height) @@ -197,12 +198,18 @@ void FramebufferManager::ResolveDepthTexture() FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); 
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); // Restores proper viewport/scissor settings. g_renderer->RestoreAPIState(); } +void FramebufferManager::RestoreEFBRenderTargets() +{ + D3D::current_command_list->OMSetRenderTargets(1, + &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, + &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); +} + u32 FramebufferManager::ReadEFBColorAccessCopy(u32 x, u32 y) { if (!m_efb.color_access_readback_map) @@ -255,7 +262,7 @@ void FramebufferManager::InitializeEFBAccessCopies() texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_color_clear_value, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB access color resize buffer (hr=%#x)", hr); - m_efb.color_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM); + m_efb.color_access_resize_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM); D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access color resize buffer"); buf12->Release(); @@ -269,7 +276,7 @@ void FramebufferManager::InitializeEFBAccessCopies() texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, 
D3D12_RESOURCE_STATE_COMMON, &optimized_depth_clear_value, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB access depth resize buffer (hr=%#x)", hr); - m_efb.depth_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT); + m_efb.depth_access_resize_tex = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT); D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access depth resize buffer"); buf12->Release(); @@ -317,19 +324,16 @@ void FramebufferManager::MapEFBColorAccessCopy() CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); - // Block until completion - D3D::command_list_mgr->ExecuteQueuedWork(true); - // Restore EFB resource state if it was sourced from here if (src_resource == m_efb.color_tex->GetTex12()) m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - // Restore state after resetting command list - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - g_renderer->RestoreAPIState(); + // Block until completion - state is automatically restored + D3D::command_list_mgr->ExecuteQueuedWork(true); // Resource copy has finished, so safe to map now - m_efb.color_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.color_access_readback_map)); + D3D12_RANGE read_range = { 0, m_efb.color_access_readback_pitch * EFB_HEIGHT }; + m_efb.color_access_readback_buffer->Map(0, &read_range, reinterpret_cast(&m_efb.color_access_readback_map)); } void FramebufferManager::MapEFBDepthAccessCopy() @@ -344,7 +348,7 @@ void FramebufferManager::MapEFBDepthAccessCopy() D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); D3D::SetPointCopySampler(); - D3D::current_command_list->OMSetRenderTargets(1, 
&m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.depth_access_resize_tex->GetRTV12(), FALSE, nullptr); CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height, @@ -369,32 +373,31 @@ void FramebufferManager::MapEFBDepthAccessCopy() CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); - // Block until completion - D3D::command_list_mgr->ExecuteQueuedWork(true); - // Restore EFB resource state if it was sourced from here if (src_resource == m_efb.depth_tex->GetTex12()) m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - // Restore state after resetting command list - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - g_renderer->RestoreAPIState(); + // Block until completion - state is automatically restored + D3D::command_list_mgr->ExecuteQueuedWork(true); // Resource copy has finished, so safe to map now - m_efb.depth_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.depth_access_readback_map)); + D3D12_RANGE read_range = { 0, m_efb.depth_access_readback_pitch * EFB_HEIGHT }; + m_efb.depth_access_readback_buffer->Map(0, &read_range, reinterpret_cast(&m_efb.depth_access_readback_map)); } void FramebufferManager::InvalidateEFBAccessCopies() { + D3D12_RANGE write_range = {}; + if (m_efb.color_access_readback_map) { - m_efb.color_access_readback_buffer->Unmap(0, nullptr); + m_efb.color_access_readback_buffer->Unmap(0, &write_range); m_efb.color_access_readback_map = nullptr; } if (m_efb.depth_access_readback_map) { - m_efb.depth_access_readback_buffer->Unmap(0, nullptr); + m_efb.depth_access_readback_buffer->Unmap(0, 
&write_range); m_efb.depth_access_readback_map = nullptr; } } @@ -414,8 +417,8 @@ void FramebufferManager::DestroyEFBAccessCopies() void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) { - // DX12's XFB decoder does not use this function. - // YUYV data is decoded in Render::Swap. + u8* src = Memory::GetPointer(xfbAddr); + g_xfb_encoder->DecodeToTexture(m_tex, src, fbWidth, fbHeight); } void XFBSource::CopyEFB(float gamma) @@ -448,7 +451,6 @@ void XFBSource::CopyEFB(float gamma) FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); // Restores proper viewport/scissor settings. g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.h b/Source/Core/VideoBackends/D3D12/FramebufferManager.h index 13e9ae205c..09721f143d 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.h +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.h @@ -70,6 +70,8 @@ public: static void ResolveDepthTexture(); + static void RestoreEFBRenderTargets(); + // Access EFB from CPU static u32 ReadEFBColorAccessCopy(u32 x, u32 y); static float ReadEFBDepthAccessCopy(u32 x, u32 y); diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp index 5c27244bdd..fda14dd7e1 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp @@ -105,7 +105,9 @@ void PSTextureEncoder::Init() D3D::SetDebugObjectName12(m_encode_params_buffer, "efb encoder params buffer"); - CheckHR(m_encode_params_buffer->Map(0, nullptr, 
&m_encode_params_buffer_data)); + // NOTE: This upload buffer is okay to overwrite each time, since we block until completion when it's used anyway. + D3D12_RANGE read_range = {}; + CheckHR(m_encode_params_buffer->Map(0, &read_range, &m_encode_params_buffer_data)); m_ready = true; } @@ -215,11 +217,16 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p D3D::ResourceBarrier(D3D::current_command_list, m_out, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE, 0); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // State is automatically restored after executing command list. D3D::command_list_mgr->ExecuteQueuedWork(true); // Transfer staging buffer to GameCube/Wii RAM void* readback_data_map; - CheckHR(m_out_readback_buffer->Map(0, nullptr, &readback_data_map)); + D3D12_RANGE read_range = { 0, dst_location.PlacedFootprint.Footprint.RowPitch * num_blocks_y }; + CheckHR(m_out_readback_buffer->Map(0, &read_range, &readback_data_map)); u8* src = static_cast(readback_data_map); u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch); @@ -231,14 +238,8 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p src += dst_location.PlacedFootprint.Footprint.RowPitch; } - m_out_readback_buffer->Unmap(0, nullptr); - - // Restores proper viewport/scissor settings. 
- g_renderer->RestoreAPIState(); - - FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + D3D12_RANGE write_range = {}; + m_out_readback_buffer->Unmap(0, &write_range); } D3D12_SHADER_BYTECODE PSTextureEncoder::SetStaticShader(unsigned int dst_format, PEControl::PixelFormat src_format, diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp index f5821ca1b7..5f4b5508e5 100644 --- a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp @@ -2,68 +2,215 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include + #include "Common/CommonFuncs.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" #include "VideoBackends/D3D12/PerfQuery.h" #include "VideoCommon/RenderBase.h" -//D3D12TODO: Implement PerfQuery class. 
- namespace DX12 { PerfQuery::PerfQuery() { - //D3D12TODO: Add implementation + D3D12_QUERY_HEAP_DESC desc = { D3D12_QUERY_HEAP_TYPE_OCCLUSION, PERF_QUERY_BUFFER_SIZE, 0 }; + CheckHR(D3D::device12->CreateQueryHeap(&desc, IID_PPV_ARGS(&m_query_heap))); + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(QUERY_READBACK_BUFFER_SIZE), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_query_readback_buffer))); + + m_tracking_fence = D3D::command_list_mgr->RegisterQueueFenceCallback(this, &PerfQuery::QueueFenceCallback); } PerfQuery::~PerfQuery() { - //D3D12TODO: Add implementation + D3D::command_list_mgr->RemoveQueueFenceCallback(this); + + SAFE_RELEASE(m_query_heap); + SAFE_RELEASE(m_query_readback_buffer); } void PerfQuery::EnableQuery(PerfQueryGroup type) { - //D3D12TODO: Add implementation + if (m_query_count > m_query_buffer.size() / 2) + WeakFlush(); + + // all queries already used? 
+ if (m_query_buffer.size() == m_query_count) + { + FlushOne(); + //WARN_LOG(VIDEO, "Flushed query buffer early!"); + } + + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + size_t index = (m_query_read_pos + m_query_count) % m_query_buffer.size(); + auto& entry = m_query_buffer[index]; + + D3D::current_command_list->BeginQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, static_cast(index)); + entry.query_type = type; + entry.fence_value = -1; + + ++m_query_count; + } } void PerfQuery::DisableQuery(PerfQueryGroup type) { - //D3D12TODO: Add implementation + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + size_t index = (m_query_read_pos + m_query_count + m_query_buffer.size() - 1) % m_query_buffer.size(); + auto& entry = m_query_buffer[index]; + + D3D::current_command_list->EndQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, static_cast(index)); + D3D::current_command_list->ResolveQueryData(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, static_cast(index), 1, m_query_readback_buffer, index * sizeof(UINT64)); + entry.fence_value = m_next_fence_value; + } } void PerfQuery::ResetQuery() { - //D3D12TODO: Add implementation + m_query_count = 0; + std::fill_n(m_results, ArraySize(m_results), 0); } u32 PerfQuery::GetQueryResult(PerfQueryType type) { - //D3D12TODO: Add implementation - return 0; + u32 result = 0; + + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) + result = m_results[PQG_ZCOMP_ZCOMPLOC]; + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) + result = m_results[PQG_ZCOMP]; + else if (type == PQ_BLEND_INPUT) + result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]; + else if (type == PQ_EFB_COPY_CLOCKS) + result = m_results[PQG_EFB_COPY_CLOCKS]; + + return result / 4; } void PerfQuery::FlushOne() { - //D3D12TODO: Add implementation + size_t index = m_query_read_pos; + ActiveQuery& entry = m_query_buffer[index]; + + // Has the command list been executed yet? 
+ if (entry.fence_value == m_next_fence_value) + D3D::command_list_mgr->ExecuteQueuedWork(false); + + // Block until the fence is reached + D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, entry.fence_value); + + // Copy from readback buffer to local + void* readback_buffer_map; + D3D12_RANGE read_range = { sizeof(UINT64) * index, sizeof(UINT64) * (index + 1) }; + CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map)); + + UINT64 result; + memcpy(&result, reinterpret_cast(readback_buffer_map) + sizeof(UINT64) * index, sizeof(UINT64)); + + D3D12_RANGE write_range = {}; + m_query_readback_buffer->Unmap(0, &write_range); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight()); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + m_query_count--; +} + +UINT64 PerfQuery::FindLastPendingFenceValue() const +{ + UINT64 last_fence_value = 0; + u32 query_count = m_query_count; + u32 query_read_pos = m_query_read_pos; + while (query_count > 0) + { + const ActiveQuery& entry = m_query_buffer[query_read_pos]; + + last_fence_value = std::max(entry.fence_value, last_fence_value); + query_read_pos = (query_read_pos + 1) % m_query_buffer.size(); + query_count--; + } + + return last_fence_value; } void PerfQuery::FlushResults() { - //D3D12TODO: Add implementation + if (IsFlushed()) + return; + + // Find the fence value we have to wait for. + UINT64 last_fence_value = FindLastPendingFenceValue(); + if (last_fence_value == m_next_fence_value) + D3D::command_list_mgr->ExecuteQueuedWork(false); + + // Wait for all queries to be resolved. + D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, last_fence_value); + + // Map the whole readback buffer. Shouldn't have much overhead, and saves taking the wrapped-around cases into consideration. 
+ void* readback_buffer_map; + D3D12_RANGE read_range = { 0, QUERY_READBACK_BUFFER_SIZE }; + CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map)); + + // Read all pending queries. + while (m_query_count > 0) + { + ActiveQuery& entry = m_query_buffer[m_query_read_pos]; + + UINT64 result; + memcpy(&result, reinterpret_cast(readback_buffer_map) + sizeof(UINT64) * m_query_read_pos, sizeof(UINT64)); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight()); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + m_query_count--; + } + + D3D12_RANGE write_range = {}; + m_query_readback_buffer->Unmap(0, &write_range); } void PerfQuery::WeakFlush() { - //D3D12TODO: Add implementation + UINT64 completed_fence = m_tracking_fence->GetCompletedValue(); + + while (!IsFlushed()) + { + ActiveQuery& entry = m_query_buffer[m_query_read_pos]; + if (entry.fence_value > completed_fence) + break; + + FlushOne(); + } } bool PerfQuery::IsFlushed() const { - //D3D12TODO: Add implementation - return true; + return m_query_count == 0; +} + +void PerfQuery::QueueFenceCallback(void* owning_object, UINT64 fence_value) +{ + PerfQuery* owning_perf_query = static_cast(owning_object); + owning_perf_query->QueueFence(fence_value); +} + +void PerfQuery::QueueFence(UINT64 fence_value) +{ + m_next_fence_value = fence_value + 1; } } // namespace diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.h b/Source/Core/VideoBackends/D3D12/PerfQuery.h index 6e197bf53a..98760910f3 100644 --- a/Source/Core/VideoBackends/D3D12/PerfQuery.h +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "VideoCommon/PerfQueryBase.h" @@ -27,20 +28,33 @@ public: private: struct ActiveQuery { - ID3D11Query* query; PerfQueryGroup query_type; + UINT64 fence_value; }; void 
WeakFlush(); + // Find the last fence value of all pending queries. + UINT64 FindLastPendingFenceValue() const; + // Only use when non-empty void FlushOne(); - // when testing in SMS: 64 was too small, 128 was ok - static const int s_perf_query_buffer_size = 512; + static void QueueFenceCallback(void* owning_object, UINT64 fence_value); + void QueueFence(UINT64 fence_value); - std::array m_query_buffer; + // when testing in SMS: 64 was too small, 128 was ok + static constexpr size_t PERF_QUERY_BUFFER_SIZE = 512; + static constexpr size_t QUERY_READBACK_BUFFER_SIZE = PERF_QUERY_BUFFER_SIZE * sizeof(UINT64); + + std::array m_query_buffer; int m_query_read_pos = 0; + + ID3D12QueryHeap* m_query_heap = nullptr; + ID3D12Resource* m_query_readback_buffer = nullptr; + + ID3D12Fence* m_tracking_fence = nullptr; + UINT64 m_next_fence_value = 0; }; } // namespace diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index ac95d198b3..17653bd193 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -29,7 +29,6 @@ #include "VideoBackends/D3D12/ShaderCache.h" #include "VideoBackends/D3D12/ShaderConstantsManager.h" #include "VideoBackends/D3D12/StaticShaderCache.h" -#include "VideoBackends/D3D12/Television.h" #include "VideoBackends/D3D12/TextureCache.h" #include "VideoCommon/AVIDump.h" @@ -50,8 +49,6 @@ static u32 s_last_multisamples = 1; static bool s_last_stereo_mode = false; static bool s_last_xfb_mode = false; -static Television s_television; - enum CLEAR_BLEND_DESC { CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0, @@ -104,8 +101,6 @@ StateCache gx_state_cache; static void SetupDeviceObjects() { - s_television.Init(); - g_framebuffer_manager = std::make_unique(); D3D12_DEPTH_STENCIL_DESC depth_desc; @@ -175,8 +170,6 @@ static void TeardownDeviceObjects() s_screenshot_texture = nullptr; } - s_television.Shutdown(); - gx_state_cache.Clear(); } @@ -231,8 +224,6 @@ 
Renderer::Renderer(void*& window_handle) return; } - D3D::Create((HWND)window_handle); - s_backbuffer_width = D3D::GetBackBufferWidth(); s_backbuffer_height = D3D::GetBackBufferHeight(); @@ -252,7 +243,7 @@ Renderer::Renderer(void*& window_handle) // Setup GX pipeline state gx_state.blend.blend_enable = false; - gx_state.blend.write_mask = D3D11_COLOR_WRITE_ENABLE_ALL; + gx_state.blend.write_mask = D3D12_COLOR_WRITE_ENABLE_ALL; gx_state.blend.src_blend = D3D12_BLEND_ONE; gx_state.blend.dst_blend = D3D12_BLEND_ZERO; gx_state.blend.blend_op = D3D12_BLEND_OP_ADD; @@ -280,7 +271,7 @@ Renderer::Renderer(void*& window_handle) D3D::current_command_list->RSSetViewports(1, &vp); // Already transitioned to appropriate states a few lines up for the clears. - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + FramebufferManager::RestoreEFBRenderTargets(); D3D::BeginFrame(); } @@ -290,7 +281,6 @@ Renderer::~Renderer() D3D::EndFrame(); D3D::WaitForOutstandingRenderingToComplete(); TeardownDeviceObjects(); - D3D::Close(); } void Renderer::RenderText(const std::string& text, int left, int top, u32 color) @@ -534,7 +524,8 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha D3D::DrawClearQuad(rgba_color, 1.0f - (z & 0xFFFFFF) / 16777216.0f, blend_desc, depth_stencil_desc, FramebufferManager::GetEFBColorTexture()->GetMultisampled()); // Restores proper viewport/scissor settings. - g_renderer->RestoreAPIState(); + g_renderer->SetViewport(); + BPFunctions::SetScissor(); FramebufferManager::InvalidateEFBAccessCopies(); } @@ -582,14 +573,13 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) FramebufferManager::GetEFBColorTempTexture()->GetMultisampled() ); - // Restores proper viewport/scissor settings. 
- g_renderer->RestoreAPIState(); - FramebufferManager::SwapReinterpretTexture(); FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // Restores proper viewport/scissor settings. + RestoreAPIState(); } void Renderer::SetBlendMode(bool force_update) @@ -675,11 +665,13 @@ bool Renderer::SaveScreenshot(const std::string& filename, const TargetRectangle D3D::command_list_mgr->ExecuteQueuedWork(true); void* screenshot_texture_map; - CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map)); + D3D12_RANGE read_range = { 0, dst_location.PlacedFootprint.Footprint.RowPitch * (source_box.bottom - source_box.top) }; + CheckHR(s_screenshot_texture->Map(0, &read_range, &screenshot_texture_map)); saved_png = TextureToPng(static_cast(screenshot_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false); - s_screenshot_texture->Unmap(0, nullptr); + D3D12_RANGE write_range = {}; + s_screenshot_texture->Unmap(0, &write_range); if (saved_png) { @@ -735,6 +727,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height // Invalidate EFB access copies. Not strictly necessary, but this avoids having the buffers mapped when calling Present(). 
FramebufferManager::InvalidateEFBAccessCopies(); + BBox::Invalidate(); // Prepare to copy the XFBs to our backbuffer UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); @@ -749,15 +742,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height // activate linear filtering for the buffer copies D3D::SetLinearCopySampler(); - if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB) - { - // EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well. - D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); - - s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height); - s_television.Render(); - } - else if (g_ActiveConfig.bUseXFB) + if (g_ActiveConfig.bUseXFB) { const XFBSource* xfb_source; @@ -767,33 +752,40 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height xfb_source = static_cast(xfb_source_list[i]); TargetRectangle drawRc; - - // use virtual xfb with offset - int xfb_height = xfb_source->srcHeight; - int xfb_width = xfb_source->srcWidth; - int hOffset = (static_cast(xfb_source->srcAddr) - static_cast(xfb_addr)) / (static_cast(fb_stride) * 2); - - drawRc.top = target_rc.top + hOffset * target_rc.GetHeight() / static_cast(fb_height); - drawRc.bottom = target_rc.top + (hOffset + xfb_height) * target_rc.GetHeight() / static_cast(fb_height); - drawRc.left = target_rc.left + (target_rc.GetWidth() - xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; - drawRc.right = target_rc.left + (target_rc.GetWidth() + xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; - - // The following code disables auto stretch. Kept for reference. 
- // scale draw area for a 1 to 1 pixel mapping with the draw target - //float vScale = static_cast(fbHeight) / static_cast(s_backbuffer_height); - //float hScale = static_cast(fbWidth) / static_cast(s_backbuffer_width); - //drawRc.top *= vScale; - //drawRc.bottom *= vScale; - //drawRc.left *= hScale; - //drawRc.right *= hScale; - TargetRectangle source_rc; source_rc.left = xfb_source->sourceRc.left; source_rc.top = xfb_source->sourceRc.top; source_rc.right = xfb_source->sourceRc.right; source_rc.bottom = xfb_source->sourceRc.bottom; - source_rc.right -= Renderer::EFBToScaledX(fb_stride - fb_width); + // use virtual xfb with offset + int xfb_height = xfb_source->srcHeight; + int xfb_width = xfb_source->srcWidth; + int hOffset = (static_cast(xfb_source->srcAddr) - static_cast(xfb_addr)) / (static_cast(fb_stride) * 2); + + if (g_ActiveConfig.bUseRealXFB) + { + drawRc = target_rc; + source_rc.right -= fb_stride - fb_width; + } + else + { + drawRc.top = target_rc.top + hOffset * target_rc.GetHeight() / static_cast(fb_height); + drawRc.bottom = target_rc.top + (hOffset + xfb_height) * target_rc.GetHeight() / static_cast(fb_height); + drawRc.left = target_rc.left + (target_rc.GetWidth() - xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + drawRc.right = target_rc.left + (target_rc.GetWidth() + xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + + // The following code disables auto stretch. Kept for reference. 
+ // scale draw area for a 1 to 1 pixel mapping with the draw target + //float vScale = static_cast(fbHeight) / static_cast(s_backbuffer_height); + //float hScale = static_cast(fbWidth) / static_cast(s_backbuffer_width); + //drawRc.top *= vScale; + //drawRc.bottom *= vScale; + //drawRc.left *= hScale; + //drawRc.right *= hScale; + + source_rc.right -= Renderer::EFBToScaledX(fb_stride - fb_width); + } BlitScreen(source_rc, drawRc, xfb_source->m_tex, xfb_source->texWidth, xfb_source->texHeight, gamma); } @@ -881,9 +873,12 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height } void* screenshot_texture_map; - CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map)); + D3D12_RANGE read_range = { 0, dst_location.PlacedFootprint.Footprint.RowPitch * source_height }; + CheckHR(s_screenshot_texture->Map(0, &read_range, &screenshot_texture_map)); formatBufferDump(static_cast(screenshot_texture_map), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); - s_screenshot_texture->Unmap(0, nullptr); + + D3D12_RANGE write_range = {}; + s_screenshot_texture->Unmap(0, &write_range); FlipImageData(&frame_data[0], w, h); AVIDump::AddFrame(&frame_data[0], source_width, source_height); @@ -995,14 +990,12 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height } // begin next frame - RestoreAPIState(); D3D::BeginFrame(); FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - SetViewport(); + RestoreAPIState(); } void Renderer::ResetAPIState() @@ -1016,6 +1009,9 @@ void Renderer::RestoreAPIState() // 
overwritten elsewhere (particularly the viewport). SetViewport(); BPFunctions::SetScissor(); + + FramebufferManager::RestoreEFBRenderTargets(); + BBox::Bind(); } static bool s_previous_use_dst_alpha = false; diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp index e2bd65aac9..38de296e87 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp @@ -18,6 +18,8 @@ static ID3DBlob* s_depth_matrix_program_blob[2] = {}; static ID3DBlob* s_depth_resolve_to_color_program_blob = {}; static ID3DBlob* s_clear_program_blob = {}; static ID3DBlob* s_anaglyph_program_blob = {}; +static ID3DBlob* s_xfb_encode_shader_blob = {}; +static ID3DBlob* s_xfb_decode_shader_blob = {}; static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {}; static ID3DBlob* s_rgb8_to_rgba6_program_blob[2] = {}; @@ -411,6 +413,93 @@ static constexpr const char s_copy_geometry_shader_hlsl[] = { "}\n" }; +static const char s_xfb_encode_shader_hlsl[] = R"( + +Texture2DArray tex0 : register(t0); +SamplerState samp0 : register(s0); + +cbuffer EncodeParams : register(b0) +{ + float4 srcRect; + float2 texelSize; +} + +// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see +// +static const float3x4 RGB_TO_YCBCR = float3x4( + 0.257, 0.504, 0.098, 16.0/255.0, + -0.148, -0.291, 0.439, 128.0/255.0, + 0.439, -0.368, -0.071, 128.0/255.0 +); + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0, + in float gamma : TEXCOORD1) +{ + // Load three input pixels, emulate clamp sampler by clamping to the source rectangle. + // Subtract 0.5 from the x coordinate because we're doubling the width, and want the pixel center shifted back to 0.5. + // The native resolution is used as a reference here so bilinear filtering works as expected. 
+ float2 baseCoords = lerp(srcRect.xy, srcRect.zw, float2(uv0.x - 0.5 * texelSize.x, uv0.y)); + float3 sampleL = tex0.Sample(samp0, float3(max(srcRect.xy, baseCoords - float2(texelSize.x, 0)), 0)).rgb; + float3 sampleM = tex0.Sample(samp0, float3(baseCoords, 0)).rgb; + float3 sampleR = tex0.Sample(samp0, float3(min(srcRect.zw, baseCoords + float2(texelSize.x, 0)), 0)).rgb; + + // Gamma correction (gamma is already rcp(gamma)) + // abs() here because the HLSL compiler throws a warning otherwise. + sampleL = pow(abs(sampleL), gamma); + sampleM = pow(abs(sampleM), gamma); + sampleR = pow(abs(sampleR), gamma); + + // RGB -> YUV + float3 yuvL = mul(RGB_TO_YCBCR, float4(sampleL,1)); + float3 yuvM = mul(RGB_TO_YCBCR, float4(sampleM,1)); + float3 yuvR = mul(RGB_TO_YCBCR, float4(sampleR,1)); + + // The Y components correspond to two EFB pixels, while the U and V are + // made from a blend of three EFB pixels. + float y0 = yuvM.r; + float y1 = yuvR.r; + float u0 = 0.25*yuvL.g + 0.5*yuvM.g + 0.25*yuvR.g; + float v0 = 0.25*yuvL.b + 0.5*yuvM.b + 0.25*yuvR.b; + ocol0 = float4(y0, u0, y1, v0); +} + +)"; + +static const char s_xfb_decode_shader_hlsl[] = R"( + +Texture2DArray tex0 : register(t0); + +static const float3x3 YCBCR_TO_RGB = float3x3( + 1.164, 0.000, 1.596, + 1.164, -0.392, -0.813, + 1.164, 2.017, 0.000 +); + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0) +{ + // Divide coordinates by 2 due to half-width YUYV texture. + int2 ipos = int2(pos.xy); + int2 texpos = int2(ipos.x >> 1, ipos.y); + float4 yuyv = tex0.Load(int4(texpos, 0, 0)); + + // Select the first Y sample for even pixels, the second Y sample for odd pixels.
+ float y = lerp(yuyv.r, yuyv.b, float(ipos.x & 1)); + + // Recover RGB components + float3 yuv_601_sub = float3(y, yuyv.ga) - float3(16.0/255.0, 128.0/255.0, 128.0/255.0); + float3 rgb_601 = mul(YCBCR_TO_RGB, yuv_601_sub); + + ocol0 = float4(rgb_601, 1); +} + +)"; + D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGBA6ToRGB8PixelShader(bool multisampled) { D3D12_SHADER_BYTECODE bytecode = {}; @@ -625,6 +714,28 @@ D3D12_SHADER_BYTECODE StaticShaderCache::GetCopyGeometryShader() return bytecode; } +D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBEncodePixelShader() +{ + D3D12_SHADER_BYTECODE bytecode = + { + s_xfb_encode_shader_blob->GetBufferPointer(), + s_xfb_encode_shader_blob->GetBufferSize() + }; + + return bytecode; +} + +D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBDecodePixelShader() +{ + D3D12_SHADER_BYTECODE bytecode = + { + s_xfb_decode_shader_blob->GetBufferPointer(), + s_xfb_decode_shader_blob->GetBufferSize() + }; + + return bytecode; +} + void StaticShaderCache::Init() { // Compile static pixel shaders @@ -633,6 +744,8 @@ void StaticShaderCache::Init() D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]); D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]); D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]); + D3D::CompilePixelShader(s_xfb_encode_shader_hlsl, &s_xfb_encode_shader_blob); + D3D::CompilePixelShader(s_xfb_decode_shader_hlsl, &s_xfb_decode_shader_blob); // Compile static vertex shaders D3D::CompileVertexShader(s_simple_vertex_shader_hlsl, &s_simple_vertex_shader_blob); @@ -657,7 +770,8 @@ void StaticShaderCache::InvalidateMSAAShaders() void StaticShaderCache::Shutdown() { // Free pixel shader blobs - + SAFE_RELEASE(s_xfb_decode_shader_blob); + SAFE_RELEASE(s_xfb_encode_shader_blob); SAFE_RELEASE(s_clear_program_blob); SAFE_RELEASE(s_anaglyph_program_blob); SAFE_RELEASE(s_depth_resolve_to_color_program_blob); diff --git 
a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h index 4b9f6959a8..492363f0a8 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h @@ -23,6 +23,8 @@ public: static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader(); static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetReinterpRGB8ToRGBA6PixelShader(bool multisampled); + static D3D12_SHADER_BYTECODE GetXFBEncodePixelShader(); + static D3D12_SHADER_BYTECODE GetXFBDecodePixelShader(); // Vertex shaders static D3D12_SHADER_BYTECODE GetSimpleVertexShader(); diff --git a/Source/Core/VideoBackends/D3D12/Television.cpp b/Source/Core/VideoBackends/D3D12/Television.cpp deleted file mode 100644 index 548859a1ac..0000000000 --- a/Source/Core/VideoBackends/D3D12/Television.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D12/D3DBase.h" -#include "VideoBackends/D3D12/D3DShader.h" -#include "VideoBackends/D3D12/D3DState.h" -#include "VideoBackends/D3D12/D3DUtil.h" -#include "VideoBackends/D3D12/Television.h" -#include "VideoCommon/VideoConfig.h" - -// D3D12TODO: Add DX12 path for this file. - -namespace DX12 -{ - -Television::Television() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Init() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Shutdown() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Submit(u32 xfb_address, u32 stride, u32 width, u32 height) -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Render() -{ - // D3D12TODO: Add DX12 path for this file. 
-} - -} diff --git a/Source/Core/VideoBackends/D3D12/Television.h b/Source/Core/VideoBackends/D3D12/Television.h deleted file mode 100644 index 1bfbbb7c7d..0000000000 --- a/Source/Core/VideoBackends/D3D12/Television.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "VideoCommon/VideoCommon.h" - -// D3D12TODO: Add DX12 path for this file. - -namespace DX12 -{ - -class Television final -{ - -public: - - Television(); - - void Init(); - void Shutdown(); - - // Submit video data to be drawn. This will change the current state of the - // TV. xfbAddr points to YUYV data stored in GameCube/Wii RAM, but the XFB - // may be virtualized when rendering so the RAM may not actually be read. - void Submit(u32 xfb_address, u32 stride, u32 width, u32 height); - - // Render the current state of the TV. - void Render(); - -private: - - -}; - -} diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp index f4a57eb3e1..4b235d1037 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp @@ -83,7 +83,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l // Map readback buffer and save to file. 
void* readback_texture_map; - CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &readback_texture_map)); + D3D12_RANGE read_range = { 0, required_readback_buffer_size }; + CheckHR(s_texture_cache_entry_readback_buffer->Map(0, &read_range, &readback_texture_map)); bool saved = TextureToPng( static_cast(readback_texture_map), @@ -93,7 +94,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l dst_location.PlacedFootprint.Footprint.Height ); - s_texture_cache_entry_readback_buffer->Unmap(0, nullptr); + D3D12_RANGE write_range = {}; + s_texture_cache_entry_readback_buffer->Unmap(0, &write_range); return saved; } @@ -106,33 +108,24 @@ void TextureCache::TCacheEntry::CopyRectangleFromTexture( if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()) { - D3D12_BOX srcbox; - srcbox.left = src_rect.left; - srcbox.top = src_rect.top; - srcbox.right = src_rect.right; - srcbox.bottom = src_rect.bottom; - srcbox.front = 0; - srcbox.back = srcentry->config.layers; + // These assertions should hold true unless the base code is passing us sizes too large, in which case it should be fixed instead. + _assert_msg_(VIDEO, + static_cast(src_rect.GetWidth()) <= source->config.width && + static_cast(src_rect.GetHeight()) <= source->config.height, + "Source rect is too large for CopyRectangleFromTexture"); - if (static_cast(src_rect.GetHeight()) > config.height || - static_cast(src_rect.GetWidth()) > config.width) - { - // To mimic D3D11 behavior, we're just going to drop the clear since it is invalid. - // This invalid copy needs to be fixed above the Backend level. - - // On D3D12, instead of silently dropping this invalid clear, the runtime throws an exception - // so we need to filter it out ourselves. 
- - return; - } + _assert_msg_(VIDEO, + static_cast(dst_rect.GetWidth()) <= config.width && + static_cast(dst_rect.GetHeight()) <= config.height, + "Dest rect is too large for CopyRectangleFromTexture"); + CD3DX12_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, srcentry->config.layers); D3D12_TEXTURE_COPY_LOCATION dst_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(srcentry->m_texture->GetTex12(), 0); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); srcentry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); - - D3D::current_command_list->CopyTextureRegion(&dst_location, dst_rect.left, dst_rect.top, 0, &src_location, &srcbox); + D3D::current_command_list->CopyTextureRegion(&dst_location, dst_rect.left, dst_rect.top, 0, &src_location, &src_box); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); srcentry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); @@ -168,7 +161,6 @@ void TextureCache::TCacheEntry::CopyRectangleFromTexture( FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); } @@ -185,8 +177,8 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry if (config.rendertarget) { D3DTexture2D* texture = D3DTexture2D::Create(config.width, config.height, - static_cast((static_cast(D3D11_BIND_RENDER_TARGET) 
| static_cast(D3D11_BIND_SHADER_RESOURCE))), - D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, config.layers); + TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, + DXGI_FORMAT_R8G8B8A8_UNORM, 1, config.layers); TCacheEntry* entry = new TCacheEntry(config, texture); @@ -216,7 +208,7 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry D3DTexture2D* texture = new D3DTexture2D( texture_resource, - D3D11_BIND_SHADER_RESOURCE, + TEXTURE_BIND_FLAG_SHADER_RESOURCE, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -308,7 +300,6 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); } @@ -490,7 +481,6 @@ void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* uncon FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); } diff --git a/Source/Core/VideoBackends/D3D12/VertexManager.cpp b/Source/Core/VideoBackends/D3D12/VertexManager.cpp index 6bdfd2e272..9f22cc56eb 100644 --- a/Source/Core/VideoBackends/D3D12/VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D12/VertexManager.cpp @@ -137,10 +137,7 @@ 
void VertexManager::vFlush(bool use_dst_alpha) ShaderCache::LoadAndSetActiveShaders(use_dst_alpha ? DSTALPHA_DUAL_SOURCE_BLEND : DSTALPHA_NONE, current_primitive_type); if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active) - { - // D3D12TODO: Support GPU-side bounding box. - // D3D::context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 2, 1, &BBox::GetUAV(), nullptr); - } + BBox::Invalidate(); u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(); @@ -161,15 +158,15 @@ void VertexManager::vFlush(bool use_dst_alpha) // D3D12TODO: Decide right threshold for drawCountSinceAsyncFlush at runtime depending on // amount of stall measured in AccessEFB. - if (D3D::command_list_mgr->m_draws_since_last_execution > 100 && D3D::command_list_mgr->m_cpu_access_last_frame) + // We can't do this with perf queries enabled since it can leave queries open. + + if (D3D::command_list_mgr->m_cpu_access_last_frame && + D3D::command_list_mgr->m_draws_since_last_execution > 100 && + !PerfQueryBase::ShouldEmulate()) { D3D::command_list_mgr->m_draws_since_last_execution = 0; D3D::command_list_mgr->ExecuteQueuedWork(); - - g_renderer->SetViewport(); - - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); } } @@ -185,7 +182,7 @@ void VertexManager::ResetBuffer(u32 stride) return; } - bool command_list_executed = m_vertex_stream_buffer->AllocateSpaceInBuffer(MAXVBUFFERSIZE, stride); + m_vertex_stream_buffer->AllocateSpaceInBuffer(MAXVBUFFERSIZE, stride); if (m_vertex_stream_buffer_reallocated) { @@ -198,12 +195,7 @@ void VertexManager::ResetBuffer(u32 stride) s_pCurBufferPointer = static_cast(m_vertex_stream_buffer->GetCPUAddressOfCurrentAllocation()); m_vertex_draw_offset = static_cast(m_vertex_stream_buffer->GetOffsetOfCurrentAllocation()); - command_list_executed |= 
m_index_stream_buffer->AllocateSpaceInBuffer(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16)); - if (command_list_executed) - { - g_renderer->SetViewport(); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - } + m_index_stream_buffer->AllocateSpaceInBuffer(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16)); if (m_index_stream_buffer_reallocated) { diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp index a2933ed268..670cda05e2 100644 --- a/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp @@ -1,4 +1,4 @@ -// Copyright 2011 Dolphin Emulator Project +// Copyright 2016 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. @@ -6,33 +6,180 @@ #include "Common/MsgHandler.h" #include "Common/Logging/Log.h" #include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" #include "VideoBackends/D3D12/D3DShader.h" #include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" #include "VideoBackends/D3D12/FramebufferManager.h" #include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" #include "VideoBackends/D3D12/XFBEncoder.h" -// D3D12TODO: Convert this file.. 
- namespace DX12 { +// YUYV data is packed into half-width RGBA, with Y values in (R,B) and UV in (G,A) +constexpr size_t XFB_TEXTURE_WIDTH = MAX_XFB_WIDTH / 2; +constexpr size_t XFB_TEXTURE_HEIGHT = MAX_XFB_HEIGHT; + +// Buffer enough space for 2 XFB buffers (our frame latency) +constexpr size_t XFB_UPLOAD_BUFFER_SIZE = D3D::AlignValue(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * XFB_TEXTURE_HEIGHT * 2; +constexpr size_t XFB_ENCODER_PARAMS_BUFFER_SIZE = 64 * 1024; + +std::unique_ptr g_xfb_encoder; + XFBEncoder::XFBEncoder() -{ } - -void XFBEncoder::Init() { - // D3D12TODO: Convert this file.. + ID3D12Resource* texture; + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), + D3D12_RESOURCE_STATE_RENDER_TARGET, + nullptr, + IID_PPV_ARGS(&texture))); + + m_yuyv_texture = new D3DTexture2D(texture, + TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET, + DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM); + SAFE_RELEASE(texture); + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(D3D::AlignValue(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * MAX_XFB_HEIGHT), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readback_buffer))); + + m_upload_buffer = std::make_unique(XFB_UPLOAD_BUFFER_SIZE, XFB_UPLOAD_BUFFER_SIZE, nullptr); + m_encode_params_buffer = std::make_unique(XFB_ENCODER_PARAMS_BUFFER_SIZE, XFB_ENCODER_PARAMS_BUFFER_SIZE, nullptr); } -void XFBEncoder::Shutdown() +XFBEncoder::~XFBEncoder() { - // D3D12TODO: Convert this file.. 
+ SAFE_RELEASE(m_yuyv_texture); + SAFE_RELEASE(m_readback_buffer); } -void XFBEncoder::Encode(u8* dst, u32 width, u32 height, const EFBRectangle& srcRect, float gamma) +void XFBEncoder::EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height, + D3DTexture2D* src_texture, const TargetRectangle& src_rect, + u32 src_width, u32 src_height, float gamma) { - // D3D12TODO: Convert this file.. + // src_rect is in native coordinates + // dst_pitch is in words + u32 dst_width = dst_pitch / 2; + u32 dst_texture_width = dst_width / 2; + _assert_msg_(VIDEO, dst_width <= MAX_XFB_WIDTH && dst_height <= MAX_XFB_HEIGHT, "XFB destination does not exceed maximum size"); + + // Encode parameters constant buffer used by shader + struct EncodeParameters + { + float srcRect[4]; + float texelSize[2]; + float pad[2]; + }; + EncodeParameters parameters = + { + { + static_cast<float>(src_rect.left) / static_cast<float>(src_width), + static_cast<float>(src_rect.top) / static_cast<float>(src_height), + static_cast<float>(src_rect.right) / static_cast<float>(src_width), + static_cast<float>(src_rect.bottom) / static_cast<float>(src_height) + }, + { + 1.0f / EFB_WIDTH, + 1.0f / EFB_HEIGHT + }, + { + 0.0f, + 0.0f + } + }; + m_encode_params_buffer->AllocateSpaceInBuffer(sizeof(parameters), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + memcpy(m_encode_params_buffer->GetCPUAddressOfCurrentAllocation(), &parameters, sizeof(parameters)); + + // Convert RGBA texture to YUYV intermediate texture. + // Performs downscaling through a linear filter. Probably not ideal, but it's not going to look perfect anyway. 
+ CD3DX12_RECT src_texture_rect(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom); + D3D12_RESOURCE_STATES src_texture_state = src_texture->GetResourceUsageState(); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &m_yuyv_texture->GetRTV12(), FALSE, nullptr); + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_encode_params_buffer->GetGPUAddressOfCurrentAllocation()); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + D3D::SetViewportAndScissor(0, 0, dst_texture_width, dst_height); + D3D::SetLinearCopySampler(); + D3D::DrawShadedTexQuad( + src_texture, &src_texture_rect, src_rect.GetWidth(), src_rect.GetHeight(), + StaticShaderCache::GetXFBEncodePixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + src_texture->TransitionToResourceState(D3D::current_command_list, src_texture_state); + + // Copy from YUYV intermediate texture to readback buffer. It's likely the pitch here is going to be different to dst_pitch. + u32 readback_pitch = D3D::AlignValue(dst_width * sizeof(u16), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, dst_texture_width, dst_height, 1, readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(m_yuyv_texture->GetTex12(), 0); + CD3DX12_BOX src_box(0, 0, dst_texture_width, dst_height); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + + // Wait until the GPU completes the copy. Resets back to known state automatically. 
+ D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Copy from the readback buffer to dst. + // Can't be done as one memcpy due to pitch difference. + void* readback_texture_map; + D3D12_RANGE read_range = { 0, readback_pitch * dst_height }; + CheckHR(m_readback_buffer->Map(0, &read_range, &readback_texture_map)); + + for (u32 row = 0; row < dst_height; row++) + { + const u8* row_src = reinterpret_cast(readback_texture_map) + readback_pitch * row; + u8* row_dst = dst + dst_pitch * row; + memcpy(row_dst, row_src, std::min(dst_pitch, readback_pitch)); + } + + D3D12_RANGE write_range = {}; + m_readback_buffer->Unmap(0, &write_range); +} + +void XFBEncoder::DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width, u32 src_height) +{ + _assert_msg_(VIDEO, src_width <= MAX_XFB_WIDTH && src_height <= MAX_XFB_HEIGHT, "XFB source does not exceed maximum size"); + + // Copy to XFB upload buffer. Each row has to be done separately due to pitch differences. + u32 buffer_pitch = D3D::AlignValue(src_width / 2 * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + m_upload_buffer->AllocateSpaceInBuffer(buffer_pitch * src_height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + for (u32 row = 0; row < src_height; row++) + { + const u8* row_src = src + (src_width * 2) * row; + u8* row_dst = reinterpret_cast(m_upload_buffer->GetCPUAddressOfCurrentAllocation()) + buffer_pitch * row; + memcpy(row_dst, row_src, src_width * 2); + } + + // Copy from upload buffer to intermediate YUYV texture. 
+ D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = { m_upload_buffer->GetOffsetOfCurrentAllocation(), { DXGI_FORMAT_R8G8B8A8_UNORM, src_width / 2, src_height, 1, buffer_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION src_location(m_upload_buffer->GetBuffer(), src_footprint); + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_yuyv_texture->GetTex12(), 0); + CD3DX12_BOX src_box(0, 0, src_width / 2, src_height); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + + // Convert YUYV texture to RGBA texture with pixel shader. + CD3DX12_RECT src_texture_rect(0, 0, src_width / 2, src_height); + dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &dst_texture->GetRTV12(), FALSE, nullptr); + D3D::SetViewportAndScissor(0, 0, src_width, src_height); + D3D::DrawShadedTexQuad( + m_yuyv_texture, &src_texture_rect, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT, + StaticShaderCache::GetXFBDecodePixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + // XFB source textures are expected to be in shader resource state. + dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } } diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.h b/Source/Core/VideoBackends/D3D12/XFBEncoder.h index 68d2cd3839..cb4f712cd6 100644 --- a/Source/Core/VideoBackends/D3D12/XFBEncoder.h +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.h @@ -1,28 +1,42 @@ -// Copyright 2011 Dolphin Emulator Project +// Copyright 2016 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. 
#pragma once +#include +#include + +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DTexture.h" #include "VideoCommon/VideoCommon.h" namespace DX12 { +class D3DTexture2D; + class XFBEncoder { - public: XFBEncoder(); + ~XFBEncoder(); - void Init(); - void Shutdown(); + void EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height, + D3DTexture2D* src_texture, const TargetRectangle& src_rect, + u32 src_width, u32 src_height, float gamma); - void Encode(u8* dst, u32 width, u32 height, const EFBRectangle& src_rect, float gamma); + void DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width, u32 src_height); private: - // D3D12TODO: Implement this class + D3DTexture2D* m_yuyv_texture; + ID3D12Resource* m_readback_buffer; + + std::unique_ptr m_upload_buffer; + std::unique_ptr m_encode_params_buffer; }; +extern std::unique_ptr g_xfb_encoder; + } diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index 8b045ba5b7..e67ea2777a 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -23,6 +23,7 @@ #include "VideoBackends/D3D12/TextureCache.h" #include "VideoBackends/D3D12/VertexManager.h" #include "VideoBackends/D3D12/VideoBackend.h" +#include "VideoBackends/D3D12/XFBEncoder.h" #include "VideoCommon/BPStructs.h" #include "VideoCommon/CommandProcessor.h" @@ -64,11 +65,14 @@ std::string VideoBackend::GetDisplayName() const void InitBackendInfo() { - HRESULT hr = DX12::D3D::LoadDXGI(); - if (SUCCEEDED(hr)) hr = DX12::D3D::LoadD3D(); + HRESULT hr = D3D::LoadDXGI(); + if (FAILED(hr)) + return; + + hr = D3D::LoadD3D(); if (FAILED(hr)) { - DX12::D3D::UnloadDXGI(); + D3D::UnloadDXGI(); return; } @@ -85,9 +89,14 @@ void InitBackendInfo() IDXGIFactory* factory; IDXGIAdapter* ad; - hr = DX12::create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); + hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) + { 
PanicAlert("Failed to create IDXGIFactory object"); + D3D::UnloadD3D(); + D3D::UnloadDXGI(); + return; + } // adapters g_Config.backend_info.Adapters.clear(); @@ -102,28 +111,34 @@ void InitBackendInfo() // TODO: These don't get updated on adapter change, yet if (adapter_index == g_Config.iAdapter) { - std::string samples; - std::vector modes = DX12::D3D::EnumAAModes(ad); - // First iteration will be 1. This equals no AA. - for (unsigned int i = 0; i < modes.size(); ++i) + ID3D12Device* temp_device; + hr = d3d12_create_device(ad, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&temp_device)); + if (SUCCEEDED(hr)) { - g_Config.backend_info.AAModes.push_back(modes[i].Count); + std::string samples; + std::vector modes = D3D::EnumAAModes(temp_device); + // First iteration will be 1. This equals no AA. + for (unsigned int i = 0; i < modes.size(); ++i) + { + g_Config.backend_info.AAModes.push_back(modes[i].Count); + } + + // Requires the earlydepthstencil attribute (only available in shader model 5) + g_Config.backend_info.bSupportsEarlyZ = true; + + // Requires full UAV functionality (only available in shader model 5) + g_Config.backend_info.bSupportsBBox = true; + + // Requires the instance attribute (only available in shader model 5) + g_Config.backend_info.bSupportsGSInstancing = true; + + // Sample shading requires shader model 5 + g_Config.backend_info.bSupportsSSAA = true; + + temp_device->Release(); } - - bool shader_model_5_supported = (DX12::D3D::GetFeatureLevel(ad) >= D3D_FEATURE_LEVEL_11_0); - - // Requires the earlydepthstencil attribute (only available in shader model 5) - g_Config.backend_info.bSupportsEarlyZ = shader_model_5_supported; - - // Requires full UAV functionality (only available in shader model 5) - g_Config.backend_info.bSupportsBBox = false; - - // Requires the instance attribute (only available in shader model 5) - g_Config.backend_info.bSupportsGSInstancing = shader_model_5_supported; - - // Sample shading requires shader model 5 - 
g_Config.backend_info.bSupportsSSAA = shader_model_5_supported; } + g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description)); ad->Release(); } @@ -133,8 +148,8 @@ void InitBackendInfo() g_Config.backend_info.PPShaders.clear(); g_Config.backend_info.AnaglyphShaders.clear(); - DX12::D3D::UnloadDXGI(); - DX12::D3D::UnloadD3D(); + D3D::UnloadD3D(); + D3D::UnloadDXGI(); } void VideoBackend::ShowConfig(void *hParent) @@ -145,11 +160,6 @@ void VideoBackend::ShowConfig(void *hParent) bool VideoBackend::Initialize(void *window_handle) { - bool d3d12_supported = D3D::AlertUserIfSelectedAdapterDoesNotSupportD3D12(); - - if (!d3d12_supported) - return false; - if (window_handle == nullptr) return false; @@ -168,6 +178,9 @@ bool VideoBackend::Initialize(void *window_handle) g_Config.VerifyValidity(); UpdateActiveConfig(); + if (FAILED(D3D::Create((HWND)window_handle))) + return false; + m_window_handle = window_handle; m_initialized = true; @@ -181,6 +194,7 @@ void VideoBackend::Video_Prepare() g_texture_cache = std::make_unique(); g_vertex_manager = std::make_unique(); g_perf_query = std::make_unique(); + g_xfb_encoder = std::make_unique(); ShaderCache::Init(); ShaderConstantsManager::Init(); StaticShaderCache::Init(); @@ -230,10 +244,13 @@ void VideoBackend::Shutdown() StaticShaderCache::Shutdown(); BBox::Shutdown(); + g_xfb_encoder.reset(); g_perf_query.reset(); g_vertex_manager.reset(); g_texture_cache.reset(); g_renderer.reset(); + + D3D::Close(); } } diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 0960bbd5f7..1bf8b8e10e 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1309,6 +1309,11 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co { drawRc = flipped_trc; sourceRc.right -= fbStride - fbWidth; + + // RealXFB doesn't call ConvertEFBRectangle for sourceRc, therefore it is still assuming a top-left origin. 
+ // The top offset is always zero (see FramebufferManagerBase::GetRealXFBSource). + sourceRc.top = sourceRc.bottom; + sourceRc.bottom = 0; } else { diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.cpp b/Source/Core/VideoCommon/FramebufferManagerBase.cpp index c5597dddba..ab095f1020 100644 --- a/Source/Core/VideoCommon/FramebufferManagerBase.cpp +++ b/Source/Core/VideoCommon/FramebufferManagerBase.cpp @@ -67,12 +67,10 @@ const XFBSourceBase* const* FramebufferManagerBase::GetRealXFBSource(u32 xfbAddr m_realXFBSource->texWidth = fbWidth; m_realXFBSource->texHeight = fbHeight; - // OpenGL texture coordinates originate at the lower left, which is why - // sourceRc.top = fbHeight and sourceRc.bottom = 0. m_realXFBSource->sourceRc.left = 0; - m_realXFBSource->sourceRc.top = fbHeight; + m_realXFBSource->sourceRc.top = 0; m_realXFBSource->sourceRc.right = fbWidth; - m_realXFBSource->sourceRc.bottom = 0; + m_realXFBSource->sourceRc.bottom = fbHeight; // Decode YUYV data from GameCube RAM m_realXFBSource->DecodeToTexture(xfbAddr, fbWidth, fbHeight);