Add EFB encode-to-RAM support in DX11 backend. It could probably be simplified a lot, and not all the possible formats are implemented. I tried to use the dynamic-linking feature of shader model 5, but Microsoft's HLSL compiler is broken. "Dynamic mode" is implemented, but disabled for now.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7253 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Nolan Check
2011-02-26 23:41:02 +00:00
parent 8351177738
commit f0c5cc76a9
19 changed files with 1687 additions and 112 deletions

View File

@ -25,6 +25,10 @@
namespace DX11
{
HINSTANCE hD3DCompilerDll = NULL;
D3DREFLECT PD3DReflect = NULL;
int d3dcompiler_dll_ref = 0;
HINSTANCE hD3DXDll = NULL;
D3DX11COMPILEFROMMEMORYTYPE PD3DX11CompileFromMemory = NULL;
D3DX11FILTERTEXTURETYPE PD3DX11FilterTexture = NULL;
@ -113,7 +117,7 @@ HRESULT LoadD3DX()
// try to load D3DX11 first to check whether we have proper runtime support
// try to use the dll the backend was compiled against first - don't bother about debug runtimes
hD3DXDll = LoadLibraryA(StringFromFormat("d3dx11_%d.dll", D3DX11_SDK_VERSION).c_str());
hD3DXDll = LoadLibraryA(D3DX11_DLL_A);
if (!hD3DXDll)
{
// if that fails, use the dll which should be available in every SDK which officially supports DX11.
@ -144,6 +148,35 @@ HRESULT LoadD3DX()
return S_OK;
}
// Loads the D3DCompiler DLL and resolves the D3DReflect entry point into
// PD3DReflect. The load is reference counted through d3dcompiler_dll_ref:
// only the first successful call touches the DLL, and every successful call
// must be balanced by one UnloadD3DCompiler().
//
// Returns S_OK when the DLL is loaded and PD3DReflect is usable, E_FAIL
// otherwise. On failure all state is rolled back (reference count restored,
// DLL released, pointers cleared), so a later retry really tries again and a
// following UnloadD3DCompiler() is a harmless no-op.
HRESULT LoadD3DCompiler()
{
	if (d3dcompiler_dll_ref++ > 0) return S_OK;
	if (hD3DCompilerDll) return S_OK;

	// try to use the dll the backend was compiled against first - don't bother about debug runtimes
	hD3DCompilerDll = LoadLibraryA(D3DCOMPILER_DLL_A);
	if (!hD3DCompilerDll)
	{
		// if that fails, use the dll which should be available in every SDK which officially supports DX11.
		hD3DCompilerDll = LoadLibraryA("D3DCompiler_42.dll");
		if (!hD3DCompilerDll)
		{
			MessageBoxA(NULL, "Failed to load D3DCompiler_42.dll, update your DX11 runtime, please", "Critical error", MB_OK | MB_ICONERROR);
			// Undo the increment above; otherwise the next call would take the
			// "already loaded" early-out and report success without a DLL.
			--d3dcompiler_dll_ref;
			return E_FAIL;
		}
		else
		{
			NOTICE_LOG(VIDEO, "Successfully loaded D3DCompiler_42.dll. If you're having trouble, try updating your DX runtime first.");
		}
	}

	PD3DReflect = (D3DREFLECT)GetProcAddress(hD3DCompilerDll, "D3DReflect");
	if (PD3DReflect == NULL)
	{
		MessageBoxA(NULL, "GetProcAddress failed for D3DReflect!", "Critical error", MB_OK | MB_ICONERROR);
		// Without D3DReflect the module is of no use: release it and report
		// the failure instead of returning S_OK with a NULL function pointer.
		FreeLibrary(hD3DCompilerDll);
		hD3DCompilerDll = NULL;
		--d3dcompiler_dll_ref;
		return E_FAIL;
	}

	return S_OK;
}
void UnloadDXGI()
{
if (!dxgi_dll_ref) return;
@ -177,6 +210,16 @@ void UnloadD3D()
PD3D11CreateDeviceAndSwapChain = NULL;
}
// Releases one reference to the D3DCompiler DLL. The module is freed and
// PD3DReflect is cleared only when the reference count reaches zero; calling
// this with no outstanding references does nothing.
void UnloadD3DCompiler()
{
	// Bail out when nothing is referenced, or when other users remain after
	// giving up our own reference.
	if (d3dcompiler_dll_ref == 0 || --d3dcompiler_dll_ref != 0)
		return;

	if (hD3DCompilerDll != NULL)
		FreeLibrary(hD3DCompilerDll);

	PD3DReflect = NULL;
	hD3DCompilerDll = NULL;
}
void EnumAAModes(IDXGIAdapter* adapter, std::vector<DXGI_SAMPLE_DESC>& aa_modes)
{
aa_modes.clear();
@ -232,10 +275,13 @@ HRESULT Create(HWND wnd)
hr = LoadDXGI();
if (SUCCEEDED(hr)) hr = LoadD3D();
if (SUCCEEDED(hr)) hr = LoadD3DX();
if (SUCCEEDED(hr)) hr = LoadD3DCompiler();
if (FAILED(hr))
{
UnloadDXGI();
UnloadD3D();
UnloadD3DX();
UnloadD3DCompiler();
return hr;
}

View File

@ -17,7 +17,8 @@
#pragma once
#include <d3dx11.h>
#include <D3DX11.h>
#include <D3Dcompiler.h>
#include "Common.h"
#include <vector>
@ -37,9 +38,11 @@ namespace D3D
HRESULT LoadDXGI();
HRESULT LoadD3D();
HRESULT LoadD3DX();
HRESULT LoadD3DCompiler();
void UnloadDXGI();
void UnloadD3D();
void UnloadD3DX();
void UnloadD3DCompiler();
void EnumAAModes(IDXGIAdapter* adapter, std::vector<DXGI_SAMPLE_DESC>& aa_modes);
DXGI_SAMPLE_DESC GetAAMode(int index);
@ -72,7 +75,7 @@ unsigned int GetMaxTextureSize();
inline void SetDebugObjectName(ID3D11DeviceChild* resource, const char* name)
{
#if defined(_DEBUG) || defined(DEBUGFAST)
resource->SetPrivateData( WKPDID_D3DDebugObjectName, strlen(name), name);
resource->SetPrivateData( WKPDID_D3DDebugObjectName, (UINT)strlen(name), name);
#endif
}
@ -105,4 +108,7 @@ extern CREATEDXGIFACTORY PCreateDXGIFactory;
typedef HRESULT (WINAPI* D3D11CREATEDEVICE)(IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, UINT, CONST D3D_FEATURE_LEVEL*, UINT, UINT, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext**);
extern D3D11CREATEDEVICE PD3D11CreateDevice;
typedef HRESULT (WINAPI *D3DREFLECT)(LPCVOID, SIZE_T, REFIID, void**);
extern D3DREFLECT PD3DReflect;
} // namespace DX11

View File

@ -54,8 +54,14 @@ bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob)
#endif
HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, NULL, NULL, "main", D3D::VertexShaderVersionString(),
flags, 0, NULL, &shaderBuffer, &errorBuffer, NULL);
if (errorBuffer)
{
INFO_LOG(VIDEO, "Vertex shader compiler messages:\n%s\n",
(const char*)errorBuffer->GetBufferPointer());
}
if (FAILED(hr) || errorBuffer)
if (FAILED(hr))
{
if (g_ActiveConfig.bShowShaderErrors)
{
@ -90,7 +96,8 @@ ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned
}
// code->bytecode
bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob)
bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob,
const D3D_SHADER_MACRO* pDefines)
{
ID3D10Blob* shaderBuffer = NULL;
ID3D10Blob* errorBuffer = NULL;
@ -100,10 +107,16 @@ bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob)
#else
UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3;
#endif
HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, NULL, NULL, "main", D3D::PixelShaderVersionString(),
HRESULT hr = PD3DX11CompileFromMemory(code, len, NULL, pDefines, NULL, "main", D3D::PixelShaderVersionString(),
flags, 0, NULL, &shaderBuffer, &errorBuffer, NULL);
if (errorBuffer)
{
INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s",
(const char*)errorBuffer->GetBufferPointer());
}
if (FAILED(hr) || errorBuffer)
if (FAILED(hr))
{
if (g_ActiveConfig.bShowShaderErrors)
{
@ -121,6 +134,7 @@ bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob)
*blob = new D3DBlob(shaderBuffer);
shaderBuffer->Release();
}
return SUCCEEDED(hr);
}

View File

@ -17,6 +17,7 @@
#pragma once
#include "D3DBase.h"
#include "D3DBlob.h"
struct ID3D11PixelShader;
@ -32,7 +33,7 @@ namespace D3D
// The returned bytecode buffers should be Release()d.
bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob);
bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob);
bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines = NULL);
// Utility functions
ID3D11VertexShader* CompileAndCreateVertexShader(const char* code, unsigned int len);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,119 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _PSTEXTUREENCODER_H
#define _PSTEXTUREENCODER_H
#include "TextureEncoder.h"
struct ID3D11Texture2D;
struct ID3D11RenderTargetView;
struct ID3D11Buffer;
struct ID3D11InputLayout;
struct ID3D11VertexShader;
struct ID3D11PixelShader;
struct ID3D11ClassLinkage;
struct ID3D11ClassInstance;
struct ID3D11BlendState;
struct ID3D11DepthStencilState;
struct ID3D11RasterizerState;
struct ID3D11SamplerState;
namespace DX11
{
// TextureEncoder implementation for the DX11 backend. Judging by the name and
// the members below it performs the encode with pixel shaders; the
// implementation itself is not part of this header.
// Two shader-selection strategies are declared: a "static" mode that keeps one
// pixel shader per parameter combination, and a "dynamic" mode built on class
// linkage (ID3D11ClassLinkage / ID3D11ClassInstance).
class PSTextureEncoder : public TextureEncoder
{
public:
PSTextureEncoder();
// Implementations of the TextureEncoder interface.
void Init();
void Shutdown();
// Encodes srcRect of the EFB into dst; per the TextureEncoder interface the
// return value is the size in bytes of the encoded data.
size_t Encode(u8* dst, unsigned int dstFormat,
unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf);
private:
// NOTE(review): presumably true once Init() has created all resources and
// Encode() may be used - confirm against the implementation.
bool m_ready;
// NOTE(review): going by the names, m_out is the render target the encode
// pass writes to, m_outRTV its render target view and m_outStage a staging
// copy for CPU read-back - confirm against the implementation.
ID3D11Texture2D* m_out;
ID3D11RenderTargetView* m_outRTV;
ID3D11Texture2D* m_outStage;
// Remaining D3D11 objects owned by the encoder: buffers, vertex shader and
// input layout, and pipeline state objects.
ID3D11Buffer* m_encodeParams;
ID3D11Buffer* m_quad;
ID3D11VertexShader* m_vShader;
ID3D11InputLayout* m_quadLayout;
ID3D11BlendState* m_efbEncodeBlendState;
ID3D11DepthStencilState* m_efbEncodeDepthState;
ID3D11RasterizerState* m_efbEncodeRastState;
ID3D11SamplerState* m_efbSampler;
// Stuff only used in static-linking mode (SM4.0-compatible)
bool InitStaticMode();
bool SetStaticShader(unsigned int dstFormat, unsigned int srcFormat,
bool isIntensity, bool scaleByHalf);
typedef unsigned int ComboKey; // Key for a shader combination
// Packs the four encode parameters into one key:
//   bit 0      scaleByHalf
//   bit 1      isIntensity
//   bits 2..3  srcFormat
//   bits 4..   dstFormat
// NOTE(review): srcFormat only has two bits before it overlaps dstFormat, so
// keys are unique only while srcFormat < 4 - confirm callers guarantee that.
ComboKey MakeComboKey(unsigned int dstFormat, unsigned int srcFormat,
bool isIntensity, bool scaleByHalf)
{
return (dstFormat << 4) | (srcFormat << 2) | (isIntensity ? (1<<1) : 0)
| (scaleByHalf ? (1<<0) : 0);
}
// Pixel shaders for static mode, looked up by ComboKey.
typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
ComboMap m_staticShaders;
// Stuff only used for dynamic-linking mode (SM5.0+, available as soon as
// Microsoft fixes their bloody HLSL compiler)
bool InitDynamicMode();
bool SetDynamicShader(unsigned int dstFormat, unsigned int srcFormat,
bool isIntensity, bool scaleByHalf);
ID3D11PixelShader* m_dynamicShader;
ID3D11ClassLinkage* m_classLinkage;
// Interface slots
UINT m_fetchSlot;
UINT m_scaledFetchSlot;
UINT m_intensitySlot;
UINT m_generatorSlot;
// Class instances
// Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z
ID3D11ClassInstance* m_fetchClass[4];
// ScaledFetch: 0 is off, 1 is on
ID3D11ClassInstance* m_scaledFetchClass[2];
// Intensity: 0 is off, 1 is on
ID3D11ClassInstance* m_intensityClass[2];
// Generator: one for each dst format, 16 total
ID3D11ClassInstance* m_generatorClass[16];
// NOTE(review): presumably the per-slot class instance array handed to D3D
// when binding m_dynamicShader - confirm against the implementation.
std::vector<ID3D11ClassInstance*> m_linkageArray;
};
}
#endif

View File

@ -23,12 +23,17 @@
#include "PixelShaderCache.h"
#include "TextureCache.h"
#include "VertexShaderCache.h"
#include "TextureEncoder.h"
#include "PSTextureEncoder.h"
#include "HW/Memmap.h"
#include "VideoConfig.h"
namespace DX11
{
#define MAX_COPY_BUFFERS 25
ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {};
static TextureEncoder* g_encoder = NULL;
const size_t MAX_COPY_BUFFERS = 25;
ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = { 0 };
TextureCache::TCacheEntry::~TCacheEntry()
{
@ -92,48 +97,70 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width,
return entry;
}
void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf,
unsigned int cbufid, const float colmat[], const EFBRectangle &source_rect,
bool bIsIntensityFmt, u32 copyfmt)
void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
unsigned int srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat)
{
g_renderer->ResetAPIState();
// stretch picture with increased internal resolution
const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)virtualW, (float)virtualH);
D3D::context->RSSetViewports(1, &vp);
// set transformation
if (NULL == efbcopycbuf[cbufid])
if (!isDynamic || g_ActiveConfig.bCopyEFBToTexture)
{
const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]);
CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbufid);
D3D::SetDebugObjectName((ID3D11DeviceChild*)efbcopycbuf[cbufid], "a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
D3D::context->PSSetConstantBuffers(0, 1, &efbcopycbuf[cbufid]);
g_renderer->ResetAPIState();
const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);
// TODO: try targetSource.asRECT();
const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom);
// stretch picture with increased internal resolution
const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)virtualW, (float)virtualH);
D3D::context->RSSetViewports(1, &vp);
// Use linear filtering if (bScaleByHalf), use point filtering otherwise
if (bScaleByHalf)
D3D::SetLinearCopySampler();
else
D3D::SetPointCopySampler();
// set transformation
if (NULL == efbcopycbuf[cbufid])
{
const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]);
CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbufid);
D3D::SetDebugObjectName((ID3D11DeviceChild*)efbcopycbuf[cbufid], "a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
D3D::context->PSSetConstantBuffers(0, 1, &efbcopycbuf[cbufid]);
D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), NULL);
const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect);
// TODO: try targetSource.asRECT();
const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom);
D3D::drawShadedTexQuad(
(bFromZBuffer) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(),
&sourcerect, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(),
(bFromZBuffer) ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true),
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout());
// Use linear filtering if (bScaleByHalf), use point filtering otherwise
if (scaleByHalf)
D3D::SetLinearCopySampler();
else
D3D::SetPointCopySampler();
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), NULL);
// Create texture copy
D3D::drawShadedTexQuad(
(srcFormat == PIXELFMT_Z24) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(),
&sourcerect, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(),
(srcFormat == PIXELFMT_Z24) ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true),
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout());
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
g_renderer->RestoreAPIState();
g_renderer->RestoreAPIState();
}
if (!g_ActiveConfig.bCopyEFBToTexture)
{
u8* dst = Memory::GetPointer(dstAddr);
size_t encodeSize = g_encoder->Encode(dst, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf);
hash = GetHash64(dst, encodeSize, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (g_ActiveConfig.bEFBCopyCacheEnable)
{
// If the texture in RAM is already in the texture cache,
// do not copy it again as it has not changed.
if (TextureCache::Find(dstAddr, hash))
return;
}
TextureCache::MakeRangeDynamic(dstAddr, encodeSize);
}
}
TextureCache::TCacheEntryBase* TextureCache::CreateRenderTargetTexture(
@ -146,12 +173,19 @@ TextureCache::TCacheEntryBase* TextureCache::CreateRenderTargetTexture(
// Creates the backend's EFB texture encoder and initializes it.
TextureCache::TextureCache()
{
	// FIXME: Is it safe here?
	g_encoder = new PSTextureEncoder();
	g_encoder->Init();
}
// Releases the EFB-copy constant buffers, then shuts down and destroys the
// texture encoder created by the constructor.
TextureCache::~TextureCache()
{
	// Walk every slot of the constant buffer table.
	unsigned int slot = 0;
	while (slot < MAX_COPY_BUFFERS)
	{
		SAFE_RELEASE(efbcopycbuf[slot]);
		++slot;
	}

	// Shut the encoder down before deleting it, and clear the global pointer
	// so it does not dangle.
	g_encoder->Shutdown();
	delete g_encoder;
	g_encoder = NULL;
}
}

View File

@ -43,9 +43,10 @@ private:
void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int levels, bool autogen_mips = false);
void FromRenderTarget(bool bFromZBuffer, bool bScaleByHalf,
unsigned int cbufid, const float* colmat, const EFBRectangle &source_rect,
bool bIsIntensityFmt, u32 copyfmt);
void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
unsigned int srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat);
void Bind(unsigned int stage);
bool Save(const char filename[]);

View File

@ -0,0 +1,90 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _TEXTUREENCODER_H
#define _TEXTUREENCODER_H
#include "VideoCommon.h"
namespace DX11
{
// 4-bit format: 8x8 texels / cache line
// 8-bit format: 8x4 texels / cache line
// 16-bit format: 4x4 texels / cache line
// 32-bit format: 4x4 texels / 2 cache lines
// Compressed format: 8x8 texels / cache line
// Width, in texels, of one encoded block for each of the 16 format slots.
// NOTE(review): presumably indexed by the EFB copy destination format
// (dstFormat) - confirm against the encoder implementation.
// Slots 13..15 are unknown formats and hold 0, so check for 0 before dividing
// by an entry.
const unsigned int BLOCK_WIDTHS[16] = {
8, // R4
8, // R8 (FIXME: duplicate of R8 below?)
8, // A4 R4
4, // A8 R8
4, // R5 G6 B5
4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4
4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines)
8, // A8
8, // R8 (FIXME: duplicate of R8 above?)
8, // G8
8, // B8
4, // G8 R8
4, // B8 G8
0, 0, 0 // Unknown formats
};
// Height, in texels, of one encoded block for each of the 16 format slots;
// entries line up one-to-one with BLOCK_WIDTHS.
// Slots 13..15 are unknown formats and hold 0, so check for 0 before dividing
// by an entry.
const unsigned int BLOCK_HEIGHTS[16] = {
8, // R4
4, // R8 (FIXME: duplicate of R8 below?)
4, // A4 R4
4, // A8 R8
4, // R5 G6 B5
4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4
4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines)
4, // A8
4, // R8 (FIXME: duplicate of R8 above?)
4, // G8
4, // B8
4, // G8 R8
4, // B8 G8
0, 0, 0 // Unknown formats
};
// Maximum number of bytes that can occur in a texture block-row generated by
// the encoder.
// EFB_WIDTH/4 is the block count per row at the narrowest block width in the
// tables above (4 texels).
// NOTE(review): 64 is presumably the largest block size in bytes (the 32-bit
// format spans two cache lines) - confirm.
static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64;
// The maximum amount of data that the texture encoder can generate in one call:
// one maximal block-row for each of the EFB_HEIGHT/4 block-rows (4 texels is
// the smallest block height in the tables above).
static const UINT MAX_BYTES_PER_ENCODE = MAX_BYTES_PER_BLOCK_ROW*(EFB_HEIGHT/4);
// Abstract interface for an encoder that writes a region of the EFB to memory
// in an encoded texture format.
class TextureEncoder
{
public:
// Virtual so that implementations can be destroyed through a TextureEncoder*.
virtual ~TextureEncoder() { }
// Set-up and tear-down hooks supplied by the implementation.
virtual void Init() = 0;
virtual void Shutdown() = 0;
// Encodes the EFB region srcRect into the buffer at dst.
// dstFormat / srcFormat select the destination and source formats;
// isIntensity and scaleByHalf are encode options passed through from the
// caller.
// Returns size in bytes of encoded block of memory
virtual size_t Encode(u8* dst, unsigned int dstFormat,
unsigned int srcFormat, const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf) = 0;
};
}
#endif

View File

@ -81,7 +81,7 @@ void InitBackendInfo()
{
g_Config.backend_info.APIType = API_D3D11;
g_Config.backend_info.bUseRGBATextures = true; // the GX formats barely match any D3D11 formats
g_Config.backend_info.bSupportsEFBToRAM = false;
g_Config.backend_info.bSupportsEFBToRAM = true;
g_Config.backend_info.bSupportsRealXFB = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bAllowSignedBytes = true;