From ef75f3005d5e09edaa739d298f644be1f28e3f19 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 15:49:35 -0800 Subject: [PATCH] WIP. --- .../VideoBackends/D3D/PSTextureEncoder.cpp | 1286 +---------------- .../Core/VideoBackends/D3D/PSTextureEncoder.h | 44 +- .../VideoCommon/TextureConversionShader.cpp | 35 +- 3 files changed, 74 insertions(+), 1291 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 0ca9ee2211..96f90bb870 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -6,875 +6,32 @@ #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/FramebufferManager.h" #include "VideoBackends/D3D/PSTextureEncoder.h" #include "VideoBackends/D3D/Render.h" #include "VideoBackends/D3D/TextureCache.h" +#include "VideoBackends/D3D/VertexShaderCache.h" -// "Static mode" will compile a new EFB encoder shader for every combination of -// encoding configurations. It's compatible with Shader Model 4. - -// "Dynamic mode" will use the dynamic-linking feature of Shader Model 5. Only -// one shader needs to be compiled. - -// Unfortunately, the June 2010 DirectX SDK includes a broken HLSL compiler -// which cripples dynamic linking for us. -// See . -// Dynamic mode is disabled for now. To enable it, uncomment the line below. - -//#define USE_DYNAMIC_MODE - -// FIXME: When Microsoft fixes their HLSL compiler, make Dolphin enable dynamic -// mode on Shader Model 5-compatible cards. +#include "VideoCommon/TextureConversionShader.h" namespace DX11 { -union EFBEncodeParams +struct EFBEncodeParams { - struct - { - FLOAT NumHalfCacheLinesX; - FLOAT NumBlocksY; - FLOAT PosX; - FLOAT PosY; - FLOAT TexLeft; - FLOAT TexTop; - FLOAT TexRight; - FLOAT TexBottom; - }; - // Constant buffers must be a multiple of 16 bytes in size. - u8 pad[32]; // Pad to the next multiple of 16 bytes + DWORD SrcTop; + DWORD SrcLeft; + DWORD DestWidth; + DWORD ScaleFactor; }; -static const char EFB_ENCODE_VS[] = -"// dolphin-emu EFB encoder vertex shader\n" - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"struct Output\n" -"{\n" - "float4 Pos : SV_Position;\n" - "float2 Coord : ENCODECOORD;\n" -"};\n" - -"Output main(in float2 Pos : POSITION)\n" -"{\n" - "Output result;\n" - "result.Pos = float4(2*Pos.x-1, -2*Pos.y+1, 0.0, 1.0);\n" - "result.Coord = Pos * float2(Params.NumHalfCacheLinesX, Params.NumBlocksY);\n" - "return result;\n" -"}\n" -; - -static const char EFB_ENCODE_PS[] = -"// dolphin-emu EFB encoder pixel shader\n" - -// Input - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"Texture2DArray EFBTexture : register(t0);\n" -"sampler EFBSampler : register(s0);\n" - -// Constants - -"static const float2 INV_EFB_DIMS = float2(1.0/640.0, 1.0/528.0);\n" - -// FIXME: Is this correct? -"static const float3 INTENSITY_COEFFS = float3(0.257, 0.504, 0.098);\n" -"static const float INTENSITY_ADD = 16.0/255.0;\n" - -// Utility functions - -"uint4 Swap4_32(uint4 v) {\n" - "return (((v >> 24) & 0xFF) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | ((v << 24) & 0xFF000000));\n" -"}\n" - -"uint4 UINT4_8888_BE(uint4 a, uint4 b, uint4 c, uint4 d) {\n" - "return (d << 24) | (c << 16) | (b << 8) | a;\n" -"}\n" - -"uint UINT_44444444_BE(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n" - "return (g << 28) | (h << 24) | (e << 20) | (f << 16) | (c << 12) | (d << 8) | (a << 4) | b;\n" -"}\n" - -"uint UINT_1555(uint a, uint b, uint c, uint d) {\n" - "return (a << 15) | (b << 10) | (c << 5) | d;\n" -"}\n" - -"uint UINT_3444(uint a, uint b, uint c, uint d) {\n" - "return (a << 12) | (b << 8) | (c << 4) | d;\n" -"}\n" - -"uint UINT_565(uint a, uint b, uint c) {\n" - "return (a << 11) | (b << 5) | c;\n" -"}\n" - -"uint UINT_1616(uint a, uint b) {\n" - "return (a << 16) | b;\n" -"}\n" - -"uint Float8ToUint3(float v) {\n" - "return (uint)round(v*255.0) >> 5;\n" -"}\n" - -"uint Float8ToUint4(float v) {\n" - "return (uint)round(v*255.0) >> 4;\n" -"}\n" - -"uint Float8ToUint5(float v) {\n" - "return (uint)round(v*255.0) >> 3;\n" -"}\n" - -"uint Float8ToUint6(float v) {\n" - "return (uint)round(v*255.0) >> 2;\n" -"}\n" - -"uint EncodeRGB5A3(float4 pixel) {\n" - "if (pixel.a >= 224.0/255.0) {\n" - // Encode to ARGB1555 - "return UINT_1555(1, Float8ToUint5(pixel.r), Float8ToUint5(pixel.g), Float8ToUint5(pixel.b));\n" - "} else {\n" - // Encode to ARGB3444 - "return UINT_3444(Float8ToUint3(pixel.a), Float8ToUint4(pixel.r), Float8ToUint4(pixel.g), Float8ToUint4(pixel.b));\n" - "}\n" -"}\n" - -"uint EncodeRGB565(float4 pixel) {\n" - "return UINT_565(Float8ToUint5(pixel.r), Float8ToUint6(pixel.g), Float8ToUint5(pixel.b));\n" -"}\n" - -"float2 CalcTexCoord(float2 coord)\n" -"{\n" - // Add 0.5,0.5 to sample from the center of the EFB pixel - "float2 efbCoord = coord + float2(0.5,0.5);\n" - "return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n" -"}\n" - -// Interface and classes for different source formats - -"float4 Fetch_0(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_1(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "return EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" -"}\n" - -"float4 Fetch_2(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_3(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - - "uint depth24 = 0xFFFFFF * EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0)).r;\n" - "uint4 bytes = uint4(\n" - "(depth24 >> 16) & 0xFF,\n" // r - "(depth24 >> 8) & 0xFF,\n" // g - "depth24 & 0xFF,\n" // b - "255);\n" // a - "return bytes / 255.0;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iFetch\n" -"{\n" - "float4 Fetch(float2 coord);\n" -"};\n" - -// Source format 0 -"class cFetch_0 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_0(coord); }\n" -"};\n" - - -// Source format 1 -"class cFetch_1 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_1(coord); }\n" -"};\n" - -// Source format 2 -"class cFetch_2 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_2(coord); }\n" -"};\n" - -// Source format 3 -"class cFetch_3 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_3(coord); }\n" -"};\n" - -// Declare fetch interface; must be set by application -"iFetch g_fetch;\n" -"#define IMP_FETCH g_fetch.Fetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_FETCH\n" -"#error No Fetch specified\n" -"#endif\n" - -// Interface and classes for different intensity settings (on or off) - -"float4 Intensity_0(float4 sample)\n" -"{\n" - "return sample;\n" -"}\n" - -"float4 Intensity_1(float4 sample)\n" -"{\n" - "sample.r = dot(INTENSITY_COEFFS, sample.rgb) + INTENSITY_ADD;\n" - // FIXME: Is this correct? What happens if you use one of the non-R - // formats with intensity on? - "sample = sample.rrrr;\n" - "return sample;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iIntensity\n" -"{\n" - "float4 Intensity(float4 sample);\n" -"};\n" - -// Intensity off -"class cIntensity_0 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_0(sample); }\n" -"};\n" - -// Intensity on -"class cIntensity_1 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_1(sample); }\n" -"};\n" - -// Declare intensity interface; must be set by application -"iIntensity g_intensity;\n" -"#define IMP_INTENSITY g_intensity.Intensity\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_INTENSITY\n" -"#error No Intensity specified\n" -"#endif\n" - - -// Interface and classes for different scale/filter settings (on or off) - -"float4 ScaledFetch_0(float2 coord)\n" -"{\n" - "return IMP_FETCH(float2(Params.PosX,Params.PosY) + coord);\n" -"}\n" - -"float4 ScaledFetch_1(float2 coord)\n" -"{\n" - "float2 ul = float2(Params.PosX,Params.PosY) + 2*coord;\n" - "float4 sample0 = IMP_FETCH(ul+float2(0,0));\n" - "float4 sample1 = IMP_FETCH(ul+float2(1,0));\n" - "float4 sample2 = IMP_FETCH(ul+float2(0,1));\n" - "float4 sample3 = IMP_FETCH(ul+float2(1,1));\n" - // Average all four samples together - // FIXME: Is this correct? - "return 0.25 * (sample0+sample1+sample2+sample3);\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord);\n" -"};\n" - -// Scale off -"class cScaledFetch_0 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_0(coord); }\n" -"};\n" - -// Scale on -"class cScaledFetch_1 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_1(coord); }\n" -"};\n" - -// Declare scaled fetch interface; must be set by application code -"iScaledFetch g_scaledFetch;\n" -"#define IMP_SCALEDFETCH g_scaledFetch.ScaledFetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_SCALEDFETCH\n" -"#error No ScaledFetch specified\n" -"#endif\n" - -// Main EFB-sampling function: performs all steps of fetching pixels, scaling, -// applying intensity function - -"float4 SampleEFB(float2 coord)\n" -"{\n" - // FIXME: Does intensity happen before or after scaling? Or does - // it matter? - "float4 sample = IMP_SCALEDFETCH(coord);\n" - "return IMP_INTENSITY(sample);\n" -"}\n" - -// Interfaces and classes for different destination formats - -"uint4 Generate_0(float2 cacheCoord)\n" // R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,8);\n" - "float2 subBlockUL = blockUL + float2(0, 4*(cacheCoord.x%2));\n" - - "float4 sample[32];\n" - "for (uint y = 0; y < 4; ++y) {\n" - "for (uint x = 0; x < 8; ++x) {\n" - "sample[y*8+x] = SampleEFB(subBlockUL+float2(x,y));\n" - "}\n" - "}\n" - - "uint dw[4];\n" - "for (uint i = 0; i < 4; ++i) {\n" - "dw[i] = UINT_44444444_BE(\n" - "Float8ToUint4(sample[8*i+0].r),\n" - "Float8ToUint4(sample[8*i+1].r),\n" - "Float8ToUint4(sample[8*i+2].r),\n" - "Float8ToUint4(sample[8*i+3].r),\n" - "Float8ToUint4(sample[8*i+4].r),\n" - "Float8ToUint4(sample[8*i+5].r),\n" - "Float8ToUint4(sample[8*i+6].r),\n" - "Float8ToUint4(sample[8*i+7].r)\n" - ");\n" - "}\n" - - "return uint4(dw[0], dw[1], dw[2], dw[3]);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_1(float2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?) -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.r, sample4.r, sample8.r, sampleC.r)),\n" - "round(255*float4(sample1.r, sample5.r, sample9.r, sampleD.r)),\n" - "round(255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r)),\n" - "round(255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_2(float2 cacheCoord)\n" // A4 R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint dw0 = UINT_44444444_BE(\n" - "Float8ToUint4(sample0.a), Float8ToUint4(sample0.r),\n" - "Float8ToUint4(sample1.a), Float8ToUint4(sample1.r),\n" - "Float8ToUint4(sample2.a), Float8ToUint4(sample2.r),\n" - "Float8ToUint4(sample3.a), Float8ToUint4(sample3.r)\n" - ");\n" - "uint dw1 = UINT_44444444_BE(\n" - "Float8ToUint4(sample4.a), Float8ToUint4(sample4.r),\n" - "Float8ToUint4(sample5.a), Float8ToUint4(sample5.r),\n" - "Float8ToUint4(sample6.a), Float8ToUint4(sample6.r),\n" - "Float8ToUint4(sample7.a), Float8ToUint4(sample7.r)\n" - ");\n" - "uint dw2 = UINT_44444444_BE(\n" - "Float8ToUint4(sample8.a), Float8ToUint4(sample8.r),\n" - "Float8ToUint4(sample9.a), Float8ToUint4(sample9.r),\n" - "Float8ToUint4(sampleA.a), Float8ToUint4(sampleA.r),\n" - "Float8ToUint4(sampleB.a), Float8ToUint4(sampleB.r)\n" - ");\n" - "uint dw3 = UINT_44444444_BE(\n" - "Float8ToUint4(sampleC.a), Float8ToUint4(sampleC.r),\n" - "Float8ToUint4(sampleD.a), Float8ToUint4(sampleD.r),\n" - "Float8ToUint4(sampleE.a), Float8ToUint4(sampleE.r),\n" - "Float8ToUint4(sampleF.a), Float8ToUint4(sampleF.r)\n" - ");\n" - - "return uint4(dw0, dw1, dw2, dw3);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_3(float2 cacheCoord)\n" // A8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample2.a, sample4.a, sample6.a)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.a, sample3.a, sample5.a, sample7.a)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_4(float2 cacheCoord)\n" // R5 G6 B5 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB565(sample4), EncodeRGB565(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB565(sample6), EncodeRGB565(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_5(float2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB5A3(sample4), EncodeRGB5A3(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB5A3(sample6), EncodeRGB5A3(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_6(float2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(4,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4;\n" - "if (cacheCoord.x % 4 < 2)\n" - "{\n" - // First cache line gets AR - "dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample2.a, sample4.a, sample6.a)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.a, sample3.a, sample5.a, sample7.a)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - "}\n" - "else\n" - "{\n" - // Second cache line gets GB - "dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample0.b, sample2.b, sample4.b, sample6.b)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g)),\n" - "round(255*float4(sample1.b, sample3.b, sample5.b, sample7.b))\n" - ");\n" - "}\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_7(float2 cacheCoord)\n" // A8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample4.a, sample8.a, sampleC.a)),\n" - "round(255*float4(sample1.a, sample5.a, sample9.a, sampleD.a)),\n" - "round(255*float4(sample2.a, sample6.a, sampleA.a, sampleE.a)),\n" - "round(255*float4(sample3.a, sample7.a, sampleB.a, sampleF.a))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_8(float2 cacheCoord)\n" // R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.r, sample4.r, sample8.r, sampleC.r)),\n" - "round(255*float4(sample1.r, sample5.r, sample9.r, sampleD.r)),\n" - "round(255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r)),\n" - "round(255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_9(float2 cacheCoord)\n" // G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample4.g, sample8.g, sampleC.g)),\n" - "round(255*float4(sample1.g, sample5.g, sample9.g, sampleD.g)),\n" - "round(255*float4(sample2.g, sample6.g, sampleA.g, sampleE.g)),\n" - "round(255*float4(sample3.g, sample7.g, sampleB.g, sampleF.g))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_A(float2 cacheCoord)\n" // B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.b, sample4.b, sample8.b, sampleC.b)),\n" - "round(255*float4(sample1.b, sample5.b, sample9.b, sampleD.b)),\n" - "round(255*float4(sample2.b, sample6.b, sampleA.b, sampleE.b)),\n" - "round(255*float4(sample3.b, sample7.b, sampleB.b, sampleF.b))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_B(float2 cacheCoord)\n" // G8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_C(float2 cacheCoord)\n" // B8 G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.b, sample2.b, sample4.b, sample6.b)),\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample1.b, sample3.b, sample5.b, sample7.b)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord);\n" -"};\n" - -"class cGenerator_4 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_4(cacheCoord); }\n" -"};\n" - -"class cGenerator_5 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_5(cacheCoord); }\n" -"};\n" - -"class cGenerator_6 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_6(cacheCoord); }\n" -"};\n" - -"class cGenerator_8 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_8(cacheCoord); }\n" -"};\n" - -"class cGenerator_B : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_B(cacheCoord); }\n" -"};\n" - -// Declare generator interface; must be set by application -"iGenerator g_generator;\n" -"#define IMP_GENERATOR g_generator.Generate\n" - -"#endif\n" - -"#ifndef IMP_GENERATOR\n" -"#error No generator specified\n" -"#endif\n" - -"void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n" -"{\n" - "float2 cacheCoord = floor(fCacheCoord);\n" - "ocol0 = IMP_GENERATOR(cacheCoord);\n" -"}\n" -; - PSTextureEncoder::PSTextureEncoder() : m_ready(false), m_out(nullptr), m_outRTV(nullptr), m_outStage(nullptr), - m_encodeParams(nullptr), - m_quad(nullptr), m_vShader(nullptr), m_quadLayout(nullptr), - m_efbEncodeBlendState(nullptr), m_efbEncodeDepthState(nullptr), - m_efbEncodeRastState(nullptr), m_efbSampler(nullptr), - m_dynamicShader(nullptr), m_classLinkage(nullptr) + m_encodeParams(nullptr) { - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = nullptr; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = nullptr; } -static const D3D11_INPUT_ELEMENT_DESC QUAD_LAYOUT_DESC[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 } -}; - -static const struct QuadVertex -{ - float posX; - float posY; -} QUAD_VERTS[4] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; - void PSTextureEncoder::Init() { m_ready = false; @@ -882,25 +39,21 @@ void PSTextureEncoder::Init() HRESULT hr; // Create output texture RGBA format - - // This format allows us to generate one cache line in two pixels. D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( - DXGI_FORMAT_R32G32B32A32_UINT, - EFB_WIDTH, EFB_HEIGHT/4, 1, 1, D3D11_BIND_RENDER_TARGET); + DXGI_FORMAT_R8G8B8A8_UINT, + EFB_WIDTH, EFB_HEIGHT, 1, 1, D3D11_BIND_RENDER_TARGET); hr = D3D::device->CreateTexture2D(&t2dd, nullptr, &m_out); CHECK(SUCCEEDED(hr), "create efb encode output texture"); D3D::SetDebugObjectName(m_out, "efb encoder output texture"); // Create output render target view - D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, - D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R32G32B32A32_UINT); + D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UINT); hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); CHECK(SUCCEEDED(hr), "create efb encode output render target view"); D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); // Create output staging buffer - t2dd.Usage = D3D11_USAGE_STAGING; t2dd.BindFlags = 0; t2dd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; @@ -909,87 +62,12 @@ void PSTextureEncoder::Init() D3D::SetDebugObjectName(m_outStage, "efb encoder output staging buffer"); // Create constant buffer for uploading data to shaders - D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER); hr = D3D::device->CreateBuffer(&bd, nullptr, &m_encodeParams); CHECK(SUCCEEDED(hr), "create efb encode params buffer"); D3D::SetDebugObjectName(m_encodeParams, "efb encoder params buffer"); - // Create vertex quad - - bd = CD3D11_BUFFER_DESC(sizeof(QUAD_VERTS), D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_IMMUTABLE); - D3D11_SUBRESOURCE_DATA srd = { QUAD_VERTS, 0, 0 }; - - hr = D3D::device->CreateBuffer(&bd, &srd, &m_quad); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex buffer"); - D3D::SetDebugObjectName(m_quad, "efb encoder quad vertex buffer"); - - // Create vertex shader - - D3DBlob* bytecode = nullptr; - if (!D3D::CompileVertexShader(EFB_ENCODE_VS, &bytecode)) - { - ERROR_LOG(VIDEO, "EFB encode vertex shader failed to compile"); - return; - } - - hr = D3D::device->CreateVertexShader(bytecode->Data(), bytecode->Size(), nullptr, &m_vShader); - CHECK(SUCCEEDED(hr), "create efb encode vertex shader"); - D3D::SetDebugObjectName(m_vShader, "efb encoder vertex shader"); - - // Create input layout for vertex quad using bytecode from vertex shader - - hr = D3D::device->CreateInputLayout(QUAD_LAYOUT_DESC, - sizeof(QUAD_LAYOUT_DESC)/sizeof(D3D11_INPUT_ELEMENT_DESC), - bytecode->Data(), bytecode->Size(), &m_quadLayout); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex layout"); - D3D::SetDebugObjectName(m_quadLayout, "efb encoder quad layout"); - - bytecode->Release(); - - // Create pixel shader - -#ifdef USE_DYNAMIC_MODE - if (!InitDynamicMode()) -#else - if (!InitStaticMode()) -#endif - return; - - // Create blend state - - D3D11_BLEND_DESC bld = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = D3D::device->CreateBlendState(&bld, &m_efbEncodeBlendState); - CHECK(SUCCEEDED(hr), "create efb encode blend state"); - D3D::SetDebugObjectName(m_efbEncodeBlendState, "efb encoder blend state"); - - // Create depth state - - D3D11_DEPTH_STENCIL_DESC dsd = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - dsd.DepthEnable = FALSE; - hr = D3D::device->CreateDepthStencilState(&dsd, &m_efbEncodeDepthState); - CHECK(SUCCEEDED(hr), "create efb encode depth state"); - D3D::SetDebugObjectName(m_efbEncodeDepthState, "efb encoder depth state"); - - // Create rasterizer state - - D3D11_RASTERIZER_DESC rd = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rd.CullMode = D3D11_CULL_NONE; - rd.DepthClipEnable = FALSE; - hr = D3D::device->CreateRasterizerState(&rd, &m_efbEncodeRastState); - CHECK(SUCCEEDED(hr), "create efb encode rast state"); - D3D::SetDebugObjectName(m_efbEncodeRastState, "efb encoder rast state"); - - // Create efb texture sampler - - D3D11_SAMPLER_DESC sd = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); - CHECK(SUCCEEDED(hr), "create efb encode texture sampler"); - D3D::SetDebugObjectName(m_efbSampler, "efb encoder texture sampler"); - m_ready = true; } @@ -997,32 +75,12 @@ void PSTextureEncoder::Shutdown() { m_ready = false; - for (size_t i = 0; i < 4; ++i) - SAFE_RELEASE(m_fetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_scaledFetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_intensityClass[i]); - for (size_t i = 0; i < 16; ++i) - SAFE_RELEASE(m_generatorClass[i]); - m_linkageArray.clear(); - - SAFE_RELEASE(m_classLinkage); - SAFE_RELEASE(m_dynamicShader); - for (auto& it : m_staticShaders) { SAFE_RELEASE(it.second); } m_staticShaders.clear(); - SAFE_RELEASE(m_efbSampler); - SAFE_RELEASE(m_efbEncodeRastState); - SAFE_RELEASE(m_efbEncodeDepthState); - SAFE_RELEASE(m_efbEncodeBlendState); - SAFE_RELEASE(m_quadLayout); - SAFE_RELEASE(m_vShader); - SAFE_RELEASE(m_quad); SAFE_RELEASE(m_encodeParams); SAFE_RELEASE(m_outStage); SAFE_RELEASE(m_outRTV); @@ -1072,33 +130,14 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, size_t encodeSize = 0; // Reset API - g_renderer->ResetAPIState(); // Set up all the state for EFB encoding -#ifdef USE_DYNAMIC_MODE - if (SetDynamicShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#else - if (SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#endif { - D3D::stateman->SetVertexShader(m_vShader); - D3D::stateman->SetGeometryShader(nullptr); - - D3D::stateman->PushBlendState(m_efbEncodeBlendState); - D3D::stateman->PushDepthState(m_efbEncodeDepthState); - D3D::stateman->PushRasterizerState(m_efbEncodeRastState); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2), FLOAT(numBlocksY)); + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2*4), FLOAT(numBlocksY*4)); D3D::context->RSSetViewports(1, &vp); - D3D::stateman->SetInputLayout(m_quadLayout); - D3D::stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - UINT stride = sizeof(QuadVertex); - UINT offset = 0; - D3D::stateman->SetVertexBuffer(m_quad, stride, offset); - EFBRectangle fullSrcRect; fullSrcRect.left = 0; fullSrcRect.top = 0; @@ -1106,17 +145,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, fullSrcRect.bottom = EFB_HEIGHT; TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); - EFBEncodeParams params = { 0 }; - params.NumHalfCacheLinesX = FLOAT(cacheLinesPerRow*2); - params.NumBlocksY = FLOAT(numBlocksY); - params.PosX = FLOAT(correctSrc.left); - params.PosY = FLOAT(correctSrc.top); - params.TexLeft = float(targetRect.left) / g_renderer->GetTargetWidth(); - params.TexTop = float(targetRect.top) / g_renderer->GetTargetHeight(); - params.TexRight = float(targetRect.right) / g_renderer->GetTargetWidth(); - params.TexBottom = float(targetRect.bottom) / g_renderer->GetTargetHeight(); - D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); - D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ? @@ -1126,41 +154,27 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, // expecting the blurred edges around multisampled shapes. FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - D3D::stateman->SetVertexConstants(m_encodeParams); - D3D::stateman->SetPixelConstants(m_encodeParams); - D3D::stateman->SetTexture(0, pEFB); - D3D::stateman->SetSampler(0, m_efbSampler); + EFBEncodeParams params; + params.SrcLeft = correctSrc.left; + params.SrcLeft = correctSrc.top; + params.DestWidth = actualWidth; + params.ScaleFactor = scaleByHalf ? 2 : 1; + D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); + D3D::stateman->SetPixelConstants(0, m_encodeParams); - // Encode! - - D3D::stateman->Apply(); - D3D::context->Draw(4, 0); + D3D::drawShadedTexQuad(pEFB, + targetRect.AsRECT(), + Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf), + VertexShaderCache::GetSimpleVertexShader(), + VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer - - D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2, numBlocksY, 1); + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2*4, numBlocksY*4, 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); - // Clean up state - - D3D::context->OMSetRenderTargets(0, nullptr, nullptr); - - D3D::stateman->SetSampler(0, nullptr); - D3D::stateman->SetTexture(0, nullptr); - D3D::stateman->SetPixelConstants(nullptr); - D3D::stateman->SetVertexConstants(nullptr); - - D3D::stateman->SetPixelShader(nullptr); - D3D::stateman->SetVertexBuffer(nullptr, 0, 0); - - D3D::stateman->PopRasterizerState(); - D3D::stateman->PopDepthState(); - D3D::stateman->PopBlendState(); - - D3D::stateman->Apply(); - // Transfer staging buffer to GameCube/Wii RAM - D3D11_MAPPED_SUBRESOURCE map = { 0 }; hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); @@ -1179,7 +193,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, } // Restore API - g_renderer->RestoreAPIState(); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), @@ -1188,25 +201,7 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, return encodeSize; } -bool PSTextureEncoder::InitStaticMode() -{ - // Nothing to really do. - return true; -} - -static const char* FETCH_FUNC_NAMES[4] = { - "Fetch_0", "Fetch_1", "Fetch_2", "Fetch_3" -}; - -static const char* SCALEDFETCH_FUNC_NAMES[2] = { - "ScaledFetch_0", "ScaledFetch_1" -}; - -static const char* INTENSITY_FUNC_NAMES[2] = { - "Intensity_0", "Intensity_1" -}; - -bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, +ID3D11PixelShader* PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf) { size_t fetchNum = static_cast(srcFormat); @@ -1219,48 +214,33 @@ bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelF ComboMap::iterator it = m_staticShaders.find(key); if (it == m_staticShaders.end()) { - const char* generatorFuncName = nullptr; - switch (generatorNum) - { - case 0x0: generatorFuncName = "Generate_0"; break; - case 0x1: generatorFuncName = "Generate_1"; break; - case 0x2: generatorFuncName = "Generate_2"; break; - case 0x3: generatorFuncName = "Generate_3"; break; - case 0x4: generatorFuncName = "Generate_4"; break; - case 0x5: generatorFuncName = "Generate_5"; break; - case 0x6: generatorFuncName = "Generate_6"; break; - case 0x7: generatorFuncName = "Generate_7"; break; - case 0x8: generatorFuncName = "Generate_8"; break; - case 0x9: generatorFuncName = "Generate_9"; break; - case 0xA: generatorFuncName = "Generate_A"; break; - case 0xB: generatorFuncName = "Generate_B"; break; - case 0xC: generatorFuncName = "Generate_C"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - m_staticShaders[key] = nullptr; - return false; - } - INFO_LOG(VIDEO, "Compiling efb encoding shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d", dstFormat, static_cast(srcFormat), isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - // Shader permutation not found, so compile it + u32 format = dstFormat; + + if (srcFormat == PEControl::Z24) + { + format |= _GX_TF_ZTF; + if (dstFormat == 11) + format = GX_TF_Z16; + else if (format < GX_TF_Z8 || format > GX_TF_Z24X8) + format |= _GX_TF_CTF; + } + else + { + if (dstFormat > GX_TF_RGBA8 || (dstFormat < GX_TF_RGB565 && !isIntensity)) + format |= _GX_TF_CTF; + } + D3DBlob* bytecode = nullptr; - D3D_SHADER_MACRO macros[] = { - { "IMP_FETCH", FETCH_FUNC_NAMES[fetchNum] }, - { "IMP_SCALEDFETCH", SCALEDFETCH_FUNC_NAMES[scaledFetchNum] }, - { "IMP_INTENSITY", INTENSITY_FUNC_NAMES[intensityNum] }, - { "IMP_GENERATOR", generatorFuncName }, - { nullptr, nullptr } - }; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, &bytecode, macros)) + const char* shader = TextureConversionShader::GenerateEncodingShader(format, API_D3D); + if (!D3D::CompilePixelShader(shader, &bytecode)) { WARN_LOG(VIDEO, "EFB encoder shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d failed to compile", dstFormat, static_cast(srcFormat), isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - // Add dummy shader to map to prevent trying to compile over and - // over again m_staticShaders[key] = nullptr; - return false; + return nullptr; } ID3D11PixelShader* newShader; @@ -1271,175 +251,7 @@ bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelF bytecode->Release(); } - if (it != m_staticShaders.end()) - { - if (it->second) - { - D3D::stateman->SetPixelShader(it->second); - return true; - } - else - return false; - } - else - return false; -} - -bool PSTextureEncoder::InitDynamicMode() -{ - HRESULT hr; - - D3D_SHADER_MACRO macros[] = { - { "DYNAMIC_MODE", nullptr }, - { nullptr, nullptr } - }; - - D3DBlob* bytecode = nullptr; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, &bytecode, macros)) - { - ERROR_LOG(VIDEO, "EFB encode pixel shader failed to compile"); - return false; - } - - hr = D3D::device->CreateClassLinkage(&m_classLinkage); - CHECK(SUCCEEDED(hr), "create efb encode class linkage"); - D3D::SetDebugObjectName(m_classLinkage, "efb encoder class linkage"); - - hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), m_classLinkage, &m_dynamicShader); - CHECK(SUCCEEDED(hr), "create efb encode pixel shader"); - D3D::SetDebugObjectName(m_dynamicShader, "efb encoder pixel shader"); - - // Use D3DReflect - - ID3D11ShaderReflection* reflect = nullptr; - hr = PD3DReflect(bytecode->Data(), bytecode->Size(), IID_ID3D11ShaderReflection, (void**)&reflect); - CHECK(SUCCEEDED(hr), "reflect on efb encoder shader"); - - // Get number of slots and create dynamic linkage array - - UINT numSlots = reflect->GetNumInterfaceSlots(); - m_linkageArray.resize(numSlots, nullptr); - - // Get interface slots - - ID3D11ShaderReflectionVariable* var = reflect->GetVariableByName("g_fetch"); - m_fetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_scaledFetch"); - m_scaledFetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_intensity"); - m_intensitySlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_generator"); - m_generatorSlot = var->GetInterfaceSlot(0); - - INFO_LOG(VIDEO, "Fetch slot %d, scaledFetch slot %d, intensity slot %d, generator slot %d", - m_fetchSlot, m_scaledFetchSlot, m_intensitySlot, m_generatorSlot); - - // Class instances will be created at the time they are used - - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = nullptr; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = nullptr; - - reflect->Release(); - bytecode->Release(); - - return true; -} - -static const char* FETCH_CLASS_NAMES[4] = { - "cFetch_0", "cFetch_1", "cFetch_2", "cFetch_3" -}; - -static const char* SCALEDFETCH_CLASS_NAMES[2] = { - "cScaledFetch_0", "cScaledFetch_1" -}; - -static const char* INTENSITY_CLASS_NAMES[2] = { - "cIntensity_0", "cIntensity_1" -}; - -bool PSTextureEncoder::SetDynamicShader(unsigned int dstFormat, - PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf) -{ - size_t fetchNum = static_cast(srcFormat); - size_t scaledFetchNum = scaleByHalf ? 1 : 0; - size_t intensityNum = isIntensity ? 1 : 0; - size_t generatorNum = dstFormat; - - // FIXME: Not all the possible generators are available as classes yet. - // When dynamic mode is usable, implement them. - const char* generatorName = nullptr; - switch (generatorNum) - { - case 0x4: generatorName = "cGenerator_4"; break; - case 0x5: generatorName = "cGenerator_5"; break; - case 0x6: generatorName = "cGenerator_6"; break; - case 0x8: generatorName = "cGenerator_8"; break; - case 0xB: generatorName = "cGenerator_B"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - return false; - } - - // Make sure class instances are available - if (!m_fetchClass[fetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - FETCH_CLASS_NAMES[fetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - FETCH_CLASS_NAMES[fetchNum], 0, 0, 0, 0, &m_fetchClass[fetchNum]); - CHECK(SUCCEEDED(hr), "create fetch class instance"); - } - if (!m_scaledFetchClass[scaledFetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], 0, 0, 0, 0, - &m_scaledFetchClass[scaledFetchNum]); - CHECK(SUCCEEDED(hr), "create scaled fetch class instance"); - } - if (!m_intensityClass[intensityNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - INTENSITY_CLASS_NAMES[intensityNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - INTENSITY_CLASS_NAMES[intensityNum], 0, 0, 0, 0, - &m_intensityClass[intensityNum]); - CHECK(SUCCEEDED(hr), "create intensity class instance"); - } - if (!m_generatorClass[generatorNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - generatorName, dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - generatorName, 0, 0, 0, 0, &m_generatorClass[generatorNum]); - CHECK(SUCCEEDED(hr), "create generator class instance"); - } - - // Assemble dynamic linkage array - if (m_fetchSlot != UINT(-1)) - m_linkageArray[m_fetchSlot] = m_fetchClass[fetchNum]; - if (m_scaledFetchSlot != UINT(-1)) - m_linkageArray[m_scaledFetchSlot] = m_scaledFetchClass[scaledFetchNum]; - if (m_intensitySlot != UINT(-1)) - m_linkageArray[m_intensitySlot] = m_intensityClass[intensityNum]; - if (m_generatorSlot != UINT(-1)) - m_linkageArray[m_generatorSlot] = m_generatorClass[generatorNum]; - - D3D::stateman->SetPixelShaderDynamic(m_dynamicShader, - m_linkageArray.empty() ? nullptr : &m_linkageArray[0], - (UINT)m_linkageArray.size()); - - return true; + return it->second; } } diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index c14020acfe..21c927c6e5 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -24,9 +24,7 @@ namespace DX11 class PSTextureEncoder : public TextureEncoder { - public: - PSTextureEncoder(); void Init(); @@ -36,25 +34,14 @@ public: bool isIntensity, bool scaleByHalf); private: - bool m_ready; ID3D11Texture2D* m_out; ID3D11RenderTargetView* m_outRTV; ID3D11Texture2D* m_outStage; ID3D11Buffer* m_encodeParams; - ID3D11Buffer* m_quad; - ID3D11VertexShader* m_vShader; - ID3D11InputLayout* m_quadLayout; - ID3D11BlendState* m_efbEncodeBlendState; - ID3D11DepthStencilState* m_efbEncodeDepthState; - ID3D11RasterizerState* m_efbEncodeRastState; - ID3D11SamplerState* m_efbSampler; - // Stuff only used in static-linking mode (SM4.0-compatible) - - bool InitStaticMode(); - bool SetStaticShader(unsigned int dstFormat, + ID3D11PixelShader* SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf); typedef unsigned int ComboKey; // Key for a shader combination @@ -69,35 +56,6 @@ private: typedef std::map ComboMap; ComboMap m_staticShaders; - - // Stuff only used for dynamic-linking mode (SM5.0+, available as soon as - // Microsoft fixes their bloody HLSL compiler) - - bool InitDynamicMode(); - bool SetDynamicShader(unsigned int dstFormat, - PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf); - - ID3D11PixelShader* m_dynamicShader; - ID3D11ClassLinkage* m_classLinkage; - - // Interface slots - UINT m_fetchSlot; - UINT m_scaledFetchSlot; - UINT m_intensitySlot; - UINT m_generatorSlot; - - // Class instances - // Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z - ID3D11ClassInstance* m_fetchClass[4]; - // ScaledFetch: 0 is off, 1 is on - ID3D11ClassInstance* m_scaledFetchClass[2]; - // Intensity: 0 is off, 1 is on - ID3D11ClassInstance* m_intensityClass[2]; - // Generator: one for each dst format, 16 total - ID3D11ClassInstance* m_generatorClass[16]; - - std::vector m_linkageArray; - }; } diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 8734eb42c3..3e09113518 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -70,21 +70,24 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " out vec4 ocol0;\n"); WRITE(p, "void main()\n"); + WRITE(p, "{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n" + ); } else // D3D { - WRITE(p,"sampler samp0 : register(s0);\n"); + WRITE(p, "sampler samp0 : register(s0);\n"); WRITE(p, "Texture2D Tex0 : register(t0);\n"); - WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target)\n"); + WRITE(p, "void main(\n"); + WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); + WRITE(p, "{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2((rawpos + 1) / 2 * float2(640, 528));\n" + ); } - WRITE(p, "{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n" - ); - WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1)); WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1); WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples)); @@ -116,9 +119,18 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) { - WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", - dest, xoffset, colorComp - ); + if (ApiType == API_OPENGL) + { + WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", + dest, xoffset, colorComp + ); + } + else + { + WRITE(p, " %s = Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", + dest, xoffset, colorComp + ); + } } static void WriteColorToIntensity(char*& p, const char* src, const char* dest) @@ -134,6 +146,7 @@ static void WriteColorToIntensity(char*& p, const char* src, const char* dest) static void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) { + //WRITE(p, " ocol0 = float4(1,1,1,1;\n"); WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth); }