From ef75f3005d5e09edaa739d298f644be1f28e3f19 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 15:49:35 -0800 Subject: [PATCH 01/10] WIP. --- .../VideoBackends/D3D/PSTextureEncoder.cpp | 1286 +---------------- .../Core/VideoBackends/D3D/PSTextureEncoder.h | 44 +- .../VideoCommon/TextureConversionShader.cpp | 35 +- 3 files changed, 74 insertions(+), 1291 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 0ca9ee2211..96f90bb870 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -6,875 +6,32 @@ #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/FramebufferManager.h" #include "VideoBackends/D3D/PSTextureEncoder.h" #include "VideoBackends/D3D/Render.h" #include "VideoBackends/D3D/TextureCache.h" +#include "VideoBackends/D3D/VertexShaderCache.h" -// "Static mode" will compile a new EFB encoder shader for every combination of -// encoding configurations. It's compatible with Shader Model 4. - -// "Dynamic mode" will use the dynamic-linking feature of Shader Model 5. Only -// one shader needs to be compiled. - -// Unfortunately, the June 2010 DirectX SDK includes a broken HLSL compiler -// which cripples dynamic linking for us. -// See . -// Dynamic mode is disabled for now. To enable it, uncomment the line below. - -//#define USE_DYNAMIC_MODE - -// FIXME: When Microsoft fixes their HLSL compiler, make Dolphin enable dynamic -// mode on Shader Model 5-compatible cards. +#include "VideoCommon/TextureConversionShader.h" namespace DX11 { -union EFBEncodeParams +struct EFBEncodeParams { - struct - { - FLOAT NumHalfCacheLinesX; - FLOAT NumBlocksY; - FLOAT PosX; - FLOAT PosY; - FLOAT TexLeft; - FLOAT TexTop; - FLOAT TexRight; - FLOAT TexBottom; - }; - // Constant buffers must be a multiple of 16 bytes in size. - u8 pad[32]; // Pad to the next multiple of 16 bytes + DWORD SrcTop; + DWORD SrcLeft; + DWORD DestWidth; + DWORD ScaleFactor; }; -static const char EFB_ENCODE_VS[] = -"// dolphin-emu EFB encoder vertex shader\n" - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"struct Output\n" -"{\n" - "float4 Pos : SV_Position;\n" - "float2 Coord : ENCODECOORD;\n" -"};\n" - -"Output main(in float2 Pos : POSITION)\n" -"{\n" - "Output result;\n" - "result.Pos = float4(2*Pos.x-1, -2*Pos.y+1, 0.0, 1.0);\n" - "result.Coord = Pos * float2(Params.NumHalfCacheLinesX, Params.NumBlocksY);\n" - "return result;\n" -"}\n" -; - -static const char EFB_ENCODE_PS[] = -"// dolphin-emu EFB encoder pixel shader\n" - -// Input - -"cbuffer cbParams : register(b0)\n" -"{\n" - "struct\n" // Should match EFBEncodeParams above - "{\n" - "float NumHalfCacheLinesX;\n" - "float NumBlocksY;\n" - "float PosX;\n" // Upper-left corner of source - "float PosY;\n" - "float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized) - "float TexTop;\n" - "float TexRight;\n" - "float TexBottom;\n" - "} Params;\n" -"}\n" - -"Texture2DArray EFBTexture : register(t0);\n" -"sampler EFBSampler : register(s0);\n" - -// Constants - -"static const float2 INV_EFB_DIMS = float2(1.0/640.0, 1.0/528.0);\n" - -// FIXME: Is this correct? -"static const float3 INTENSITY_COEFFS = float3(0.257, 0.504, 0.098);\n" -"static const float INTENSITY_ADD = 16.0/255.0;\n" - -// Utility functions - -"uint4 Swap4_32(uint4 v) {\n" - "return (((v >> 24) & 0xFF) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | ((v << 24) & 0xFF000000));\n" -"}\n" - -"uint4 UINT4_8888_BE(uint4 a, uint4 b, uint4 c, uint4 d) {\n" - "return (d << 24) | (c << 16) | (b << 8) | a;\n" -"}\n" - -"uint UINT_44444444_BE(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n" - "return (g << 28) | (h << 24) | (e << 20) | (f << 16) | (c << 12) | (d << 8) | (a << 4) | b;\n" -"}\n" - -"uint UINT_1555(uint a, uint b, uint c, uint d) {\n" - "return (a << 15) | (b << 10) | (c << 5) | d;\n" -"}\n" - -"uint UINT_3444(uint a, uint b, uint c, uint d) {\n" - "return (a << 12) | (b << 8) | (c << 4) | d;\n" -"}\n" - -"uint UINT_565(uint a, uint b, uint c) {\n" - "return (a << 11) | (b << 5) | c;\n" -"}\n" - -"uint UINT_1616(uint a, uint b) {\n" - "return (a << 16) | b;\n" -"}\n" - -"uint Float8ToUint3(float v) {\n" - "return (uint)round(v*255.0) >> 5;\n" -"}\n" - -"uint Float8ToUint4(float v) {\n" - "return (uint)round(v*255.0) >> 4;\n" -"}\n" - -"uint Float8ToUint5(float v) {\n" - "return (uint)round(v*255.0) >> 3;\n" -"}\n" - -"uint Float8ToUint6(float v) {\n" - "return (uint)round(v*255.0) >> 2;\n" -"}\n" - -"uint EncodeRGB5A3(float4 pixel) {\n" - "if (pixel.a >= 224.0/255.0) {\n" - // Encode to ARGB1555 - "return UINT_1555(1, Float8ToUint5(pixel.r), Float8ToUint5(pixel.g), Float8ToUint5(pixel.b));\n" - "} else {\n" - // Encode to ARGB3444 - "return UINT_3444(Float8ToUint3(pixel.a), Float8ToUint4(pixel.r), Float8ToUint4(pixel.g), Float8ToUint4(pixel.b));\n" - "}\n" -"}\n" - -"uint EncodeRGB565(float4 pixel) {\n" - "return UINT_565(Float8ToUint5(pixel.r), Float8ToUint6(pixel.g), Float8ToUint5(pixel.b));\n" -"}\n" - -"float2 CalcTexCoord(float2 coord)\n" -"{\n" - // Add 0.5,0.5 to sample from the center of the EFB pixel - "float2 efbCoord = coord + float2(0.5,0.5);\n" - "return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n" -"}\n" - -// Interface and classes for different source formats - -"float4 Fetch_0(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_1(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "return EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" -"}\n" - -"float4 Fetch_2(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - "float4 result = EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0));\n" - "result.a = 1.0;\n" - "return result;\n" -"}\n" - -"float4 Fetch_3(float2 coord)\n" -"{\n" - "float2 texCoord = CalcTexCoord(coord);\n" - - "uint depth24 = 0xFFFFFF * EFBTexture.Sample(EFBSampler, float3(texCoord.xy, 0.0)).r;\n" - "uint4 bytes = uint4(\n" - "(depth24 >> 16) & 0xFF,\n" // r - "(depth24 >> 8) & 0xFF,\n" // g - "depth24 & 0xFF,\n" // b - "255);\n" // a - "return bytes / 255.0;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iFetch\n" -"{\n" - "float4 Fetch(float2 coord);\n" -"};\n" - -// Source format 0 -"class cFetch_0 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_0(coord); }\n" -"};\n" - - -// Source format 1 -"class cFetch_1 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_1(coord); }\n" -"};\n" - -// Source format 2 -"class cFetch_2 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_2(coord); }\n" -"};\n" - -// Source format 3 -"class cFetch_3 : iFetch\n" -"{\n" - "float4 Fetch(float2 coord)\n" - "{ return Fetch_3(coord); }\n" -"};\n" - -// Declare fetch interface; must be set by application -"iFetch g_fetch;\n" -"#define IMP_FETCH g_fetch.Fetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_FETCH\n" -"#error No Fetch specified\n" -"#endif\n" - -// Interface and classes for different intensity settings (on or off) - -"float4 Intensity_0(float4 sample)\n" -"{\n" - "return sample;\n" -"}\n" - -"float4 Intensity_1(float4 sample)\n" -"{\n" - "sample.r = dot(INTENSITY_COEFFS, sample.rgb) + INTENSITY_ADD;\n" - // FIXME: Is this correct? What happens if you use one of the non-R - // formats with intensity on? - "sample = sample.rrrr;\n" - "return sample;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iIntensity\n" -"{\n" - "float4 Intensity(float4 sample);\n" -"};\n" - -// Intensity off -"class cIntensity_0 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_0(sample); }\n" -"};\n" - -// Intensity on -"class cIntensity_1 : iIntensity\n" -"{\n" - "float4 Intensity(float4 sample)\n" - "{ return Intensity_1(sample); }\n" -"};\n" - -// Declare intensity interface; must be set by application -"iIntensity g_intensity;\n" -"#define IMP_INTENSITY g_intensity.Intensity\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_INTENSITY\n" -"#error No Intensity specified\n" -"#endif\n" - - -// Interface and classes for different scale/filter settings (on or off) - -"float4 ScaledFetch_0(float2 coord)\n" -"{\n" - "return IMP_FETCH(float2(Params.PosX,Params.PosY) + coord);\n" -"}\n" - -"float4 ScaledFetch_1(float2 coord)\n" -"{\n" - "float2 ul = float2(Params.PosX,Params.PosY) + 2*coord;\n" - "float4 sample0 = IMP_FETCH(ul+float2(0,0));\n" - "float4 sample1 = IMP_FETCH(ul+float2(1,0));\n" - "float4 sample2 = IMP_FETCH(ul+float2(0,1));\n" - "float4 sample3 = IMP_FETCH(ul+float2(1,1));\n" - // Average all four samples together - // FIXME: Is this correct? - "return 0.25 * (sample0+sample1+sample2+sample3);\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord);\n" -"};\n" - -// Scale off -"class cScaledFetch_0 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_0(coord); }\n" -"};\n" - -// Scale on -"class cScaledFetch_1 : iScaledFetch\n" -"{\n" - "float4 ScaledFetch(float2 coord)\n" - "{ return ScaledFetch_1(coord); }\n" -"};\n" - -// Declare scaled fetch interface; must be set by application code -"iScaledFetch g_scaledFetch;\n" -"#define IMP_SCALEDFETCH g_scaledFetch.ScaledFetch\n" - -"#endif\n" // #ifdef DYNAMIC_MODE - -"#ifndef IMP_SCALEDFETCH\n" -"#error No ScaledFetch specified\n" -"#endif\n" - -// Main EFB-sampling function: performs all steps of fetching pixels, scaling, -// applying intensity function - -"float4 SampleEFB(float2 coord)\n" -"{\n" - // FIXME: Does intensity happen before or after scaling? Or does - // it matter? - "float4 sample = IMP_SCALEDFETCH(coord);\n" - "return IMP_INTENSITY(sample);\n" -"}\n" - -// Interfaces and classes for different destination formats - -"uint4 Generate_0(float2 cacheCoord)\n" // R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,8);\n" - "float2 subBlockUL = blockUL + float2(0, 4*(cacheCoord.x%2));\n" - - "float4 sample[32];\n" - "for (uint y = 0; y < 4; ++y) {\n" - "for (uint x = 0; x < 8; ++x) {\n" - "sample[y*8+x] = SampleEFB(subBlockUL+float2(x,y));\n" - "}\n" - "}\n" - - "uint dw[4];\n" - "for (uint i = 0; i < 4; ++i) {\n" - "dw[i] = UINT_44444444_BE(\n" - "Float8ToUint4(sample[8*i+0].r),\n" - "Float8ToUint4(sample[8*i+1].r),\n" - "Float8ToUint4(sample[8*i+2].r),\n" - "Float8ToUint4(sample[8*i+3].r),\n" - "Float8ToUint4(sample[8*i+4].r),\n" - "Float8ToUint4(sample[8*i+5].r),\n" - "Float8ToUint4(sample[8*i+6].r),\n" - "Float8ToUint4(sample[8*i+7].r)\n" - ");\n" - "}\n" - - "return uint4(dw[0], dw[1], dw[2], dw[3]);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_1(float2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?) -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.r, sample4.r, sample8.r, sampleC.r)),\n" - "round(255*float4(sample1.r, sample5.r, sample9.r, sampleD.r)),\n" - "round(255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r)),\n" - "round(255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_2(float2 cacheCoord)\n" // A4 R4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint dw0 = UINT_44444444_BE(\n" - "Float8ToUint4(sample0.a), Float8ToUint4(sample0.r),\n" - "Float8ToUint4(sample1.a), Float8ToUint4(sample1.r),\n" - "Float8ToUint4(sample2.a), Float8ToUint4(sample2.r),\n" - "Float8ToUint4(sample3.a), Float8ToUint4(sample3.r)\n" - ");\n" - "uint dw1 = UINT_44444444_BE(\n" - "Float8ToUint4(sample4.a), Float8ToUint4(sample4.r),\n" - "Float8ToUint4(sample5.a), Float8ToUint4(sample5.r),\n" - "Float8ToUint4(sample6.a), Float8ToUint4(sample6.r),\n" - "Float8ToUint4(sample7.a), Float8ToUint4(sample7.r)\n" - ");\n" - "uint dw2 = UINT_44444444_BE(\n" - "Float8ToUint4(sample8.a), Float8ToUint4(sample8.r),\n" - "Float8ToUint4(sample9.a), Float8ToUint4(sample9.r),\n" - "Float8ToUint4(sampleA.a), Float8ToUint4(sampleA.r),\n" - "Float8ToUint4(sampleB.a), Float8ToUint4(sampleB.r)\n" - ");\n" - "uint dw3 = UINT_44444444_BE(\n" - "Float8ToUint4(sampleC.a), Float8ToUint4(sampleC.r),\n" - "Float8ToUint4(sampleD.a), Float8ToUint4(sampleD.r),\n" - "Float8ToUint4(sampleE.a), Float8ToUint4(sampleE.r),\n" - "Float8ToUint4(sampleF.a), Float8ToUint4(sampleF.r)\n" - ");\n" - - "return uint4(dw0, dw1, dw2, dw3);\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_3(float2 cacheCoord)\n" // A8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample2.a, sample4.a, sample6.a)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.a, sample3.a, sample5.a, sample7.a)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_4(float2 cacheCoord)\n" // R5 G6 B5 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB565(sample4), EncodeRGB565(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB565(sample6), EncodeRGB565(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_5(float2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n" - "uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n" - "uint dw2 = UINT_1616(EncodeRGB5A3(sample4), EncodeRGB5A3(sample5));\n" - "uint dw3 = UINT_1616(EncodeRGB5A3(sample6), EncodeRGB5A3(sample7));\n" - - "return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n" -"}\n" - -"uint4 Generate_6(float2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(4,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4;\n" - "if (cacheCoord.x % 4 < 2)\n" - "{\n" - // First cache line gets AR - "dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample2.a, sample4.a, sample6.a)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.a, sample3.a, sample5.a, sample7.a)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - "}\n" - "else\n" - "{\n" - // Second cache line gets GB - "dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample0.b, sample2.b, sample4.b, sample6.b)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g)),\n" - "round(255*float4(sample1.b, sample3.b, sample5.b, sample7.b))\n" - ");\n" - "}\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_7(float2 cacheCoord)\n" // A8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.a, sample4.a, sample8.a, sampleC.a)),\n" - "round(255*float4(sample1.a, sample5.a, sample9.a, sampleD.a)),\n" - "round(255*float4(sample2.a, sample6.a, sampleA.a, sampleE.a)),\n" - "round(255*float4(sample3.a, sample7.a, sampleB.a, sampleF.a))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_8(float2 cacheCoord)\n" // R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.r, sample4.r, sample8.r, sampleC.r)),\n" - "round(255*float4(sample1.r, sample5.r, sample9.r, sampleD.r)),\n" - "round(255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r)),\n" - "round(255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_9(float2 cacheCoord)\n" // G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample4.g, sample8.g, sampleC.g)),\n" - "round(255*float4(sample1.g, sample5.g, sample9.g, sampleD.g)),\n" - "round(255*float4(sample2.g, sample6.g, sampleA.g, sampleE.g)),\n" - "round(255*float4(sample3.g, sample7.g, sampleB.g, sampleF.g))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_A(float2 cacheCoord)\n" // B8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(8,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n" - "float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n" - "float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n" - "float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n" - "float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n" - "float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.b, sample4.b, sample8.b, sampleC.b)),\n" - "round(255*float4(sample1.b, sample5.b, sample9.b, sampleD.b)),\n" - "round(255*float4(sample2.b, sample6.b, sampleA.b, sampleE.b)),\n" - "round(255*float4(sample3.b, sample7.b, sampleB.b, sampleF.b))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"uint4 Generate_B(float2 cacheCoord)\n" // G8 R8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample0.r, sample2.r, sample4.r, sample6.r)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g)),\n" - "round(255*float4(sample1.r, sample3.r, sample5.r, sample7.r))\n" - ");\n" - - "return dw4;\n" -"}\n" - -// FIXME: Untested -"uint4 Generate_C(float2 cacheCoord)\n" // B8 G8 -"{\n" - "float2 blockCoord = floor(cacheCoord / float2(2,1));\n" - - "float2 blockUL = blockCoord * float2(4,4);\n" - "float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n" - - "float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n" - "float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n" - "float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n" - "float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n" - "float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n" - "float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n" - "float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n" - "float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n" - - "uint4 dw4 = UINT4_8888_BE(\n" - "round(255*float4(sample0.b, sample2.b, sample4.b, sample6.b)),\n" - "round(255*float4(sample0.g, sample2.g, sample4.g, sample6.g)),\n" - "round(255*float4(sample1.b, sample3.b, sample5.b, sample7.b)),\n" - "round(255*float4(sample1.g, sample3.g, sample5.g, sample7.g))\n" - ");\n" - - "return dw4;\n" -"}\n" - -"#ifdef DYNAMIC_MODE\n" -"interface iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord);\n" -"};\n" - -"class cGenerator_4 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_4(cacheCoord); }\n" -"};\n" - -"class cGenerator_5 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_5(cacheCoord); }\n" -"};\n" - -"class cGenerator_6 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_6(cacheCoord); }\n" -"};\n" - -"class cGenerator_8 : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_8(cacheCoord); }\n" -"};\n" - -"class cGenerator_B : iGenerator\n" -"{\n" - "uint4 Generate(float2 cacheCoord)\n" - "{ return Generate_B(cacheCoord); }\n" -"};\n" - -// Declare generator interface; must be set by application -"iGenerator g_generator;\n" -"#define IMP_GENERATOR g_generator.Generate\n" - -"#endif\n" - -"#ifndef IMP_GENERATOR\n" -"#error No generator specified\n" -"#endif\n" - -"void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n" -"{\n" - "float2 cacheCoord = floor(fCacheCoord);\n" - "ocol0 = IMP_GENERATOR(cacheCoord);\n" -"}\n" -; - PSTextureEncoder::PSTextureEncoder() : m_ready(false), m_out(nullptr), m_outRTV(nullptr), m_outStage(nullptr), - m_encodeParams(nullptr), - m_quad(nullptr), m_vShader(nullptr), m_quadLayout(nullptr), - m_efbEncodeBlendState(nullptr), m_efbEncodeDepthState(nullptr), - m_efbEncodeRastState(nullptr), m_efbSampler(nullptr), - m_dynamicShader(nullptr), m_classLinkage(nullptr) + m_encodeParams(nullptr) { - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = nullptr; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = nullptr; } -static const D3D11_INPUT_ELEMENT_DESC QUAD_LAYOUT_DESC[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 } -}; - -static const struct QuadVertex -{ - float posX; - float posY; -} QUAD_VERTS[4] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; - void PSTextureEncoder::Init() { m_ready = false; @@ -882,25 +39,21 @@ void PSTextureEncoder::Init() HRESULT hr; // Create output texture RGBA format - - // This format allows us to generate one cache line in two pixels. D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( - DXGI_FORMAT_R32G32B32A32_UINT, - EFB_WIDTH, EFB_HEIGHT/4, 1, 1, D3D11_BIND_RENDER_TARGET); + DXGI_FORMAT_R8G8B8A8_UINT, + EFB_WIDTH, EFB_HEIGHT, 1, 1, D3D11_BIND_RENDER_TARGET); hr = D3D::device->CreateTexture2D(&t2dd, nullptr, &m_out); CHECK(SUCCEEDED(hr), "create efb encode output texture"); D3D::SetDebugObjectName(m_out, "efb encoder output texture"); // Create output render target view - D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, - D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R32G32B32A32_UINT); + D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UINT); hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); CHECK(SUCCEEDED(hr), "create efb encode output render target view"); D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); // Create output staging buffer - t2dd.Usage = D3D11_USAGE_STAGING; t2dd.BindFlags = 0; t2dd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; @@ -909,87 +62,12 @@ void PSTextureEncoder::Init() D3D::SetDebugObjectName(m_outStage, "efb encoder output staging buffer"); // Create constant buffer for uploading data to shaders - D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER); hr = D3D::device->CreateBuffer(&bd, nullptr, &m_encodeParams); CHECK(SUCCEEDED(hr), "create efb encode params buffer"); D3D::SetDebugObjectName(m_encodeParams, "efb encoder params buffer"); - // Create vertex quad - - bd = CD3D11_BUFFER_DESC(sizeof(QUAD_VERTS), D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_IMMUTABLE); - D3D11_SUBRESOURCE_DATA srd = { QUAD_VERTS, 0, 0 }; - - hr = D3D::device->CreateBuffer(&bd, &srd, &m_quad); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex buffer"); - D3D::SetDebugObjectName(m_quad, "efb encoder quad vertex buffer"); - - // Create vertex shader - - D3DBlob* bytecode = nullptr; - if (!D3D::CompileVertexShader(EFB_ENCODE_VS, &bytecode)) - { - ERROR_LOG(VIDEO, "EFB encode vertex shader failed to compile"); - return; - } - - hr = D3D::device->CreateVertexShader(bytecode->Data(), bytecode->Size(), nullptr, &m_vShader); - CHECK(SUCCEEDED(hr), "create efb encode vertex shader"); - D3D::SetDebugObjectName(m_vShader, "efb encoder vertex shader"); - - // Create input layout for vertex quad using bytecode from vertex shader - - hr = D3D::device->CreateInputLayout(QUAD_LAYOUT_DESC, - sizeof(QUAD_LAYOUT_DESC)/sizeof(D3D11_INPUT_ELEMENT_DESC), - bytecode->Data(), bytecode->Size(), &m_quadLayout); - CHECK(SUCCEEDED(hr), "create efb encode quad vertex layout"); - D3D::SetDebugObjectName(m_quadLayout, "efb encoder quad layout"); - - bytecode->Release(); - - // Create pixel shader - -#ifdef USE_DYNAMIC_MODE - if (!InitDynamicMode()) -#else - if (!InitStaticMode()) -#endif - return; - - // Create blend state - - D3D11_BLEND_DESC bld = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = D3D::device->CreateBlendState(&bld, &m_efbEncodeBlendState); - CHECK(SUCCEEDED(hr), "create efb encode blend state"); - D3D::SetDebugObjectName(m_efbEncodeBlendState, "efb encoder blend state"); - - // Create depth state - - D3D11_DEPTH_STENCIL_DESC dsd = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - dsd.DepthEnable = FALSE; - hr = D3D::device->CreateDepthStencilState(&dsd, &m_efbEncodeDepthState); - CHECK(SUCCEEDED(hr), "create efb encode depth state"); - D3D::SetDebugObjectName(m_efbEncodeDepthState, "efb encoder depth state"); - - // Create rasterizer state - - D3D11_RASTERIZER_DESC rd = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rd.CullMode = D3D11_CULL_NONE; - rd.DepthClipEnable = FALSE; - hr = D3D::device->CreateRasterizerState(&rd, &m_efbEncodeRastState); - CHECK(SUCCEEDED(hr), "create efb encode rast state"); - D3D::SetDebugObjectName(m_efbEncodeRastState, "efb encoder rast state"); - - // Create efb texture sampler - - D3D11_SAMPLER_DESC sd = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); - CHECK(SUCCEEDED(hr), "create efb encode texture sampler"); - D3D::SetDebugObjectName(m_efbSampler, "efb encoder texture sampler"); - m_ready = true; } @@ -997,32 +75,12 @@ void PSTextureEncoder::Shutdown() { m_ready = false; - for (size_t i = 0; i < 4; ++i) - SAFE_RELEASE(m_fetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_scaledFetchClass[i]); - for (size_t i = 0; i < 2; ++i) - SAFE_RELEASE(m_intensityClass[i]); - for (size_t i = 0; i < 16; ++i) - SAFE_RELEASE(m_generatorClass[i]); - m_linkageArray.clear(); - - SAFE_RELEASE(m_classLinkage); - SAFE_RELEASE(m_dynamicShader); - for (auto& it : m_staticShaders) { SAFE_RELEASE(it.second); } m_staticShaders.clear(); - SAFE_RELEASE(m_efbSampler); - SAFE_RELEASE(m_efbEncodeRastState); - SAFE_RELEASE(m_efbEncodeDepthState); - SAFE_RELEASE(m_efbEncodeBlendState); - SAFE_RELEASE(m_quadLayout); - SAFE_RELEASE(m_vShader); - SAFE_RELEASE(m_quad); SAFE_RELEASE(m_encodeParams); SAFE_RELEASE(m_outStage); SAFE_RELEASE(m_outRTV); @@ -1072,33 +130,14 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, size_t encodeSize = 0; // Reset API - g_renderer->ResetAPIState(); // Set up all the state for EFB encoding -#ifdef USE_DYNAMIC_MODE - if (SetDynamicShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#else - if (SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf)) -#endif { - D3D::stateman->SetVertexShader(m_vShader); - D3D::stateman->SetGeometryShader(nullptr); - - D3D::stateman->PushBlendState(m_efbEncodeBlendState); - D3D::stateman->PushDepthState(m_efbEncodeDepthState); - D3D::stateman->PushRasterizerState(m_efbEncodeRastState); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2), FLOAT(numBlocksY)); + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2*4), FLOAT(numBlocksY*4)); D3D::context->RSSetViewports(1, &vp); - D3D::stateman->SetInputLayout(m_quadLayout); - D3D::stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - UINT stride = sizeof(QuadVertex); - UINT offset = 0; - D3D::stateman->SetVertexBuffer(m_quad, stride, offset); - EFBRectangle fullSrcRect; fullSrcRect.left = 0; fullSrcRect.top = 0; @@ -1106,17 +145,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, fullSrcRect.bottom = EFB_HEIGHT; TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); - EFBEncodeParams params = { 0 }; - params.NumHalfCacheLinesX = FLOAT(cacheLinesPerRow*2); - params.NumBlocksY = FLOAT(numBlocksY); - params.PosX = FLOAT(correctSrc.left); - params.PosY = FLOAT(correctSrc.top); - params.TexLeft = float(targetRect.left) / g_renderer->GetTargetWidth(); - params.TexTop = float(targetRect.top) / g_renderer->GetTargetHeight(); - params.TexRight = float(targetRect.right) / g_renderer->GetTargetWidth(); - params.TexBottom = float(targetRect.bottom) / g_renderer->GetTargetHeight(); - D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); - D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ? @@ -1126,41 +154,27 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, // expecting the blurred edges around multisampled shapes. FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - D3D::stateman->SetVertexConstants(m_encodeParams); - D3D::stateman->SetPixelConstants(m_encodeParams); - D3D::stateman->SetTexture(0, pEFB); - D3D::stateman->SetSampler(0, m_efbSampler); + EFBEncodeParams params; + params.SrcLeft = correctSrc.left; + params.SrcLeft = correctSrc.top; + params.DestWidth = actualWidth; + params.ScaleFactor = scaleByHalf ? 2 : 1; + D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); + D3D::stateman->SetPixelConstants(0, m_encodeParams); - // Encode! - - D3D::stateman->Apply(); - D3D::context->Draw(4, 0); + D3D::drawShadedTexQuad(pEFB, + targetRect.AsRECT(), + Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf), + VertexShaderCache::GetSimpleVertexShader(), + VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer - - D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2, numBlocksY, 1); + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2*4, numBlocksY*4, 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); - // Clean up state - - D3D::context->OMSetRenderTargets(0, nullptr, nullptr); - - D3D::stateman->SetSampler(0, nullptr); - D3D::stateman->SetTexture(0, nullptr); - D3D::stateman->SetPixelConstants(nullptr); - D3D::stateman->SetVertexConstants(nullptr); - - D3D::stateman->SetPixelShader(nullptr); - D3D::stateman->SetVertexBuffer(nullptr, 0, 0); - - D3D::stateman->PopRasterizerState(); - D3D::stateman->PopDepthState(); - D3D::stateman->PopBlendState(); - - D3D::stateman->Apply(); - // Transfer staging buffer to GameCube/Wii RAM - D3D11_MAPPED_SUBRESOURCE map = { 0 }; hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); @@ -1179,7 +193,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, } // Restore API - g_renderer->RestoreAPIState(); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), @@ -1188,25 +201,7 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, return encodeSize; } -bool PSTextureEncoder::InitStaticMode() -{ - // Nothing to really do. - return true; -} - -static const char* FETCH_FUNC_NAMES[4] = { - "Fetch_0", "Fetch_1", "Fetch_2", "Fetch_3" -}; - -static const char* SCALEDFETCH_FUNC_NAMES[2] = { - "ScaledFetch_0", "ScaledFetch_1" -}; - -static const char* INTENSITY_FUNC_NAMES[2] = { - "Intensity_0", "Intensity_1" -}; - -bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, +ID3D11PixelShader* PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf) { size_t fetchNum = static_cast(srcFormat); @@ -1219,48 +214,33 @@ bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelF ComboMap::iterator it = m_staticShaders.find(key); if (it == m_staticShaders.end()) { - const char* generatorFuncName = nullptr; - switch (generatorNum) - { - case 0x0: generatorFuncName = "Generate_0"; break; - case 0x1: generatorFuncName = "Generate_1"; break; - case 0x2: generatorFuncName = "Generate_2"; break; - case 0x3: generatorFuncName = "Generate_3"; break; - case 0x4: generatorFuncName = "Generate_4"; break; - case 0x5: generatorFuncName = "Generate_5"; break; - case 0x6: generatorFuncName = "Generate_6"; break; - case 0x7: generatorFuncName = "Generate_7"; break; - case 0x8: generatorFuncName = "Generate_8"; break; - case 0x9: generatorFuncName = "Generate_9"; break; - case 0xA: generatorFuncName = "Generate_A"; break; - case 0xB: generatorFuncName = "Generate_B"; break; - case 0xC: generatorFuncName = "Generate_C"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - m_staticShaders[key] = nullptr; - return false; - } - INFO_LOG(VIDEO, "Compiling efb encoding shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d", dstFormat, static_cast(srcFormat), isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - // Shader permutation not found, so compile it + u32 format = dstFormat; + + if (srcFormat == PEControl::Z24) + { + format |= _GX_TF_ZTF; + if (dstFormat == 11) + format = GX_TF_Z16; + else if (format < GX_TF_Z8 || format > GX_TF_Z24X8) + format |= _GX_TF_CTF; + } + else + { + if (dstFormat > GX_TF_RGBA8 || (dstFormat < GX_TF_RGB565 && !isIntensity)) + format |= _GX_TF_CTF; + } + D3DBlob* bytecode = nullptr; - D3D_SHADER_MACRO macros[] = { - { "IMP_FETCH", FETCH_FUNC_NAMES[fetchNum] }, - { "IMP_SCALEDFETCH", SCALEDFETCH_FUNC_NAMES[scaledFetchNum] }, - { "IMP_INTENSITY", INTENSITY_FUNC_NAMES[intensityNum] }, - { "IMP_GENERATOR", generatorFuncName }, - { nullptr, nullptr } - }; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, &bytecode, macros)) + const char* shader = TextureConversionShader::GenerateEncodingShader(format, API_D3D); + if (!D3D::CompilePixelShader(shader, &bytecode)) { WARN_LOG(VIDEO, "EFB encoder shader for dstFormat 0x%X, srcFormat %d, isIntensity %d, scaleByHalf %d failed to compile", dstFormat, static_cast(srcFormat), isIntensity ? 1 : 0, scaleByHalf ? 1 : 0); - // Add dummy shader to map to prevent trying to compile over and - // over again m_staticShaders[key] = nullptr; - return false; + return nullptr; } ID3D11PixelShader* newShader; @@ -1271,175 +251,7 @@ bool PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelF bytecode->Release(); } - if (it != m_staticShaders.end()) - { - if (it->second) - { - D3D::stateman->SetPixelShader(it->second); - return true; - } - else - return false; - } - else - return false; -} - -bool PSTextureEncoder::InitDynamicMode() -{ - HRESULT hr; - - D3D_SHADER_MACRO macros[] = { - { "DYNAMIC_MODE", nullptr }, - { nullptr, nullptr } - }; - - D3DBlob* bytecode = nullptr; - if (!D3D::CompilePixelShader(EFB_ENCODE_PS, &bytecode, macros)) - { - ERROR_LOG(VIDEO, "EFB encode pixel shader failed to compile"); - return false; - } - - hr = D3D::device->CreateClassLinkage(&m_classLinkage); - CHECK(SUCCEEDED(hr), "create efb encode class linkage"); - D3D::SetDebugObjectName(m_classLinkage, "efb encoder class linkage"); - - hr = D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), m_classLinkage, &m_dynamicShader); - CHECK(SUCCEEDED(hr), "create efb encode pixel shader"); - D3D::SetDebugObjectName(m_dynamicShader, "efb encoder pixel shader"); - - // Use D3DReflect - - ID3D11ShaderReflection* reflect = nullptr; - hr = PD3DReflect(bytecode->Data(), bytecode->Size(), IID_ID3D11ShaderReflection, (void**)&reflect); - CHECK(SUCCEEDED(hr), "reflect on efb encoder shader"); - - // Get number of slots and create dynamic linkage array - - UINT numSlots = reflect->GetNumInterfaceSlots(); - m_linkageArray.resize(numSlots, nullptr); - - // Get interface slots - - ID3D11ShaderReflectionVariable* var = reflect->GetVariableByName("g_fetch"); - m_fetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_scaledFetch"); - m_scaledFetchSlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_intensity"); - m_intensitySlot = var->GetInterfaceSlot(0); - - var = reflect->GetVariableByName("g_generator"); - m_generatorSlot = var->GetInterfaceSlot(0); - - INFO_LOG(VIDEO, "Fetch slot %d, scaledFetch slot %d, intensity slot %d, generator slot %d", - m_fetchSlot, m_scaledFetchSlot, m_intensitySlot, m_generatorSlot); - - // Class instances will be created at the time they are used - - for (size_t i = 0; i < 4; ++i) - m_fetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_scaledFetchClass[i] = nullptr; - for (size_t i = 0; i < 2; ++i) - m_intensityClass[i] = nullptr; - for (size_t i = 0; i < 16; ++i) - m_generatorClass[i] = nullptr; - - reflect->Release(); - bytecode->Release(); - - return true; -} - -static const char* FETCH_CLASS_NAMES[4] = { - "cFetch_0", "cFetch_1", "cFetch_2", "cFetch_3" -}; - -static const char* SCALEDFETCH_CLASS_NAMES[2] = { - "cScaledFetch_0", "cScaledFetch_1" -}; - -static const char* INTENSITY_CLASS_NAMES[2] = { - "cIntensity_0", "cIntensity_1" -}; - -bool PSTextureEncoder::SetDynamicShader(unsigned int dstFormat, - PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf) -{ - size_t fetchNum = static_cast(srcFormat); - size_t scaledFetchNum = scaleByHalf ? 1 : 0; - size_t intensityNum = isIntensity ? 1 : 0; - size_t generatorNum = dstFormat; - - // FIXME: Not all the possible generators are available as classes yet. - // When dynamic mode is usable, implement them. - const char* generatorName = nullptr; - switch (generatorNum) - { - case 0x4: generatorName = "cGenerator_4"; break; - case 0x5: generatorName = "cGenerator_5"; break; - case 0x6: generatorName = "cGenerator_6"; break; - case 0x8: generatorName = "cGenerator_8"; break; - case 0xB: generatorName = "cGenerator_B"; break; - default: - WARN_LOG(VIDEO, "No generator available for dst format 0x%X; aborting", generatorNum); - return false; - } - - // Make sure class instances are available - if (!m_fetchClass[fetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - FETCH_CLASS_NAMES[fetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - FETCH_CLASS_NAMES[fetchNum], 0, 0, 0, 0, &m_fetchClass[fetchNum]); - CHECK(SUCCEEDED(hr), "create fetch class instance"); - } - if (!m_scaledFetchClass[scaledFetchNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - SCALEDFETCH_CLASS_NAMES[scaledFetchNum], 0, 0, 0, 0, - &m_scaledFetchClass[scaledFetchNum]); - CHECK(SUCCEEDED(hr), "create scaled fetch class instance"); - } - if (!m_intensityClass[intensityNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - INTENSITY_CLASS_NAMES[intensityNum], dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - INTENSITY_CLASS_NAMES[intensityNum], 0, 0, 0, 0, - &m_intensityClass[intensityNum]); - CHECK(SUCCEEDED(hr), "create intensity class instance"); - } - if (!m_generatorClass[generatorNum]) - { - INFO_LOG(VIDEO, "Creating %s class instance for encoder 0x%X", - generatorName, dstFormat); - HRESULT hr = m_classLinkage->CreateClassInstance( - generatorName, 0, 0, 0, 0, &m_generatorClass[generatorNum]); - CHECK(SUCCEEDED(hr), "create generator class instance"); - } - - // Assemble dynamic linkage array - if (m_fetchSlot != UINT(-1)) - m_linkageArray[m_fetchSlot] = m_fetchClass[fetchNum]; - if (m_scaledFetchSlot != UINT(-1)) - m_linkageArray[m_scaledFetchSlot] = m_scaledFetchClass[scaledFetchNum]; - if (m_intensitySlot != UINT(-1)) - m_linkageArray[m_intensitySlot] = m_intensityClass[intensityNum]; - if (m_generatorSlot != UINT(-1)) - m_linkageArray[m_generatorSlot] = m_generatorClass[generatorNum]; - - D3D::stateman->SetPixelShaderDynamic(m_dynamicShader, - m_linkageArray.empty() ? nullptr : &m_linkageArray[0], - (UINT)m_linkageArray.size()); - - return true; + return it->second; } } diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index c14020acfe..21c927c6e5 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -24,9 +24,7 @@ namespace DX11 class PSTextureEncoder : public TextureEncoder { - public: - PSTextureEncoder(); void Init(); @@ -36,25 +34,14 @@ public: bool isIntensity, bool scaleByHalf); private: - bool m_ready; ID3D11Texture2D* m_out; ID3D11RenderTargetView* m_outRTV; ID3D11Texture2D* m_outStage; ID3D11Buffer* m_encodeParams; - ID3D11Buffer* m_quad; - ID3D11VertexShader* m_vShader; - ID3D11InputLayout* m_quadLayout; - ID3D11BlendState* m_efbEncodeBlendState; - ID3D11DepthStencilState* m_efbEncodeDepthState; - ID3D11RasterizerState* m_efbEncodeRastState; - ID3D11SamplerState* m_efbSampler; - // Stuff only used in static-linking mode (SM4.0-compatible) - - bool InitStaticMode(); - bool SetStaticShader(unsigned int dstFormat, + ID3D11PixelShader* SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf); typedef unsigned int ComboKey; // Key for a shader combination @@ -69,35 +56,6 @@ private: typedef std::map ComboMap; ComboMap m_staticShaders; - - // Stuff only used for dynamic-linking mode (SM5.0+, available as soon as - // Microsoft fixes their bloody HLSL compiler) - - bool InitDynamicMode(); - bool SetDynamicShader(unsigned int dstFormat, - PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf); - - ID3D11PixelShader* m_dynamicShader; - ID3D11ClassLinkage* m_classLinkage; - - // Interface slots - UINT m_fetchSlot; - UINT m_scaledFetchSlot; - UINT m_intensitySlot; - UINT m_generatorSlot; - - // Class instances - // Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z - ID3D11ClassInstance* m_fetchClass[4]; - // ScaledFetch: 0 is off, 1 is on - ID3D11ClassInstance* m_scaledFetchClass[2]; - // Intensity: 0 is off, 1 is on - ID3D11ClassInstance* m_intensityClass[2]; - // Generator: one for each dst format, 16 total - ID3D11ClassInstance* m_generatorClass[16]; - - std::vector m_linkageArray; - }; } diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 8734eb42c3..3e09113518 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -70,21 +70,24 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " out vec4 ocol0;\n"); WRITE(p, "void main()\n"); + WRITE(p, "{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n" + ); } else // D3D { - WRITE(p,"sampler samp0 : register(s0);\n"); + WRITE(p, "sampler samp0 : register(s0);\n"); WRITE(p, "Texture2D Tex0 : register(t0);\n"); - WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target)\n"); + WRITE(p, "void main(\n"); + WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); + WRITE(p, "{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2((rawpos + 1) / 2 * float2(640, 528));\n" + ); } - WRITE(p, "{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n" - ); - WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1)); WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1); WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples)); @@ -116,9 +119,18 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) { - WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", - dest, xoffset, colorComp - ); + if (ApiType == API_OPENGL) + { + WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", + dest, xoffset, colorComp + ); + } + else + { + WRITE(p, " %s = Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", + dest, xoffset, colorComp + ); + } } static void WriteColorToIntensity(char*& p, const char* src, const char* dest) @@ -134,6 +146,7 @@ static void WriteColorToIntensity(char*& p, const char* src, const char* dest) static void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) { + //WRITE(p, " ocol0 = float4(1,1,1,1;\n"); WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth); } From 6c1bdfe04cd12cbc75c86d6b71472393443426a5 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 17:22:29 -0800 Subject: [PATCH 02/10] More work. --- .../Core/VideoBackends/D3D/PSTextureEncoder.cpp | 15 ++++++++------- .../Core/VideoCommon/TextureConversionShader.cpp | 5 ++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 96f90bb870..492d67c13e 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -40,15 +40,15 @@ void PSTextureEncoder::Init() // Create output texture RGBA format D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC( - DXGI_FORMAT_R8G8B8A8_UINT, - EFB_WIDTH, EFB_HEIGHT, 1, 1, D3D11_BIND_RENDER_TARGET); + DXGI_FORMAT_B8G8R8A8_UNORM, + EFB_WIDTH * 4, EFB_HEIGHT / 4, 1, 1, D3D11_BIND_RENDER_TARGET); hr = D3D::device->CreateTexture2D(&t2dd, nullptr, &m_out); CHECK(SUCCEEDED(hr), "create efb encode output texture"); D3D::SetDebugObjectName(m_out, "efb encoder output texture"); // Create output render target view D3D11_RENDER_TARGET_VIEW_DESC rtvd = CD3D11_RENDER_TARGET_VIEW_DESC(m_out, - D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UINT); + D3D11_RTV_DIMENSION_TEXTURE2D, DXGI_FORMAT_B8G8R8A8_UNORM); hr = D3D::device->CreateRenderTargetView(m_out, &rtvd, &m_outRTV); CHECK(SUCCEEDED(hr), "create efb encode output render target view"); D3D::SetDebugObjectName(m_outRTV, "efb encoder output rtv"); @@ -135,7 +135,7 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, // Set up all the state for EFB encoding { - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow*2*4), FLOAT(numBlocksY*4)); + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow * 8), FLOAT(numBlocksY)); D3D::context->RSSetViewports(1, &vp); EFBRectangle fullSrcRect; @@ -156,11 +156,11 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, EFBEncodeParams params; params.SrcLeft = correctSrc.left; - params.SrcLeft = correctSrc.top; + params.SrcTop = correctSrc.top; params.DestWidth = actualWidth; params.ScaleFactor = scaleByHalf ? 2 : 1; D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); - D3D::stateman->SetPixelConstants(0, m_encodeParams); + D3D::stateman->SetPixelConstants(m_encodeParams); D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), @@ -171,7 +171,7 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer - D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow*2*4, numBlocksY*4, 1); + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow * 8, numBlocksY, 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); // Transfer staging buffer to GameCube/Wii RAM @@ -182,6 +182,7 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, u8* src = (u8*)map.pData; for (unsigned int y = 0; y < numBlocksY; ++y) { + _assert_msg_(POWERPC, map.RowPitch >= cacheLinesPerRow * 32, ""); memcpy(dst, src, cacheLinesPerRow*32); dst += bpmem.copyMipMapStrideChannels*32; src += map.RowPitch; diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 3e09113518..e7600d6248 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -84,7 +84,7 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); WRITE(p, "{\n" " int2 sampleUv;\n" - " int2 uv1 = int2((rawpos + 1) / 2 * float2(640, 528));\n" + " int2 uv1 = int2(rawpos.xy);\n" ); } @@ -127,7 +127,7 @@ static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, } else { - WRITE(p, " %s = Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", + WRITE(p, " %s = Tex0.Sample(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n", dest, xoffset, colorComp ); } @@ -146,7 +146,6 @@ static void WriteColorToIntensity(char*& p, const char* src, const char* dest) static void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) { - //WRITE(p, " ocol0 = float4(1,1,1,1;\n"); WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth); } From b0b99b69229c1c76c6eca51583a47090bd2e383c Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 19:48:29 -0800 Subject: [PATCH 03/10] Fix shader so it's possible to use with D3D Map(). Well, that's not strictly true, but trying to memcpy between two buffers using different row lengths and different strides is at minimum extremely unintuitive. --- .../VideoCommon/TextureConversionShader.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index e7600d6248..ebb3b065fd 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -88,21 +88,19 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) ); } - WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1)); - WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1); - WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples)); - WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", IntLog2(blkH), ~(blkW - 1)); + WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW)); + WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH)); + WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1); + WRITE(p, " int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(blkW / samples)); + WRITE(p, " int x_offset_in_block = (offset_in_block & %d) << %d;\n", (blkW / samples) - 1, IntLog2(samples)); if (samples == 1) { - // 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments - WRITE(p, " bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders - WRITE(p, " x_virtual_position = x_virtual_position << 1;\n"); + WRITE(p, " bool first = 0 == (x_block_position & %d);\n", blkW); + WRITE(p, " x_block_position >>= 1;\n", blkW); } - WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1); - WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", IntLog2(blkW), blkH - 1); - WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n"); - WRITE(p, " sampleUv.y = y_block_position + y_offset;\n"); + WRITE(p, " sampleUv.x = x_block_position + x_offset_in_block;\n"); + WRITE(p, " sampleUv.y = y_block_position + y_offset_in_block;\n"); WRITE(p, " float2 uv0 = float2(sampleUv);\n"); // sampleUv is the sample position in (int)gx_coords WRITE(p, " uv0 += float2(0.5, 0.5);\n"); // move to center of pixel From cb5d3fce4f3f79acca6b1b869b38eb5b5d5cd28a Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 21:20:25 -0800 Subject: [PATCH 04/10] Fix stupid mistake. --- Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 492d67c13e..35f8ed7159 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -20,8 +20,8 @@ namespace DX11 struct EFBEncodeParams { - DWORD SrcTop; DWORD SrcLeft; + DWORD SrcTop; DWORD DestWidth; DWORD ScaleFactor; }; From 1ee09ced0af722d3c7394560a7afdb94c92a0c05 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 21:38:30 -0800 Subject: [PATCH 05/10] Fix OpenGL coordinate computation. --- Source/Core/VideoBackends/OGL/TextureConverter.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index 6384991ceb..5edeebd50b 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -320,14 +320,16 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, source.left, source.top, expandedWidth, bScaleByHalf ? 2 : 1); - int cacheBytes = 32; + unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format); + unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format); + unsigned int cacheLinesPerRow; if ((format & 0x0f) == 6) - cacheBytes = 64; + cacheLinesPerRow = numBlocksX * 2; + else + cacheLinesPerRow = numBlocksX; - int readStride = (expandedWidth * cacheBytes) / - TexDecoder_GetBlockWidthInTexels(format); EncodeToRamUsingShader(source_texture, - dest_ptr, expandedWidth / samples, expandedHeight, readStride, + dest_ptr, cacheLinesPerRow * 8, numBlocksY, cacheLinesPerRow * 32, bScaleByHalf > 0 && !bFromZBuffer); return size_in_bytes; // TODO: D3D11 is calculating this value differently! From 92189823f3d07045431191267c807cf898a066b5 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 22:53:30 -0800 Subject: [PATCH 06/10] Fix RGBA8 encoding. --- Source/Core/VideoCommon/TextureConversionShader.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index ebb3b065fd..1c56621dc2 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -90,14 +90,15 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW)); WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH)); + if (samples == 1) + { + // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8. + WRITE(p, " bool first = !(uv1.x & %d);\n", blkH * blkW / 2); + samples = 2; + } WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1); WRITE(p, " int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(blkW / samples)); WRITE(p, " int x_offset_in_block = (offset_in_block & %d) << %d;\n", (blkW / samples) - 1, IntLog2(samples)); - if (samples == 1) - { - WRITE(p, " bool first = 0 == (x_block_position & %d);\n", blkW); - WRITE(p, " x_block_position >>= 1;\n", blkW); - } WRITE(p, " sampleUv.x = x_block_position + x_offset_in_block;\n"); WRITE(p, " sampleUv.y = y_block_position + y_offset_in_block;\n"); From cb0573012778b319db372f95d88dbea9c2a7d31e Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 23:05:23 -0800 Subject: [PATCH 07/10] Use linear sampling in ScaleByHalf mode. --- Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp | 6 ++++++ Source/Core/VideoBackends/D3D/TextureCache.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 35f8ed7159..3e4db43af9 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -162,6 +162,12 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); D3D::stateman->SetPixelConstants(m_encodeParams); + // Use linear filtering if (bScaleByHalf), use point filtering otherwise + if (scaleByHalf) + D3D::SetLinearCopySampler(); + else + D3D::SetPointCopySampler(); + D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), Renderer::GetTargetWidth(), diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index aab90e2560..086ec5ab5c 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -191,7 +191,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo else if (!TextureCache::Find(addr, hash)) TextureCache::MakeRangeDynamic(addr, (u32)encoded_size); - this->hash = hash; + this->hash = 0; } } From 33259c272b3b03bd4d30ce116b1350ad102e39af Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 23:11:36 -0800 Subject: [PATCH 08/10] Remove some debugging junk. --- Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp | 1 - Source/Core/VideoBackends/D3D/TextureCache.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 3e4db43af9..00f7236c40 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -188,7 +188,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, u8* src = (u8*)map.pData; for (unsigned int y = 0; y < numBlocksY; ++y) { - _assert_msg_(POWERPC, map.RowPitch >= cacheLinesPerRow * 32, ""); memcpy(dst, src, cacheLinesPerRow*32); dst += bpmem.copyMipMapStrideChannels*32; src += map.RowPitch; diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index 086ec5ab5c..aab90e2560 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -191,7 +191,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo else if (!TextureCache::Find(addr, hash)) TextureCache::MakeRangeDynamic(addr, (u32)encoded_size); - this->hash = 0; + this->hash = hash; } } From b56025e6eb30034eb522049355c3547e0e513ed2 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 23:28:39 -0800 Subject: [PATCH 09/10] Don't use boolean negation. --- Source/Core/VideoCommon/TextureConversionShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 1c56621dc2..ab7ab643d1 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -93,7 +93,7 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) if (samples == 1) { // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8. - WRITE(p, " bool first = !(uv1.x & %d);\n", blkH * blkW / 2); + WRITE(p, " bool first = (uv1.x & %d) == 0;\n", blkH * blkW / 2); samples = 2; } WRITE(p, " int offset_in_block = uv1.x & %d;\n", (blkH * blkW / samples) - 1); From 9dbb9bf3b558416dd2c7c61766ec9ce5e841a510 Mon Sep 17 00:00:00 2001 From: magumagu Date: Sun, 25 Jan 2015 23:32:32 -0800 Subject: [PATCH 10/10] Make sure EFB2RAM buffer is wide enough for new coordinate system. --- Source/Core/VideoBackends/OGL/TextureConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index 5edeebd50b..cc9f348743 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -35,7 +35,7 @@ static GLuint s_texConvFrameBuffer[2] = {0,0}; static GLuint s_srcTexture = 0; // for decoding from RAM static GLuint s_dstTexture = 0; // for encoding to RAM -const int renderBufferWidth = 1024; +const int renderBufferWidth = EFB_WIDTH * 4; const int renderBufferHeight = 1024; static SHADER s_rgbToYuyvProgram;