diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 6b6c796015..707aec734c 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -97,6 +97,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true); iniFile.Get("Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable, false); iniFile.Get("Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false); + iniFile.Get("Hacks", "ForceDualSourceBlend", &bForceDualSourceBlend, false); iniFile.Get("Hardware", "Adapter", &iAdapter, 0); @@ -265,6 +266,7 @@ void VideoConfig::Save(const char *ini_file) iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled); iniFile.Set("Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable); iniFile.Set("Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges); + iniFile.Set("Hacks", "ForceDualSourceBlend", bForceDualSourceBlend); iniFile.Set("Hardware", "Adapter", iAdapter); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index b856683595..4a773d30e7 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -124,7 +124,8 @@ struct VideoConfig bool bEnablePixelLighting; bool bHackedBufferUpload; bool bFastDepthCalc; - + //for dx9-backend + bool bForceDualSourceBlend; int iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PerfQuery.cpp index 8a1d5c59a9..fafd06e901 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PerfQuery.cpp @@ -35,6 +35,8 @@ void PerfQuery::DestroyDeviceObjects() void PerfQuery::EnableQuery(PerfQueryGroup type) { + if (!ShouldEmulate()) + return; // Is this sane? if (m_query_count > ARRAYSIZE(m_query_buffer) / 2) WeakFlush(); @@ -58,6 +60,8 @@ void PerfQuery::EnableQuery(PerfQueryGroup type) void PerfQuery::DisableQuery(PerfQueryGroup type) { + if (!ShouldEmulate()) + return; // stop query if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) { @@ -74,6 +78,8 @@ void PerfQuery::ResetQuery() u32 PerfQuery::GetQueryResult(PerfQueryType type) { + if (!ShouldEmulate()) + return 0; u32 result = 0; if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) @@ -98,6 +104,8 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type) void PerfQuery::FlushOne() { + if (!ShouldEmulate()) + return; auto& entry = m_query_buffer[m_query_read_pos]; DWORD result = 0; @@ -118,12 +126,16 @@ void PerfQuery::FlushOne() // TODO: could selectively flush things, but I don't think that will do much void PerfQuery::FlushResults() { + if (!ShouldEmulate()) + return; while (!IsFlushed()) FlushOne(); } void PerfQuery::WeakFlush() { + if (!ShouldEmulate()) + return; while (!IsFlushed()) { auto& entry = m_query_buffer[m_query_read_pos]; @@ -148,6 +160,8 @@ void PerfQuery::WeakFlush() bool PerfQuery::IsFlushed() const { + if (!ShouldEmulate()) + return true; return 0 == m_query_count; } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index a4a788c7e3..ea6503e9e0 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -117,30 +117,30 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGB8ToRGBA6() /* old code here for reference const char code[] = { - "uniform sampler samp0 : register(s0);\n" - "void main(\n" - " out float4 ocol0 : COLOR0,\n" - " in float2 uv0 : TEXCOORD0){\n" - " ocol0 = tex2D(samp0,uv0);\n" - " float4 src8 = round(ocol0*255.f);\n" - " ocol0.r = floor(src8.r/4.f);\n" // dst6r = src8r>>2; - " ocol0.g = frac(src8.r/4.f)*4.f*16.f + floor(src8.g/16.f);\n" // dst6g = ((src8r&0x3)<<4)|(src8g>>4); - " ocol0.b = frac(src8.g/16.f)*16.f*4.f + floor(src8.b/64.f);\n" // dst6b = ((src8g&0xF)<<2)|(src8b>>6); - " ocol0.a = frac(src8.b/64.f)*64.f;\n" // dst6a = src8b&0x3F; - " ocol0 /= 63.f;\n" - "}\n" + "uniform sampler samp0 : register(s0);\n" + "void main(\n" + " out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0){\n" + " ocol0 = tex2D(samp0,uv0);\n" + " float4 src8 = round(ocol0*255.f);\n" + " ocol0.r = floor(src8.r/4.f);\n" // dst6r = src8r>>2; + " ocol0.g = frac(src8.r/4.f)*4.f*16.f + floor(src8.g/16.f);\n" // dst6g = ((src8r&0x3)<<4)|(src8g>>4); + " ocol0.b = frac(src8.g/16.f)*16.f*4.f + floor(src8.b/64.f);\n" // dst6b = ((src8g&0xF)<<2)|(src8b>>6); + " ocol0.a = frac(src8.b/64.f)*64.f;\n" // dst6a = src8b&0x3F; + " ocol0 /= 63.f;\n" + "}\n" }; */ const char code[] = { "uniform sampler samp0 : register(s0);\n" "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0){\n" - "float4 temp1 = float4(1.0f/4.0f,1.0f/16.0f,1.0f/64.0f,0.0f);\n" - "float4 temp2 = float4(1.0f,64.0f,255.0f,1.0f/63.0f);\n" - "float4 src8 = round(tex2D(samp0,uv0)*temp2.z) * temp1;\n" - "ocol0 = (frac(src8.wxyz) * temp2.xyyy + floor(src8)) * temp2.w;\n" + "out float4 ocol0 : COLOR0,\n" + "in float2 uv0 : TEXCOORD0){\n" + "float4 temp1 = float4(1.0f/4.0f,1.0f/16.0f,1.0f/64.0f,0.0f);\n" + "float4 temp2 = float4(1.0f,64.0f,255.0f,1.0f/63.0f);\n" + "float4 src8 = round(tex2D(samp0,uv0)*temp2.z) * temp1;\n" + "ocol0 = (frac(src8.wxyz) * temp2.xyyy + floor(src8)) * temp2.w;\n" "}\n" }; if (!s_rgb8_to_rgba6) s_rgb8_to_rgba6 = D3D::CompileAndCreatePixelShader(code, (int)strlen(code)); @@ -168,26 +168,28 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) WRITE(p, "uniform float4 cColMatrix[7] : register(c%d);\n", C_COLORMATRIX); WRITE(p, "void main(\n" - "out float4 ocol0 : COLOR0,\n"); + "out float4 ocol0 : COLOR0,\n"); switch(SSAAMode % MAX_SSAA_SHADERS) { case 0: // 1 Sample WRITE(p, "in float2 uv0 : TEXCOORD0,\n" - "in float uv1 : TEXCOORD1){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n"); + "in float uv1 : TEXCOORD1){\n" + "float4 texcol = tex2D(samp0,uv0.xy);\n"); break; - case 1: // 1 Samples SSAA - WRITE(p, "in float2 uv0 : TEXCOORD0,\n" - "in float uv1 : TEXCOORD1){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n"); - break; - case 2: // 4 Samples SSAA + case 1: // 4 Samples in 4x SSAA buffer WRITE(p, "in float4 uv0 : TEXCOORD0,\n" - "in float uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3){\n" - "float4 texcol = (tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz))*0.25f;\n"); + "in float uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3){\n" + "float4 texcol = (tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz))*0.25f;\n"); + break; + case 2: // 4 Samples in 9x SSAA buffer + WRITE(p, "in float4 uv0 : TEXCOORD0,\n" + "in float uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3){\n" + "float4 texcol = (tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz))*0.25f;\n"); break; } @@ -196,7 +198,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv // Watch out for the fire fumes effect in Metroid it's really sensitive to this, // the lighting in RE0 is also way beyond sensitive since the "good value" is hardcoded and Dolphin is almost always off. WRITE(p, "float4 EncodedDepth = frac(texcol.r * (16777215.f/16777216.f) * float4(1.0f,256.0f,256.0f*256.0f,1.0f));\n" - "texcol = floor(EncodedDepth * float4(256.f,256.f,256.f,15.0f)) / float4(255.0f,255.0f,255.0f,15.0f);\n"); + "texcol = floor(EncodedDepth * float4(256.f,256.f,256.f,15.0f)) / float4(255.0f,255.0f,255.0f,15.0f);\n"); } else { @@ -217,7 +219,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv WRITE(p, "}\n"); if (text[sizeof(text) - 1] != 0x7C) PanicAlert("PixelShaderCache copy shader generator - buffer too small, canary has been eaten!"); - + uselocale(old_locale); // restore locale freelocale(locale); return D3D::CompileAndCreatePixelShader(text, (int)strlen(text)); @@ -231,10 +233,10 @@ void PixelShaderCache::Init() { char pprog[3072]; sprintf(pprog, "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 incol0 : COLOR0){\n" - "ocol0 = incol0;\n" - "}\n"); + "out float4 ocol0 : COLOR0,\n" + " in float4 incol0 : COLOR0){\n" + "ocol0 = incol0;\n" + "}\n"); s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); } @@ -299,27 +301,27 @@ void PixelShaderCache::Shutdown() for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) if(s_CopyProgram[copyMatrixType][depthType][ssaaMode] - && (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])) + && (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])) s_CopyProgram[copyMatrixType][depthType][ssaaMode]->Release(); - for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) - for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) - for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) - s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; + for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) + for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) + for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) + s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; - if (s_ClearProgram) s_ClearProgram->Release(); - s_ClearProgram = NULL; - if (s_rgb8_to_rgba6) s_rgb8_to_rgba6->Release(); - s_rgb8_to_rgba6 = NULL; - if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release(); - s_rgba6_to_rgb8 = NULL; + if (s_ClearProgram) s_ClearProgram->Release(); + s_ClearProgram = NULL; + if (s_rgb8_to_rgba6) s_rgb8_to_rgba6->Release(); + s_rgb8_to_rgba6 = NULL; + if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release(); + s_rgba6_to_rgb8 = NULL; - Clear(); - g_ps_disk_cache.Sync(); - g_ps_disk_cache.Close(); + Clear(); + g_ps_disk_cache.Sync(); + g_ps_disk_cache.Close(); - unique_shaders.clear(); + unique_shaders.clear(); } bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 80bfa54c0f..bd4f9fed98 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -70,7 +70,7 @@ void SetupDeviceObjects() VertexShaderManager::Dirty(); PixelShaderManager::Dirty(); TextureConverter::Init(); - + // To avoid shader compilation stutters, read back all shaders from cache. VertexShaderCache::Init(); PixelShaderCache::Init(); @@ -122,7 +122,7 @@ Renderer::Renderer() fullScreenRes = 0; D3D::Create(g_ActiveConfig.iAdapter, EmuWindow::GetWnd(), - fullScreenRes, backbuffer_ms_mode, false); + fullScreenRes, backbuffer_ms_mode, false); IS_AMD = D3D::IsATIDevice(); @@ -165,7 +165,7 @@ Renderer::Renderer() vp.MaxZ = 1.0f; D3D::dev->SetViewport(&vp); D3D::dev->Clear(0, NULL, D3DCLEAR_TARGET, 0x0, 0, 0); - + D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface()); D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface()); vp.X = 0; @@ -189,7 +189,7 @@ Renderer::~Renderer() D3D::EndFrame(); D3D::Present(); D3D::Close(); - + delete[] st; } @@ -267,7 +267,7 @@ bool Renderer::CheckForResize() return true; } - + return false; } @@ -400,7 +400,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) colmat[0] = colmat[5] = colmat[10] = 1.0f; PixelShaderManager::SetColorMatrix(colmat); // set transformation LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture(); - + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); D3DFORMAT bformat = FramebufferManager::GetEFBDepthRTSurfaceFormat(); @@ -484,10 +484,10 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) // TODO: Speed this up by batching pokes? ResetAPIState(); D3D::drawColorQuad(poke_data, - (float)RectToLock.left * 2.f / (float)Renderer::GetTargetWidth() - 1.f, - - (float)RectToLock.top * 2.f / (float)Renderer::GetTargetHeight() + 1.f, - (float)RectToLock.right * 2.f / (float)Renderer::GetTargetWidth() - 1.f, - - (float)RectToLock.bottom * 2.f / (float)Renderer::GetTargetHeight() + 1.f); + (float)RectToLock.left * 2.f / (float)Renderer::GetTargetWidth() - 1.f, + - (float)RectToLock.top * 2.f / (float)Renderer::GetTargetHeight() + 1.f, + (float)RectToLock.right * 2.f / (float)Renderer::GetTargetWidth() - 1.f, + - (float)RectToLock.bottom * 2.f / (float)Renderer::GetTargetHeight() + 1.f); RestoreAPIState(); return 0; } @@ -797,7 +797,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons // Prepare to copy the XFBs to our backbuffer D3D::dev->SetDepthStencilSurface(NULL); D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface()); - + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); D3DVIEWPORT9 vp; @@ -856,7 +856,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons xfbSource = xfbSourceList[i]; MathUtil::Rectangle sourceRc; - + sourceRc.left = 0; sourceRc.top = 0; sourceRc.right = (float)xfbSource->texWidth; @@ -905,7 +905,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons Width,Height, PixelShaderCache::GetColorCopyProgram(g_ActiveConfig.iMultisampleMode), VertexShaderCache::GetSimpleVertexShader(g_ActiveConfig.iMultisampleMode),Gamma); - + } D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); @@ -953,7 +953,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons { char msg [255]; sprintf_s(msg,255, "Dumping Frames to \"%sframedump0.avi\" (%dx%d RGB24)", - File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), s_recordWidth, s_recordHeight); + File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), s_recordWidth, s_recordHeight); OSD::AddMessage(msg, 2000); } } @@ -1059,7 +1059,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons s_LastAA = newAA; UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); - + int SupersampleCoeficient = (s_LastAA % 3) + 1; s_LastEFBScale = g_ActiveConfig.iEFBScale; @@ -1081,6 +1081,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface()); D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface()); D3D::dev->Clear(0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0,0,0), 1.0f, 0); + SetLineWidth(); } if (XFBWrited) @@ -1141,8 +1142,8 @@ void Renderer::RestoreState() D3D::RefreshRenderState(D3DRS_ZFUNC); } // TODO: Enable this code. Caused glitches for me however (neobrain) -// for (unsigned int i = 0; i < 8; ++i) -// D3D::dev->SetTexture(i, NULL); + // for (unsigned int i = 0; i < 8; ++i) + // D3D::dev->SetTexture(i, NULL); } // ALWAYS call RestoreAPIState for each ResetAPIState call you're doing @@ -1316,9 +1317,8 @@ void Renderer::SetLineWidth() { // We can't change line width in D3D unless we use ID3DXLine float fratio = xfregs.viewport.wd != 0 ? Renderer::EFBToScaledXf(1.f) : 1.0f; - float psize = bpmem.lineptwidth.linesize * fratio / 6.0f; - //little hack to compensate scaling problems in dx9 must be taken out when scaling is fixed. - psize *= 2.0f; + float psize = bpmem.lineptwidth.pointsize * fratio / 6.0f; + psize = psize > 0 ? psize : 1.0; if (psize > m_fMaxPointSize) { psize = m_fMaxPointSize; @@ -1348,7 +1348,7 @@ void Renderer::SetSamplerState(int stage, int texindex) const FourTexUnits &tex = bpmem.tex[texindex]; const TexMode0 &tm0 = tex.texMode0[stage]; const TexMode1 &tm1 = tex.texMode1[stage]; - + D3DTEXTUREFILTERTYPE min, mag, mip; if (g_ActiveConfig.bForceFiltering) { @@ -1370,7 +1370,7 @@ void Renderer::SetSamplerState(int stage, int texindex) D3D::SetSamplerState(stage, D3DSAMP_MINFILTER, min); D3D::SetSamplerState(stage, D3DSAMP_MAGFILTER, mag); D3D::SetSamplerState(stage, D3DSAMP_MIPFILTER, mip); - + D3D::SetSamplerState(stage, D3DSAMP_ADDRESSU, d3dClamps[tm0.wrap_s]); D3D::SetSamplerState(stage, D3DSAMP_ADDRESSV, d3dClamps[tm0.wrap_t]); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index adb4ddf580..dc79d68def 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -37,15 +37,15 @@ inline void DumpBadShaders() { #if defined(_DEBUG) || defined(DEBUGFAST) // TODO: Reimplement! -/* std::string error_shaders; + /* std::string error_shaders; error_shaders.append(VertexShaderCache::GetCurrentShaderCode()); error_shaders.append(PixelShaderCache::GetCurrentShaderCode()); char filename[512] = "bad_shader_combo_0.txt"; int which = 0; while (File::Exists(filename)) { - which++; - sprintf(filename, "bad_shader_combo_%i.txt", which); + which++; + sprintf(filename, "bad_shader_combo_%i.txt", which); } File::WriteStringToFile(true, error_shaders, filename); PanicAlert("DrawIndexedPrimitiveUP failed. Shaders written to %s", filename);*/ @@ -64,7 +64,7 @@ void VertexManager::CreateDeviceObjects() m_index_buffer_size = (IBUFFER_SIZE > DeviceCaps.MaxVertexIndex) ? DeviceCaps.MaxVertexIndex : IBUFFER_SIZE; //if device caps are not enough for Vbuffer fall back to vertex arrays if (m_index_buffer_size < MAXIBUFFERSIZE || m_vertex_buffer_size < MAXVBUFFERSIZE) return; - + m_vertex_buffers = new LPDIRECT3DVERTEXBUFFER9[MAX_VBUFFER_COUNT]; m_index_buffers = new LPDIRECT3DINDEXBUFFER9[MAX_VBUFFER_COUNT]; @@ -144,59 +144,61 @@ void VertexManager::PrepareDrawBuffers(u32 stride) int datasize = IndexGenerator::GetNumVerts() * stride; int TdataSize = IndexGenerator::GetTriangleindexLen(); int LDataSize = IndexGenerator::GetLineindexLen(); - int PDataSize = IndexGenerator::GetPointindexLen(); int IndexDataSize = TdataSize + LDataSize; - DWORD LockMode = D3DLOCK_NOOVERWRITE; - m_vertex_buffer_cursor--; - m_vertex_buffer_cursor = m_vertex_buffer_cursor - (m_vertex_buffer_cursor % stride) + stride; - if (m_vertex_buffer_cursor > m_vertex_buffer_size - datasize) + if(IndexDataSize) { - LockMode = D3DLOCK_DISCARD; - m_vertex_buffer_cursor = 0; - m_current_vertex_buffer = (m_current_vertex_buffer + 1) % m_buffers_count; - } - if(FAILED(m_vertex_buffers[m_current_vertex_buffer]->Lock(m_vertex_buffer_cursor, datasize,(VOID**)(&pVertices), LockMode))) - { - DestroyDeviceObjects(); - return; - } - memcpy(pVertices, s_pBaseBufferPointer, datasize); - m_vertex_buffers[m_current_vertex_buffer]->Unlock(); + DWORD LockMode = D3DLOCK_NOOVERWRITE; + m_vertex_buffer_cursor--; + m_vertex_buffer_cursor = m_vertex_buffer_cursor - (m_vertex_buffer_cursor % stride) + stride; + if (m_vertex_buffer_cursor > m_vertex_buffer_size - datasize) + { + LockMode = D3DLOCK_DISCARD; + m_vertex_buffer_cursor = 0; + m_current_vertex_buffer = (m_current_vertex_buffer + 1) % m_buffers_count; + } + if(FAILED(m_vertex_buffers[m_current_vertex_buffer]->Lock(m_vertex_buffer_cursor, datasize,(VOID**)(&pVertices), LockMode))) + { + DestroyDeviceObjects(); + return; + } + memcpy(pVertices, s_pBaseBufferPointer, datasize); + m_vertex_buffers[m_current_vertex_buffer]->Unlock(); - LockMode = D3DLOCK_NOOVERWRITE; - if (m_index_buffer_cursor > m_index_buffer_size - IndexDataSize) - { - LockMode = D3DLOCK_DISCARD; - m_index_buffer_cursor = 0; - m_current_index_buffer = (m_current_index_buffer + 1) % m_buffers_count; - } - - if(FAILED(m_index_buffers[m_current_index_buffer]->Lock(m_index_buffer_cursor * sizeof(u16), IndexDataSize * sizeof(u16), (VOID**)(&pIndices), LockMode ))) - { - DestroyDeviceObjects(); - return; + LockMode = D3DLOCK_NOOVERWRITE; + if (m_index_buffer_cursor > m_index_buffer_size - IndexDataSize) + { + LockMode = D3DLOCK_DISCARD; + m_index_buffer_cursor = 0; + m_current_index_buffer = (m_current_index_buffer + 1) % m_buffers_count; + } + + if(FAILED(m_index_buffers[m_current_index_buffer]->Lock(m_index_buffer_cursor * sizeof(u16), IndexDataSize * sizeof(u16), (VOID**)(&pIndices), LockMode ))) + { + DestroyDeviceObjects(); + return; + } + if(TdataSize) + { + memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16)); + pIndices += TdataSize; + } + if(LDataSize) + { + memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16)); + pIndices += LDataSize; + } + m_index_buffers[m_current_index_buffer]->Unlock(); } - if(TdataSize) - { - memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16)); - pIndices += TdataSize; - } - if(LDataSize) - { - memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16)); - pIndices += LDataSize; - } - m_index_buffers[m_current_index_buffer]->Unlock(); if(m_current_stride != stride || m_vertex_buffer_cursor == 0) { m_current_stride = stride; - D3D::SetStreamSource( 0, m_vertex_buffers[m_current_vertex_buffer], 0, stride); + D3D::SetStreamSource( 0, m_vertex_buffers[m_current_vertex_buffer], 0, m_current_stride); } if (m_index_buffer_cursor == 0) { D3D::SetIndices(m_index_buffers[m_current_index_buffer]); } - + ADDSTAT(stats.thisFrame.bytesVertexStreamed, datasize); ADDSTAT(stats.thisFrame.bytesIndexStreamed, IndexDataSize); } @@ -241,22 +243,28 @@ void VertexManager::DrawVertexBuffer(int stride) } if (points > 0) { - //DrawIndexedPrimitive does not support point list so we have to draw the points one by one - for (int i = 0; i < points; i++) + //DrawIndexedPrimitive does not support point list so we have to draw them using DrawPrimitive + u16* PointIndexBuffer = GetPointIndexBuffer(); + int i = 0; + do { + int count = i + 1; + while (count < points && PointIndexBuffer[count - 1] + 1 == PointIndexBuffer[count]) + { + count++; + } if (FAILED(D3D::dev->DrawPrimitive( - D3DPT_POINTLIST, - basevertex + GetPointIndexBuffer()[i], - 1))) + D3DPT_POINTLIST, + basevertex + PointIndexBuffer[i], + count - i))) { DumpBadShaders(); } INCSTAT(stats.thisFrame.numDrawCalls); - } - - + i = count; + } while (i < points); } - + } void VertexManager::DrawVertexArray(int stride) @@ -351,7 +359,7 @@ void VertexManager::vFlush() PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); u32 stride = g_nativeVertexFmt->GetVertexStride(); bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && - bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; + bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; bool useDualSource = useDstAlpha && g_ActiveConfig.backend_info.bSupportsDualSourceBlend; DSTALPHA_MODE AlphaMode = useDualSource ? DSTALPHA_DUAL_SOURCE_BLEND : DSTALPHA_NONE; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 2d11368a6d..7c53bdfe23 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -59,72 +59,76 @@ void VertexShaderCache::Init() { char* vProg = new char[2048]; sprintf(vProg,"struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float2 vTexCoord : TEXCOORD0;\n" - "float vTexCoord1 : TEXCOORD1;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0;\n" - "OUT.vTexCoord1 = inTEX2;\n" - "return OUT;\n" - "}\n"); + "{\n" + "float4 vPosition : POSITION;\n" + "float2 vTexCoord : TEXCOORD0;\n" + "float vTexCoord1 : TEXCOORD1;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" + "{\n" + "VSOUTPUT OUT;\n" + "OUT.vPosition = inPosition;\n" + "OUT.vTexCoord = inTEX0;\n" + "OUT.vTexCoord1 = inTEX2;\n" + "return OUT;\n" + "}\n"); SimpleVertexShader[0] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); sprintf(vProg,"struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float4 vColor0 : COLOR0;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vColor0 = inColor0;\n" - "return OUT;\n" - "}\n"); + "{\n" + "float4 vPosition : POSITION;\n" + "float4 vColor0 : COLOR0;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n" + "{\n" + "VSOUTPUT OUT;\n" + "OUT.vPosition = inPosition;\n" + "OUT.vColor0 = inColor0;\n" + "return OUT;\n" + "}\n"); ClearVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); sprintf(vProg, "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float2 vTexCoord : TEXCOORD0;\n" - "float vTexCoord1 : TEXCOORD1;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inInvTexSize : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" - "{\n" - "VSOUTPUT OUT;" - "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0;\n" - "OUT.vTexCoord1 = inTEX2;\n" - "return OUT;\n" - "}\n"); + "{\n" + "float4 vPosition : POSITION;\n" + "float4 vTexCoord : TEXCOORD0;\n" + "float vTexCoord1 : TEXCOORD1;\n" + "float4 vTexCoord2 : TEXCOORD2;\n" + "float4 vTexCoord3 : TEXCOORD3;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" + "{\n" + "VSOUTPUT OUT;" + "OUT.vPosition = inPosition;\n" + "OUT.vTexCoord = inTEX0.xyyx;\n" + "OUT.vTexCoord1 = inTEX2.x;\n" + "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.495f,-0.495f, 0.495f,-0.495f) * inTEX1.xyyx);\n" + "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.495f, 0.495f,-0.495f, 0.495f) * inTEX1.xyyx);\n" + "return OUT;\n" + "}\n"); SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); sprintf(vProg, "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float4 vTexCoord : TEXCOORD0;\n" - "float vTexCoord1 : TEXCOORD1;\n" - "float4 vTexCoord2 : TEXCOORD2;\n" - "float4 vTexCoord3 : TEXCOORD3;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" - "{\n" - "VSOUTPUT OUT;" - "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0.xyyx;\n" - "OUT.vTexCoord1 = inTEX2.x;\n" - "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-1.0f,-0.5f, 1.0f,-0.5f) * inTEX1.xyyx);\n" - "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 1.0f, 0.5f,-1.0f, 0.5f) * inTEX1.xyyx);\n" - "return OUT;\n" - "}\n"); + "{\n" + "float4 vPosition : POSITION;\n" + "float4 vTexCoord : TEXCOORD0;\n" + "float vTexCoord1 : TEXCOORD1;\n" + "float4 vTexCoord2 : TEXCOORD2;\n" + "float4 vTexCoord3 : TEXCOORD3;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float inTEX2 : TEXCOORD2)\n" + "{\n" + "VSOUTPUT OUT;" + "OUT.vPosition = inPosition;\n" + "OUT.vTexCoord = inTEX0.xyyx;\n" + "OUT.vTexCoord1 = inTEX2.x;\n" + "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.9f,-0.45f, 0.9f,-0.45f) * inTEX1.xyyx);\n" + "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.9f, 0.45f,-0.9f, 0.45f) * inTEX1.xyyx);\n" + "return OUT;\n" + "}\n"); SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); - + Clear(); delete [] vProg; @@ -136,7 +140,7 @@ void VertexShaderCache::Init() char cache_filename[MAX_PATH]; sprintf(cache_filename, "%sdx9-%s-vs.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(), - SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); + SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); VertexShaderCacheInserter inserter; g_vs_disk_cache.OpenAndRead(cache_filename, inserter); @@ -168,7 +172,7 @@ void VertexShaderCache::Shutdown() if (ClearVertexShader) ClearVertexShader->Release(); ClearVertexShader = NULL; - + Clear(); g_vs_disk_cache.Sync(); g_vs_disk_cache.Close(); @@ -251,10 +255,16 @@ bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *byt return false; } +float VSConstantbuffer[4*C_VENVCONST_END]; + void Renderer::SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { - const float f[4] = { f1, f2, f3, f4 }; - DX9::D3D::dev->SetVertexShaderConstantF(const_number, f, 1); + float* VSConstantbuffer_pointer = &VSConstantbuffer[const_number]; + VSConstantbuffer_pointer[0] = f1; + VSConstantbuffer_pointer[1] = f2; + VSConstantbuffer_pointer[2] = f3; + VSConstantbuffer_pointer[3] = f4; + DX9::D3D::dev->SetVertexShaderConstantF(const_number, VSConstantbuffer_pointer, 1); } void Renderer::SetVSConstant4fv(unsigned int const_number, const float *f) @@ -264,15 +274,15 @@ void Renderer::SetVSConstant4fv(unsigned int const_number, const float *f) void Renderer::SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float *f) { - float buf[4*C_VENVCONST_END]; + float* VSConstantbuffer_pointer = &VSConstantbuffer[const_number]; for (unsigned int i = 0; i < count; i++) { - buf[4*i ] = *f++; - buf[4*i+1] = *f++; - buf[4*i+2] = *f++; - buf[4*i+3] = 0.f; + *VSConstantbuffer_pointer++ = *f++; + *VSConstantbuffer_pointer++ = *f++; + *VSConstantbuffer_pointer++ = *f++; + *VSConstantbuffer_pointer++ = 0.f; } - DX9::D3D::dev->SetVertexShaderConstantF(const_number, buf, count); + DX9::D3D::dev->SetVertexShaderConstantF(const_number, &VSConstantbuffer[const_number], count); } void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index 1a4e1bf84d..f3ca31ba82 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -90,10 +90,24 @@ void InitBackendInfo() g_Config.backend_info.bUseRGBATextures = false; g_Config.backend_info.bUseMinimalMipCount = true; g_Config.backend_info.bSupports3DVision = true; - g_Config.backend_info.bSupportsPrimitiveRestart = false; // TODO: figure out if it does + g_Config.backend_info.bSupportsPrimitiveRestart = false; // D3D9 does not support primitive restart g_Config.backend_info.bSupportsSeparateAlphaFunction = device_caps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND; // Dual source blend disabled by default until a proper method to test for support is found - g_Config.backend_info.bSupportsDualSourceBlend = false; + g_Config.backend_info.bSupports3DVision = true; + OSVERSIONINFO info; + ZeroMemory(&info, sizeof(OSVERSIONINFO)); + info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + if (GetVersionEx(&info)) + { + // dual source blending is only supported in windows 7 o newer. sorry xp users + // we cannot test for device caps because most drivers just declare the minimun caps + // and don't expose their support for some functionalities + g_Config.backend_info.bSupportsDualSourceBlend = g_Config.backend_info.bSupportsSeparateAlphaFunction && (info.dwPlatformId == VER_PLATFORM_WIN32_NT) && ((info.dwMajorVersion > 6) || ((info.dwMajorVersion == 6) && info.dwMinorVersion >= 1)); + } + else + { + g_Config.backend_info.bSupportsDualSourceBlend = false; + } g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants; g_Config.backend_info.bSupportsEarlyZ = false; @@ -112,7 +126,7 @@ void InitBackendInfo() for (int i = 0; i < (int)adapter.aa_levels.size(); ++i) g_Config.backend_info.AAModes.push_back(adapter.aa_levels[i].name); } - + // Clear ppshaders string vector g_Config.backend_info.PPShaders.clear(); @@ -139,6 +153,9 @@ bool VideoBackend::Initialize(void *&window_handle) g_Config.GameIniLoad(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strGameIni.c_str()); g_Config.UpdateProjectionHack(); g_Config.VerifyValidity(); + // as only some driver/hardware configurations support dual source blending only enable it if is + // configured by user + g_Config.backend_info.bSupportsDualSourceBlend &= g_Config.bForceDualSourceBlend; UpdateActiveConfig(); window_handle = (void*)EmuWindow::Create((HWND)window_handle, GetModuleHandle(0), _T("Loading - Please wait."));