this commit is divided in 4 parts:

1 - Optimize pixelshadergen to avoid redundant register overflow math, with this if a game don't need this will be not applied.
this must bring some fill rate back and improve speed a little in fill rate limited systems.
2- some corrections to vertexshadergen to avoid uninitialized texture coordinates, dono if is the correct way to fix it but t least it will make house of the dead overkill playable in dx11.
the bad thing: still missing geometri in dx9, in dx11 it works exactly as on opengl.
3 - some optimization made to improve fps a little wih the latests changes made to fifo.
* back to the original code in beginfield as now it will work right.
* check for efb access more often as a lot of time is lost waiting for efb access
4 - apply a little fix for missing textures in nvidia opengl tanks to  Wagnard28 for finding that nvidia does not like invalid shader id :)
please test for any regression

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5812 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado
2010-06-29 14:40:37 +00:00
parent 4afe48a0fb
commit d511b50612
11 changed files with 213 additions and 145 deletions

View File

@ -21,7 +21,7 @@
#include "D3DBase.h"
#include "D3DShader.h"
#include "D3DUtil.h"
#include "Fifo.h"
#include "Statistics.h"
#include "Profiler.h"
#include "FBManager.h"
@ -304,6 +304,7 @@ void Flush()
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
DVSTARTPROFILE();

View File

@ -71,7 +71,6 @@ static bool s_PluginInitialized = false;
volatile u32 s_swapRequested = FALSE;
static u32 s_efbAccessRequested = FALSE;
static volatile u32 s_FifoShuttingDown = FALSE;
static bool ForceSwap = true;
static volatile struct
{
@ -234,7 +233,6 @@ void Video_Prepare()
s_efbAccessRequested = FALSE;
s_FifoShuttingDown = FALSE;
s_swapRequested = FALSE;
ForceSwap = true;
Renderer::Init();
TextureCache::Init();
BPInit();
@ -307,16 +305,16 @@ void Video_SetRendering(bool bEnabled)
Fifo_SetRendering(bEnabled);
}
// run from the graphics thread
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequest()
{
if (Common::AtomicLoadAcquire(s_swapRequested))
if(g_ActiveConfig.bUseXFB)
{
if (ForceSwap || g_ActiveConfig.bUseXFB)
if (Common::AtomicLoadAcquire(s_swapRequested))
{
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
}
@ -325,33 +323,29 @@ inline bool addrRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
return !((aLower >= bUpper) || (bLower >= aUpper));
}
// Run from the graphics thread
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequestAt(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
if (Common::AtomicLoadAcquire(s_swapRequested) && g_ActiveConfig.bUseXFB)
if (g_ActiveConfig.bUseXFB)
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if(Common::AtomicLoadAcquire(s_swapRequested))
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
ForceSwap = false;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
}
}
// Run from the CPU thread (from VideoInterface.cpp)
void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{
if (s_PluginInitialized && g_ActiveConfig.bUseXFB)
{
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
{
if (g_VideoInitialize.bOnThread)
{
while (Common::AtomicLoadAcquire(s_swapRequested) && !s_FifoShuttingDown)
@ -359,7 +353,13 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
Common::YieldCPU();
}
else
VideoFifo_CheckSwapRequest();
VideoFifo_CheckSwapRequest();
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
}
}

View File

@ -19,7 +19,7 @@
#include "FileUtil.h"
#include "D3DBase.h"
#include "Fifo.h"
#include "Statistics.h"
#include "Profiler.h"
#include "VertexManager.h"
@ -226,7 +226,7 @@ void Flush()
if (LocalVBuffer == s_pCurBufferPointer) return;
if(Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
DVSTARTPROFILE();
u32 usedtextures = 0;

View File

@ -65,7 +65,6 @@ static bool s_PluginInitialized = false;
volatile u32 s_swapRequested = FALSE;
static u32 s_efbAccessRequested = FALSE;
static volatile u32 s_FifoShuttingDown = FALSE;
static bool ForceSwap = true;
static volatile struct
{
@ -275,7 +274,6 @@ void Video_Prepare()
s_efbAccessRequested = FALSE;
s_FifoShuttingDown = FALSE;
s_swapRequested = FALSE;
ForceSwap = true;
Renderer::Init();
TextureCache::Init();
BPInit();
@ -342,16 +340,16 @@ void Video_SetRendering(bool bEnabled) {
Fifo_SetRendering(bEnabled);
}
// Run from the graphics thread
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequest()
{
if (Common::AtomicLoadAcquire(s_swapRequested))
if(g_ActiveConfig.bUseXFB)
{
if (ForceSwap || g_ActiveConfig.bUseXFB)
if (Common::AtomicLoadAcquire(s_swapRequested))
{
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
}
@ -360,34 +358,29 @@ inline bool addrRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
return !((aLower >= bUpper) || (bLower >= aUpper));
}
// Run from the graphics thread
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequestAt(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
if (Common::AtomicLoadAcquire(s_swapRequested) && g_ActiveConfig.bUseXFB)
if (g_ActiveConfig.bUseXFB)
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if(Common::AtomicLoadAcquire(s_swapRequested))
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
ForceSwap = false;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
}
}
// Run from the CPU thread (from VideoInterface.cpp)
void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{
if (s_PluginInitialized && g_ActiveConfig.bUseXFB)
{
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
{
if (g_VideoInitialize.bOnThread)
{
while (Common::AtomicLoadAcquire(s_swapRequested) && !s_FifoShuttingDown)
@ -395,7 +388,13 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
Common::YieldCPU();
}
else
VideoFifo_CheckSwapRequest();
VideoFifo_CheckSwapRequest();
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
}
}

View File

@ -557,8 +557,6 @@ void Renderer::ResetAPIState()
{
// Gets us to a reasonably sane state where it's possible to do things like
// image copies with textured quads, etc.
glDisable(GL_VERTEX_PROGRAM_ARB);// needed by nvidia cards to avoid texture problems
glDisable(GL_FRAGMENT_PROGRAM_ARB);// needed by nvidia cards to avoid texture problems
VertexShaderCache::DisableShader();
PixelShaderCache::DisableShader();
glDisable(GL_SCISSOR_TEST);
@ -586,10 +584,8 @@ void Renderer::RestoreAPIState()
SetColorMask();
SetBlendMode(true);
glEnable(GL_VERTEX_PROGRAM_ARB);// needed by nvidia cards o avoid texture problems
glEnable(GL_FRAGMENT_PROGRAM_ARB);// needed by nvidia cards o avoid texture problems
VertexShaderCache::SetCurrentShader(0);
PixelShaderCache::SetCurrentShader(0);
VertexShaderCache::SetCurrentShader(1);
PixelShaderCache::SetCurrentShader(1);
}
void Renderer::SetColorMask()

View File

@ -202,6 +202,7 @@ void Flush()
if (LocalVBuffer == s_pCurBufferPointer) return;
if(Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op,

View File

@ -106,7 +106,6 @@ static bool s_PluginInitialized = false;
volatile u32 s_swapRequested = FALSE;
static u32 s_efbAccessRequested = FALSE;
static volatile u32 s_FifoShuttingDown = FALSE;
static bool ForceSwap = true;
bool IsD3D()
{
@ -301,8 +300,7 @@ void Video_Prepare(void)
void Shutdown(void)
{
s_PluginInitialized = false;
ForceSwap = true;
s_PluginInitialized = false;
s_efbAccessRequested = FALSE;
s_swapRequested = FALSE;
@ -368,14 +366,13 @@ static volatile struct
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequest()
{
if (Common::AtomicLoadAcquire(s_swapRequested))
if(g_ActiveConfig.bUseXFB)
{
if (ForceSwap || g_ActiveConfig.bUseXFB)
if (Common::AtomicLoadAcquire(s_swapRequested))
{
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
Common::AtomicStoreRelease(s_swapRequested, FALSE);
}
}
@ -387,31 +384,26 @@ inline bool addrRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequestAt(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
if (Common::AtomicLoadAcquire(s_swapRequested) && g_ActiveConfig.bUseXFB)
if (g_ActiveConfig.bUseXFB)
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if(Common::AtomicLoadAcquire(s_swapRequested))
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
ForceSwap = false;
if (addrRangesOverlap(aLower, aUpper, bLower, bUpper))
VideoFifo_CheckSwapRequest();
}
}
}
// Run from the CPU thread (from VideoInterface.cpp)
void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{
if (s_PluginInitialized && g_ActiveConfig.bUseXFB)
{
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
{
if (g_VideoInitialize.bOnThread)
{
while (Common::AtomicLoadAcquire(s_swapRequested) && !s_FifoShuttingDown)
@ -420,6 +412,12 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
}
else
VideoFifo_CheckSwapRequest();
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
}
}