Apply some speedups to the DX11 plugin, plus some minor changes

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5721 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
luisr142004
2010-06-16 10:12:57 +00:00
parent 980a2680be
commit 4f8a6a1573
13 changed files with 202 additions and 221 deletions

View File

@ -294,7 +294,7 @@ HRESULT Create(HWND wnd)
{
// try using the first one
hr = factory->EnumAdapters(0, &adapter);
if (FAILED(hr)) MessageBox(wnd, _T("Failed to enumerate adapter"), _T("Dolphin Direct3D 11 plugin"), MB_OK | MB_ICONERROR);
if (FAILED(hr)) MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 11 plugin"), MB_OK | MB_ICONERROR);
}
// TODO: Make this configurable
@ -303,7 +303,7 @@ HRESULT Create(HWND wnd)
{
// try using the first one
hr = adapter->EnumOutputs(0, &output);
if (FAILED(hr)) MessageBox(wnd, _T("Failed to enumerate output"), _T("Dolphin Direct3D 11 plugin"), MB_OK | MB_ICONERROR);
if (FAILED(hr)) MessageBox(wnd, _T("Failed to enumerate outputs"), _T("Dolphin Direct3D 11 plugin"), MB_OK | MB_ICONERROR);
}
// this will need to be changed once multisampling gets implemented

View File

@ -68,8 +68,8 @@ void ReplaceTexture2D(ID3D11Texture2D* pTexture, const u8* buffer, unsigned int
// TODO: Merge the conversions done here to VideoDecoder
switch (pcfmt)
{
case PC_TEX_FMT_IA4_AS_IA8:
case PC_TEX_FMT_IA8:
case PC_TEX_FMT_IA4_AS_IA8:
for (unsigned int y = 0; y < height; y++)
{
u16* in = (u16*)buffer + y * pitch;
@ -78,43 +78,30 @@ void ReplaceTexture2D(ID3D11Texture2D* pTexture, const u8* buffer, unsigned int
{
const u8 I = (*in & 0xFF);
const u8 A = (*in & 0xFF00) >> 8;
*pBits = (A << 24) | (I << 16) | (I << 8) | I;
*(pBits++) = (A << 24) | (I << 16) | (I << 8) | I;
in++;
pBits++;
}
}
break;
case PC_TEX_FMT_I8:
case PC_TEX_FMT_I4_AS_I8:
for (unsigned int y = 0; y < height; y++)
{
const u8 *pIn = buffer;
for (int y = 0; y < height; y++)
const u8* in = buffer + (y * pitch);
u32* pBits = (u32*)((u8*)outptr + (y * destPitch));
for(unsigned int i = 0; i < width; i++)
{
u8* pBits = ((u8*)outptr + (y * destPitch));
for(int i = 0; i < width * 4; i += 4)
{
pBits[i] = pIn[i / 4];
pBits[i+1] = pIn[i / 4];
pBits[i+2] = pIn[i / 4];
pBits[i + 3] = pIn[i / 4];
}
pIn += pitch;
const u8 I = *(in++);
memset( pBits++, I, 4 );
}
}
break;
case PC_TEX_FMT_BGRA32:
for (unsigned int y = 0; y < height; y++)
{
u32* in = (u32*)buffer + y * pitch;
u32* pBits = (u32*)((u8*)outptr + y * destPitch);
for (unsigned int x = 0; x < width; x++)
{
const u32 col = *in;
*pBits = col;
in++;
pBits++;
}
memcpy( pBits, in, destPitch );
}
break;
case PC_TEX_FMT_RGB565:
@ -125,13 +112,11 @@ void ReplaceTexture2D(ID3D11Texture2D* pTexture, const u8* buffer, unsigned int
for (unsigned int x = 0; x < width; x++)
{
// we can't simply shift here, since e.g. 11111 must map to 11111111 and not 11111000
const u16 col = *in;
*pBits = 0xFF000000 | // alpha
const u16 col = *(in++);
*(pBits++) = 0xFF000000 | // alpha
((((col&0xF800) << 5) * 255 / 31) & 0xFF0000) | // red
((((col& 0x7e0) << 3) * 255 / 63) & 0xFF00) | // green
(( (col& 0x1f) * 255 / 31)); // blue
pBits++;
in++;
}
}
break;

View File

@ -183,7 +183,7 @@ static const D3D11_BLEND d3dLogicOpDestFactors[16] =
D3D11_BLEND_ONE//15
};
static const D3D11_CULL_MODE d3dCullModes[4] =
static const D3D11_CULL_MODE d3dCullModes[4] =
{
D3D11_CULL_NONE,
D3D11_CULL_BACK,
@ -191,7 +191,7 @@ static const D3D11_CULL_MODE d3dCullModes[4] =
D3D11_CULL_BACK
};
static const D3D11_COMPARISON_FUNC d3dCmpFuncs[8] =
static const D3D11_COMPARISON_FUNC d3dCmpFuncs[8] =
{
D3D11_COMPARISON_NEVER,
D3D11_COMPARISON_LESS,
@ -206,7 +206,7 @@ static const D3D11_COMPARISON_FUNC d3dCmpFuncs[8] =
#define TEXF_NONE 0
#define TEXF_POINT 1
#define TEXF_LINEAR 2
static const unsigned int d3dMipFilters[4] =
static const unsigned int d3dMipFilters[4] =
{
TEXF_NONE,
TEXF_POINT,
@ -504,7 +504,7 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect
VideoFifo_CheckEFBAccess();
VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight);
FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
XFBWrited = true;
XFBWrited = true;
// XXX: Without the VI, how would we know what kind of field this is? So
// just use progressive.
@ -574,7 +574,7 @@ bool Renderer::SetScissorRect()
return false;
}
void Renderer::SetColorMask()
void Renderer::SetColorMask()
{
UINT8 color_mask = 0;
if (bpmem.blendmode.alphaupdate) color_mask |= D3D11_COLOR_WRITE_ENABLE_ALPHA;
@ -729,7 +729,7 @@ void UpdateViewport()
FBManager.Destroy();
FBManager.Create();
D3D::context->OMSetRenderTargets(1, &FBManager.GetEFBColorTexture()->GetRTV(), FBManager.GetEFBDepthTexture()->GetDSV());
}
}
// some games set invalid values for MinDepth and MaxDepth, so fix them to the max and min allowed and let the shaders do this work
D3D11_VIEWPORT vp = CD3D11_VIEWPORT((float)X, (float)Y, (float)Width, (float)Height,
@ -739,16 +739,16 @@ void UpdateViewport()
}
void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z)
{
{
TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc);
// update the view port for clearing the picture
D3D11_VIEWPORT vp = CD3D11_VIEWPORT((float)targetRc.left, (float)targetRc.top, (float)targetRc.GetWidth(), (float)targetRc.GetHeight(),
0.f,
0.f,
1.f);
D3D::context->RSSetViewports(1, &vp);
// always set the scissor in case it was set by the game and has not been reset
// TODO: Do we really need to set the scissor rect? Why not just disable scissor testing?
D3D11_RECT sirc = CD3D11_RECT(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom);
D3D11_RECT sirc = CD3D11_RECT(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom);
D3D::context->RSSetScissorRects(1, &sirc);
D3D::context->OMSetDepthStencilState(cleardepthstates[zEnable], 0);
D3D::context->RSSetState(clearraststate);
@ -807,7 +807,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{
if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight)
{
g_VideoInitialize.pCopiedToXFB(false);
g_VideoInitialize.pCopiedToXFB(false);
return;
}
// this function is called after the XFB field is changed, not after
@ -819,9 +819,9 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount);
if (!xfbSourceList || xfbCount == 0)
{
g_VideoInitialize.pCopiedToXFB(false);
g_VideoInitialize.pCopiedToXFB(false);
return;
}
}
Renderer::ResetAPIState();
// set the backbuffer as the rendering target
@ -857,13 +857,13 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
// draw each xfb source
for (u32 i = 0; i < xfbCount; ++i)
{
xfbSource = xfbSourceList[i];
xfbSource = xfbSourceList[i];
MathUtil::Rectangle<float> sourceRc;
sourceRc.left = 0;
sourceRc.top = 0;
sourceRc.right = xfbSource->texWidth;
sourceRc.bottom = xfbSource->texHeight;
sourceRc.bottom = xfbSource->texHeight;
MathUtil::Rectangle<float> drawRc;
drawRc.top = -1;
@ -1011,7 +1011,7 @@ void Renderer::SetDepthMode()
void Renderer::SetLogicOpMode()
{
if (bpmem.blendmode.logicopenable && bpmem.blendmode.logicmode != 3)
if (bpmem.blendmode.logicopenable && bpmem.blendmode.logicmode != 3)
{
s_blendMode = 0;
D3D::gfxstate->SetAlphaBlendEnable(true);
@ -1037,7 +1037,7 @@ void Renderer::SetLineWidth()
void Renderer::SetSamplerState(int stage, int texindex)
{
const FourTexUnits &tex = bpmem.tex[texindex];
const FourTexUnits &tex = bpmem.tex[texindex];
const TexMode0 &tm0 = tex.texMode0[stage];
const TexMode1 &tm1 = tex.texMode1[stage];
@ -1045,7 +1045,7 @@ void Renderer::SetSamplerState(int stage, int texindex)
mip = (tm0.min_filter == 8) ? TEXF_NONE:d3dMipFilters[tm0.min_filter & 3];
if ((tm0.min_filter & 3) && (tm0.min_filter != 8) && ((tm1.max_lod >> 4) == 0)) mip = TEXF_NONE;
if (texindex) stage += 4;
if (texindex) stage += 4;
// TODO: Clarify whether these values are correct
// NOTE: since there's no "no filter" in DX11 we're using point filters in these cases

View File

@ -144,7 +144,7 @@ bool TextureCache::TCacheEntry::IntersectsMemoryRange(u32 range_address, u32 ran
void TextureCache::Shutdown()
{
Invalidate(true);
FreeMemoryPages(temp, TEMP_SIZE);
FreeMemoryPages(temp, TEMP_SIZE);
temp = NULL;
SAFE_RELEASE(efbcopyblendstate);
@ -167,8 +167,8 @@ void TextureCache::Cleanup()
}
else
{
iter++;
}
iter++;
}
}
}
@ -215,7 +215,7 @@ TextureCache::TCacheEntry* TextureCache::Load(unsigned int stage, u32 address, u
}
else
{
// Let's reload the new texture data into the same texture,
// Let's reload the new texture data into the same texture,
// instead of destroying it and having to create a new one.
// Might speed up movie playback very, very slightly.
@ -281,7 +281,7 @@ TextureCache::TCacheEntry* TextureCache::Load(unsigned int stage, u32 address, u
if (entry.texture == NULL) PanicAlert("Failed to create texture at %s %d\n", __FILE__, __LINE__);
D3D::ReplaceTexture2D(entry.texture->GetTex(), temp, width, height, expandedWidth, d3d_fmt, pcfmt, 0, usage);
}
else
else
{
D3D::ReplaceTexture2D(entry.texture->GetTex(), temp, width, height, expandedWidth, d3d_fmt, pcfmt, 0, usage);
}
@ -329,8 +329,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo
int tex_w = (abs(source_rect.GetWidth()) >> bScaleByHalf);
int tex_h = (abs(source_rect.GetHeight()) >> bScaleByHalf);
int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * tex_w)):tex_w;
int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * tex_h)):tex_h;
int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled) ? ((int)(Renderer::GetTargetScaleX() * tex_w)) : tex_w;
int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled) ? ((int)(Renderer::GetTargetScaleY() * tex_h)) : tex_h;
TexCache::iterator iter;
D3DTexture2D* tex = NULL;
@ -372,135 +372,135 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo
unsigned int cbufid = (unsigned int)-1;
// TODO: Move this to TextureCache::Init()
if (bFromZBuffer)
if (bFromZBuffer)
{
switch(copyfmt)
switch(copyfmt)
{
case 0: // Z4
case 1: // Z8
colmat[0] = colmat[4] = colmat[8] = colmat[12] =1.0f;
case 0: // Z4
case 1: // Z8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 12;
break;
case 3: // Z16 //?
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
break;
case 3: // Z16 //?
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
cbufid = 13;
case 11: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
case 11: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 14;
break;
case 6: // Z24X8
colmat[0] = colmat[5] = colmat[10] = 1.0f;
break;
case 6: // Z24X8
colmat[0] = colmat[5] = colmat[10] = 1.0f;
cbufid = 15;
break;
case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
break;
case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 16;
break;
case 10: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
break;
case 10: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 17;
break;
case 12: // Z16L
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f;
break;
case 12: // Z16L
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f;
cbufid = 18;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt);
colmat[2] = colmat[5] = colmat[8] = 1.0f;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt);
colmat[2] = colmat[5] = colmat[8] = 1.0f;
cbufid = 19;
break;
}
}
else if (bIsIntensityFmt)
break;
}
}
else if (bIsIntensityFmt)
{
colmat[16] = colmat[17] = colmat[18] = 16.0f/255.0f;
switch (copyfmt)
colmat[16] = colmat[17] = colmat[18] = 16.0f/255.0f;
switch (copyfmt)
{
case 0: // I4
case 1: // I8
case 2: // IA4
case 3: // IA8
case 0: // I4
case 1: // I8
case 2: // IA4
case 3: // IA8
// TODO - verify these coefficients
colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;
colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;
if (copyfmt < 2)
if (copyfmt < 2)
{
colmat[19] = 16.0f / 255.0f;
colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
colmat[19] = 16.0f / 255.0f;
colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
cbufid = 0;
}
else// alpha
}
else// alpha
{
colmat[15] = 1;
colmat[15] = 1;
cbufid = 1;
}
break;
default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
break;
}
}
else
break;
default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
break;
}
}
else
{
switch (copyfmt)
switch (copyfmt)
{
case 0: // R4
case 8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
case 0: // R4
case 8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
cbufid = 2;
break;
case 2: // RA4
case 3: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1;
break;
case 2: // RA4
case 3: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1;
cbufid = 3;
break;
break;
case 7: // A8
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1;
case 7: // A8
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1;
cbufid = 4;
break;
case 9: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1;
break;
case 9: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1;
cbufid = 5;
break;
case 10: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1;
break;
case 10: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1;
cbufid = 6;
break;
case 11: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1;
break;
case 11: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1;
cbufid = 7;
break;
case 12: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1;
break;
case 12: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1;
cbufid = 8;
break;
break;
case 4: // RGB565
colmat[0] = colmat[5] = colmat[10] = 1;
colmat[19] = 1; // set alpha to 1
case 4: // RGB565
colmat[0] = colmat[5] = colmat[10] = 1;
colmat[19] = 1; // set alpha to 1
cbufid = 9;
break;
case 5: // RGB5A3
case 6: // RGBA8
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
break;
case 5: // RGB5A3
case 6: // RGBA8
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
cbufid = 10;
break;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1;
cbufid = 11;
break;
}
}
break;
}
}
Renderer::ResetAPIState(); // reset any game specific settings
Renderer::ResetAPIState(); // reset any game specific settings
// stretch picture with increased internal resolution
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)Scaledtex_w, (float)Scaledtex_h);
D3D::context->RSSetViewports(1, &vp);