Some fixes to my last commit, and:

Modify the shader generator to produce native SM 4.0 code.
Eliminate compatibility mode in DX11, so all shaders should now work much better.
Please test.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5691 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado
2010-06-14 14:36:01 +00:00
parent ff50fd188f
commit fc12291806
9 changed files with 149 additions and 58 deletions

View File

@ -129,13 +129,6 @@
#endif // WIN32 #endif // WIN32
typedef enum
{
API_OPENGL,
API_D3D9,
API_D3D11
} API_TYPE;
// A macro to disallow the copy constructor and operator= functions // A macro to disallow the copy constructor and operator= functions
// This should be used in the private: declarations for a class // This should be used in the private: declarations for a class
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \

View File

@ -22,7 +22,7 @@ static const char ID[4] = {'D', 'C', 'A', 'C'};
// Update this to the current SVN revision every time you change shader generation code. // Update this to the current SVN revision every time you change shader generation code.
// We don't automatically get this from SVN_REV because that would mean regenerating the // We don't automatically get this from SVN_REV because that would mean regenerating the
// shader cache for every revision, graphics-related or not, which is simply annoying. // shader cache for every revision, graphics-related or not, which is simply annoying.
const int version = 5659; const int version = 5691;
LinearDiskCache::LinearDiskCache() LinearDiskCache::LinearDiskCache()
: file_(NULL), num_entries_(0) { : file_(NULL), num_entries_(0) {

View File

@ -385,10 +385,18 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_
// Declare samplers // Declare samplers
if (texture_mask) if (texture_mask)
{ {
if (ApiType != API_OPENGL) if (ApiType == API_D3D11)
{
WRITE(p, "sampler ");
}
else if (ApiType == API_D3D9)
{
WRITE(p, "uniform sampler "); WRITE(p, "uniform sampler ");
}
else else
{
WRITE(p, "uniform samplerRECT "); WRITE(p, "uniform samplerRECT ");
}
bool bfirst = true; bool bfirst = true;
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
@ -399,18 +407,56 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_
} }
} }
WRITE(p, ";\n"); WRITE(p, ";\n");
if(ApiType == API_D3D11)
{
bfirst = true;
WRITE(p, "Texture2D ");
for (int i = 0; i < 8; ++i)
{
if (texture_mask & (1<<i))
{
WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i);
bfirst = false;
}
}
WRITE(p, ";\n");
}
} }
if (texture_mask != 0xff) { if (texture_mask != 0xff)
{
if(ApiType != API_D3D11)
{
WRITE(p, "uniform sampler2D "); WRITE(p, "uniform sampler2D ");
}
else
{
WRITE(p, "sampler ");
}
bool bfirst = true; bool bfirst = true;
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i)
{
if (!(texture_mask & (1<<i))) { if (!(texture_mask & (1<<i))) {
WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",",i, i); WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",",i, i);
bfirst = false; bfirst = false;
} }
} }
WRITE(p, ";\n"); WRITE(p, ";\n");
if(ApiType == API_D3D11)
{
WRITE(p, "Texture2D ");
bfirst = true;
for (int i = 0; i < 8; ++i)
{
if (!(texture_mask & (1<<i)))
{
WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i);
bfirst = false;
}
}
WRITE(p, ";\n");
}
} }
WRITE(p, "\n"); WRITE(p, "\n");
@ -425,9 +471,20 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_
WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG); WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG);
WRITE(p, "void main(\n"); WRITE(p, "void main(\n");
if(ApiType != API_D3D11)
WRITE(p, " out float4 ocol0 : COLOR0,\n"); WRITE(p, " out float4 ocol0 : COLOR0,\n");
else
WRITE(p, " out float4 ocol0 : SV_Target,\n");
if(ApiType != API_D3D11)
WRITE(p, " out float depth : DEPTH,\n"); WRITE(p, " out float depth : DEPTH,\n");
else
WRITE(p, " out float depth : SV_Depth,\n");
if(ApiType != API_D3D11)
WRITE(p, " in float4 rawpos : POSITION,\n"); WRITE(p, " in float4 rawpos : POSITION,\n");
else
WRITE(p, " in float4 rawpos : SV_Position,\n");
WRITE(p, " in float4 colors_0 : COLOR0,\n"); WRITE(p, " in float4 colors_0 : COLOR0,\n");
WRITE(p, " in float4 colors_1 : COLOR1\n"); WRITE(p, " in float4 colors_1 : COLOR1\n");
@ -449,7 +506,7 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_
char* pmainstart = p; char* pmainstart = p;
WRITE(p, " float4 c0="I_COLORS"[1],c1="I_COLORS"[2],c2="I_COLORS"[3],prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n" WRITE(p, " float4 c0="I_COLORS"[1],c1="I_COLORS"[2],c2="I_COLORS"[3],prev=float4(0.0f,0.0f,0.0f,0.0f),textemp=float4(0.0f,0.0f,0.0f,0.0f),rastemp=float4(0.0f,0.0f,0.0f,0.0f),konsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float3 comp16 = float3(1.0f,255.0f,0.0f), comp24 = float3(1.0f,255.0f,255.0f*255.0f);\n" " float3 comp16 = float3(1.0f,255.0f,0.0f), comp24 = float3(1.0f,255.0f,255.0f*255.0f);\n"
" float4 alphabump=0;\n" " float4 alphabump=0;\n"
" float3 tevcoord;\n" " float3 tevcoord;\n"
@ -874,20 +931,26 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
else { else {
WRITE(p, "tempcoord.y = %s.y;\n", texcoords); WRITE(p, "tempcoord.y = %s.y;\n", texcoords);
} }
if (ApiType == API_D3D11)
if (ApiType != API_OPENGL) WRITE(p, "%s= Tex%d.Sample(samp%d,tempcoord.xy).%s;\n", destination, texmap,texmap, texswap);
else if (ApiType == API_D3D9)
WRITE(p, "%s=tex2D(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap); WRITE(p, "%s=tex2D(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap);
else else
WRITE(p, "%s=texRECT(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap); WRITE(p, "%s=texRECT(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap);
} }
else { else {
if (ApiType != API_OPENGL) if (ApiType == API_D3D11)
WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy).%s;\n", destination,texmap,texmap, texcoords, texswap);
else if (ApiType == API_D3D9)
WRITE(p, "%s=tex2D(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap); WRITE(p, "%s=tex2D(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap);
else else
WRITE(p, "%s=texRECT(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap); WRITE(p, "%s=texRECT(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap);
} }
} }
else { else {
if (ApiType == API_D3D11)
WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
else
WRITE(p, "%s=tex2D(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); WRITE(p, "%s=tex2D(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap);
} }
} }

View File

@ -18,7 +18,7 @@
#ifndef GCOGL_PIXELSHADER_H #ifndef GCOGL_PIXELSHADER_H
#define GCOGL_PIXELSHADER_H #define GCOGL_PIXELSHADER_H
#include "Common.h" #include "VideoCommon.h"
#define I_COLORS "color" #define I_COLORS "color"
#define I_KCOLORS "k" #define I_KCOLORS "k"

View File

@ -19,7 +19,7 @@
#define GCOGL_VERTEXSHADER_H #define GCOGL_VERTEXSHADER_H
#include "XFMemory.h" #include "XFMemory.h"
#include "Common.h" #include "VideoCommon.h"
#define SHADER_POSMTX_ATTRIB 1 #define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM1_ATTRIB 6 #define SHADER_NORM1_ATTRIB 6

View File

@ -144,4 +144,11 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
bool IsD3D(); bool IsD3D();
typedef enum
{
API_OPENGL,
API_D3D9,
API_D3D11
} API_TYPE;
#endif // _VIDEOCOMMON_H #endif // _VIDEOCOMMON_H

View File

@ -90,7 +90,7 @@ bool CompilePixelShader(const char* code, unsigned int len, ID3D10Blob** blob)
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_DEBUG|D3D10_SHADER_WARNINGS_ARE_ERRORS; UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_DEBUG|D3D10_SHADER_WARNINGS_ARE_ERRORS;
#else #else
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_OPTIMIZATION_LEVEL3|D3D10_SHADER_SKIP_VALIDATION; UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3;
#endif #endif
HRESULT hr = D3DCompile(code, len, NULL, NULL, NULL, "main", D3D::PixelShaderVersionString(), HRESULT hr = D3DCompile(code, len, NULL, NULL, NULL, "main", D3D::PixelShaderVersionString(),
flags, 0, &shaderBuffer, &errorBuffer); flags, 0, &shaderBuffer, &errorBuffer);

View File

@ -47,43 +47,46 @@ ID3D11PixelShader* s_ClearProgram = NULL;
const char clear_program_code[] = { const char clear_program_code[] = {
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : SV_Target,\n"
"in float4 pos : POSITION,\n" "in float4 pos : SV_Position,\n"
"in float4 incol0 : COLOR0){\n" "in float4 incol0 : COLOR0){\n"
"ocol0 = incol0;\n" "ocol0 = incol0;\n"
"}\n" "}\n"
}; };
const char color_copy_program_code[] = { const char color_copy_program_code[] = {
"uniform sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : SV_Target,\n"
"in float4 pos : POSITION,\n" "in float4 pos : SV_Position,\n"
"in float2 uv0 : TEXCOORD0){\n" "in float2 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0);\n" "ocol0 = Tex0.Sample(samp0,uv0);\n"
"}\n" "}\n"
}; };
const char color_matrix_program_code[] = { const char color_matrix_program_code[] = {
"uniform sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[5] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : SV_Target,\n"
"in float4 pos : POSITION,\n" "in float4 pos : SV_Position,\n"
" in float2 uv0 : TEXCOORD0){\n" " in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n" "float4 texcol = Tex0.Sample(samp0,uv0);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n" "}\n"
}; };
const char depth_matrix_program[] = { const char depth_matrix_program[] = {
"uniform sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[5] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : SV_Target,\n"
" in float4 pos : POSITION,\n" " in float4 pos : SV_Position,\n"
" in float2 uv0 : TEXCOORD0){\n" " in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n" "float4 texcol = Tex0.Sample(samp0,uv0);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
@ -124,6 +127,11 @@ unsigned int ps_constant_offset_table[] = {
108, 112, // C_FOG, 8 108, 112, // C_FOG, 8
}; };
void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
if(D3D::gfxstate->psconstants[ps_constant_offset_table[const_number] ] != f1
|| D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+1] != f2
|| D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+2] != f3
|| D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+3] != f4)
{ {
D3D::gfxstate->psconstants[ps_constant_offset_table[const_number] ] = f1; D3D::gfxstate->psconstants[ps_constant_offset_table[const_number] ] = f1;
D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+1] = f2; D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+1] = f2;
@ -131,18 +139,25 @@ void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, fl
D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+3] = f4; D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]+3] = f4;
D3D::gfxstate->pscbufchanged = true; D3D::gfxstate->pscbufchanged = true;
} }
}
void SetPSConstant4fv(unsigned int const_number, const float* f) void SetPSConstant4fv(unsigned int const_number, const float* f)
{
if(memcmp(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4))
{ {
memcpy(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4); memcpy(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4);
D3D::gfxstate->pscbufchanged = true; D3D::gfxstate->pscbufchanged = true;
} }
}
void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float* f) void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float* f)
{
if(memcmp(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4*count))
{ {
memcpy(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4*count); memcpy(&D3D::gfxstate->psconstants[ps_constant_offset_table[const_number]], f, sizeof(float)*4*count);
D3D::gfxstate->pscbufchanged = true; D3D::gfxstate->pscbufchanged = true;
} }
}
// this class will load the precompiled shaders into our cache // this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader { class PixelShaderCacheInserter : public LinearDiskCacheReader {

View File

@ -50,6 +50,11 @@ ID3D11InputLayout* VertexShaderCache::GetClearInputLayout() { return ClearLayout
// maps the constant numbers to float indices in the constant buffer // maps the constant numbers to float indices in the constant buffer
unsigned int vs_constant_offset_table[238]; unsigned int vs_constant_offset_table[238];
void SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) void SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
if(D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number] ] != f1
|| D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+1] != f2
|| D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+2] != f3
|| D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+3] != f4)
{ {
D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number] ] = f1; D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number] ] = f1;
D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+1] = f2; D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+1] = f2;
@ -57,12 +62,16 @@ void SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, fl
D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+3] = f4; D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]+3] = f4;
D3D::gfxstate->vscbufchanged = true; D3D::gfxstate->vscbufchanged = true;
} }
}
void SetVSConstant4fv(unsigned int const_number, const float* f) void SetVSConstant4fv(unsigned int const_number, const float* f)
{
if(memcmp(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4))
{ {
memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4); memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4);
D3D::gfxstate->vscbufchanged = true; D3D::gfxstate->vscbufchanged = true;
} }
}
void SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float* f) void SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float* f)
{ {
@ -71,13 +80,17 @@ void SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const
memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number+i]], f+3*i, sizeof(float)*3); memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number+i]], f+3*i, sizeof(float)*3);
D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number+i]+3] = 0.f; D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number+i]+3] = 0.f;
} }
D3D::gfxstate->vscbufchanged = true;
} }
void SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float* f) void SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float* f)
{
if(memcmp(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4*count))
{ {
memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4*count); memcpy(&D3D::gfxstate->vsconstants[vs_constant_offset_table[const_number]], f, sizeof(float)*4*count);
D3D::gfxstate->vscbufchanged = true; D3D::gfxstate->vscbufchanged = true;
} }
}
// this class will load the precompiled shaders into our cache // this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader { class VertexShaderCacheInserter : public LinearDiskCacheReader {