Shader disk cache implementation for D3D: generated shaders are saved on disk. This eliminates the "freeze jerks" in the D3D plugin the _second_ and later times you play something. Not much can be done about the first time, because the D3D shader compiler is just slow.

Also assorted cleanup around the shader code.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4869 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard
2010-01-17 17:44:09 +00:00
parent f599fdcec5
commit 3e01152793
24 changed files with 551 additions and 265 deletions

View File

@ -720,6 +720,14 @@
RelativePath=".\Src\IniFile.h"
>
</File>
<File
RelativePath=".\Src\LinearDiskCache.cpp"
>
</File>
<File
RelativePath=".\Src\LinearDiskCache.h"
>
</File>
<File
RelativePath=".\Src\MathUtil.cpp"
>

View File

@ -66,6 +66,7 @@
#define GAMECONFIG_DIR "GameConfig"
#define MAPS_DIR "Maps"
#define CACHE_DIR "Cache"
#define SHADERCACHE_DIR "ShaderCache"
#define STATESAVES_DIR "StateSaves"
#define SCREENSHOTS_DIR "ScreenShots"
#define DUMP_DIR "Dump"
@ -128,6 +129,7 @@
#define FULL_CONFIG_DIR FULL_USERDATA_DIR CONFIG_DIR DIR_SEP
#define FULL_CACHE_DIR FULL_USERDATA_DIR CACHE_DIR DIR_SEP
#define FULL_SHADERCACHE_DIR FULL_USERDATA_DIR SHADERCACHE_DIR DIR_SEP
#define FULL_STATESAVES_DIR FULL_USERDATA_DIR STATESAVES_DIR DIR_SEP
#define FULL_SCREENSHOTS_DIR FULL_USERDATA_DIR SCREENSHOTS_DIR DIR_SEP
#define FULL_FRAMES_DIR FULL_USERDATA_DIR DUMP_DIR DIR_SEP DUMP_FRAMES_DIR

View File

@ -0,0 +1,149 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "LinearDiskCache.h"
// Magic bytes written at the very start of every cache file and checked by
// ValidateHeader().
static const char ID[4] = { 'D', 'C', 'A', 'C' };

// Format version stored right after the magic bytes; a file whose stored
// version differs is rejected by ValidateHeader().
// TODO: Get from SVN_REV
static const int version = 1;
// Creates a cache object with no file attached yet; OpenAndRead() is what
// assigns file_.
LinearDiskCache::LinearDiskCache()
	: file_(NULL),
	  num_entries_(0)
{
}
// Writes the file header (the magic ID bytes followed by the format version)
// at the current position of file_.
// Does nothing when no file is open, so a failed fopen() in the caller cannot
// turn into a NULL FILE* being handed to fwrite().
void LinearDiskCache::WriteHeader() {
	if (!file_)
		return;
	fwrite(ID, sizeof(ID), 1, file_);
	fwrite(&version, sizeof(version), 1, file_);
}
bool LinearDiskCache::ValidateHeader() {
char header_id[4];
int header_version;
fread(&header_id, 4, 1, file_);
fread(&header_version, 4, 1, file_);
if (memcmp(header_id, ID, 4) != 0)
return false;
if (header_version != version)
return false;
return true;
}
int LinearDiskCache::OpenAndRead(const char *filename, LinearDiskCacheReader *reader) {
int items_read_count = 0;
file_ = fopen(filename, "rb");
int file_size = 0;
if (file_) {
fseek(file_, 0, SEEK_END);
file_size = (int)ftell(file_);
}
bool file_corrupt = false;
if (file_size == 0) {
if (file_)
fclose(file_);
// Reopen for writing.
file_ = fopen(filename, "wb");
// Cache empty, let's initialize a header.
WriteHeader();
num_entries_ = 0;
} else {
// file_ must be != 0 here.
// Back to the start we go.
fseek(file_, 0, SEEK_SET);
// Check that the file is valid
if (!ValidateHeader()) {
// Not valid - delete the file and start over.
fclose(file_);
unlink(filename);
PanicAlert("LinearDiskCache file header broken.");
file_ = fopen(filename, "wb");
WriteHeader();
num_entries_ = 0;
} else {
// Valid - blow through it.
// We're past the header already thanks to ValidateHeader.
while (!feof(file_)) {
int key_size, value_size;
int key_size_size = fread(&key_size, 1, sizeof(key_size), file_);
int value_size_size = fread(&value_size, 1, sizeof(value_size), file_);
if (key_size_size == 0 && value_size_size == 0) {
// I guess feof isn't doing it's job - we're at the end.
break;
}
if (key_size <= 0 || value_size < 0 || key_size_size != 4 || value_size_size != 4) {
PanicAlert("Disk cache file %s corrupted/truncated! ks: %i vs %i kss %i vss %i", filename,
key_size, value_size, key_size_size, value_size_size);
file_corrupt = true;
break;
}
u8 *key = new u8[key_size];
u8 *value = new u8[value_size];
int actual_key_size = (int)fread(key, 1, key_size, file_);
int actual_value_size = (int)fread(value, 1, value_size, file_);
if (actual_key_size != key_size || actual_value_size != value_size) {
PanicAlert("Disk cache file %s corrupted/truncated! ks: %i actual ks: %i vs: %i actual vs: %i", filename,
key_size, actual_key_size, value_size, actual_value_size);
file_corrupt = true;
} else {
reader->Read(key, key_size, value, value_size);
items_read_count++;
}
delete [] key;
delete [] value;
}
fclose(file_);
// Done reading.
// Reopen file for append.
// At this point, ftell() will be at the end of the file,
// which happens to be exactly what we want.
file_ = fopen(filename, "ab");
fseek(file_, 0, SEEK_END);
}
}
if (file_corrupt) {
// Restore sanity, start over.
fclose(file_);
unlink(filename);
file_ = fopen(filename, "wb+");
WriteHeader();
}
return items_read_count;
}
// Appends one key/value record at the current position of file_: the two
// sizes first, then the key bytes, then the value bytes.
//
// The call is ignored when no file is open, or when the record would be
// rejected on the next load (OpenAndRead treats key_size <= 0 or
// value_size < 0 as corruption and discards the whole file).
// No check is made for an already-stored identical key.
void LinearDiskCache::Append(
	const u8 *key, int key_size, const u8 *value, int value_size) {
	if (!file_)
		return;
	if (!key || key_size <= 0 || value_size < 0)
		return;
	if (value_size > 0 && !value)
		return;

	fwrite(&key_size, 1, sizeof(key_size), file_);
	fwrite(&value_size, 1, sizeof(value_size), file_);
	fwrite(key, 1, key_size, file_);
	if (value_size > 0)
		fwrite(value, 1, value_size, file_);
}
// Flushes buffered writes of the cache file.
// Does nothing when no file is open; fflush(NULL) would otherwise flush every
// open output stream of the process.
void LinearDiskCache::Sync() {
	if (file_)
		fflush(file_);
}
// Closes the cache file, if one is open, and resets the object's state.
// Safe to call repeatedly or after a failed open: fclose() is never given a
// NULL stream.
void LinearDiskCache::Close() {
	if (file_) {
		fclose(file_);
		file_ = NULL;
	}
	num_entries_ = 0;
}

View File

@ -0,0 +1,67 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _LINEAR_DISKCACHE
#define _LINEAR_DISKCACHE
#include "Common.h"
#include <stdlib.h>
#include <stdio.h>
// On disk format:
// uint32 'DCAC'
// uint32 version; // svn_rev
// uint32 key_length;
// uint32 value_length;
// .... key;
// .... value;
// Callback interface through which LinearDiskCache::OpenAndRead delivers the
// stored entries.
class LinearDiskCacheReader {
public:
	// Virtual so that an implementation can be destroyed safely through a
	// pointer to this interface.
	virtual ~LinearDiskCacheReader() {}

	// Called once for every entry read successfully from the cache file.
	// The key and value buffers are freed by the cache right after this call
	// returns, so copy anything that needs to be kept.
	virtual void Read(const u8 *key, int key_size, const u8 *value, int value_size) = 0;
};
// A minimal, unsorted, append-only key/value store kept in a single file.
// There is no random-access lookup: all reading happens inside OpenAndRead().
// Keys and values are raw bytes and may contain anything, including \0.
//
// Suitable for caching generated shader bytecode between executions.
// Not tuned for extreme performance, but should be reasonably fast.
// Keys or values larger than 2GB are not supported, which should be reasonable.
// A key must be at least one byte long; a value may be empty.
class LinearDiskCache {
public:
	LinearDiskCache();

	// Opens `filename` and passes the stored pairs to `reader`.
	// Returns the number of items read from the cache.
	int OpenAndRead(const char *filename, LinearDiskCacheReader *reader);

	// Appends a key-value pair to the store.
	void Append(const u8 *key, int key_size, const u8 *value, int value_size);

	// Flushes buffered writes to the file.
	void Sync();

	// Closes the file.
	void Close();

private:
	// Header helpers: check / emit the ID and version at the start of the file.
	bool ValidateHeader();
	void WriteHeader();

	FILE *file_;
	int num_entries_;
};
#endif // _LINEAR_DISKCACHE

View File

@ -232,8 +232,8 @@ inline u32 ConvertToSingle(u64 x)
}
else
{
// this is said to be undefined
// based on hardware tests
// This is said to be undefined.
// The code is based on hardware tests.
return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff);
}
}

View File

@ -31,7 +31,7 @@ PIXELSHADERUID last_pixel_shader_uid;
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
void GetPixelShaderId(PIXELSHADERUID *uid, u32 texturemask, u32 dstAlphaEnable)
{
u32 projtexcoords = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++)
@ -43,7 +43,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
projtexcoords |= 1 << texcoord;
}
}
uid.values[0] = (u32)bpmem.genMode.numtevstages |
uid->values[0] = (u32)bpmem.genMode.numtevstages |
((u32)bpmem.genMode.numindstages << 4) |
((u32)bpmem.genMode.numtexgens << 7) |
((u32)dstAlphaEnable << 11) |
@ -51,21 +51,21 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
(projtexcoords << 20) |
((u32)bpmem.ztex2.op << 28);
uid.values[0] = (uid.values[0] & ~0x0ff00000) | (projtexcoords << 20);
uid->values[0] = (uid->values[0] & ~0x0ff00000) | (projtexcoords << 20);
// swap table
for (int i = 0; i < 8; i += 2)
((u8*)&uid.values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
uid.values[2] = texturemask;
uid->values[2] = texturemask;
u32 enableZTexture = (!bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)?1:0;
uid.values[3] = (u32)bpmem.fog.c_proj_fsel.fsel |
uid->values[3] = (u32)bpmem.fog.c_proj_fsel.fsel |
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4);
int hdr = 4;
u32* pcurvalue = &uid.values[hdr];
u32 *pcurvalue = &uid->values[hdr];
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
{
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC;
@ -119,7 +119,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
if ((bpmem.genMode.numtevstages % 3) != 2)
++pcurvalue;
uid.tevstages = (u32)(pcurvalue - &uid.values[0] - hdr);
uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr);
for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
{
@ -134,7 +134,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable)
}
// yeah, well ....
uid.indstages = (u32)(pcurvalue - &uid.values[0] - (hdr - 1) - uid.tevstages);
uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages);
}
// old tev->pixelshader notes
@ -385,7 +385,7 @@ static void BuildSwapModeTable()
}
}
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary

View File

@ -42,6 +42,7 @@
#define C_COLORMATRIX (C_FOG + 2)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11)
// DO NOT make anything in this class virtual.
class PIXELSHADERUID
{
public:
@ -100,8 +101,9 @@ public:
}
};
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL = 0);
void GetPixelShaderId(PIXELSHADERUID &, u32 texturemask, u32 dstAlphaEnable);
const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 HLSL = 0);
void GetPixelShaderId(PIXELSHADERUID *uid, u32 texturemask, u32 dstAlphaEnable);
extern PIXELSHADERUID last_pixel_shader_uid;
#endif // GCOGL_PIXELSHADER_H

View File

@ -25,7 +25,7 @@ namespace TextureConversionShader
{
u16 GetEncodedSampleCount(u32 format);
const char *GenerateEncodingShader(u32 format,bool HLSL = false);
const char *GenerateEncodingShader(u32 format, bool HLSL = false);
void SetShaderParameters(float width, float height, float offsetX, float offsetY, float widthStride, float heightStride,float buffW = 0.0f,float buffH = 0.0f);

View File

@ -29,27 +29,27 @@ VERTEXSHADERUID last_vertex_shader_uid;
// Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components)
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
{
vid.values[0] = components |
uid->values[0] = components |
(xfregs.numTexGens << 23) |
(xfregs.nNumChans << 27) |
((u32)xfregs.bEnableDualTexTransform << 29);
for (int i = 0; i < 2; ++i) {
vid.values[1+i] = xfregs.colChans[i].color.enablelighting ?
uid->values[1+i] = xfregs.colChans[i].color.enablelighting ?
(u32)xfregs.colChans[i].color.hex :
(u32)xfregs.colChans[i].color.matsource;
vid.values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ?
uid->values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ?
(u32)xfregs.colChans[i].alpha.hex :
(u32)xfregs.colChans[i].alpha.matsource) << 15;
}
// fog
vid.values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30);
vid.values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30);
uid->values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30);
uid->values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30);
u32* pcurvalue = &vid.values[3];
u32 *pcurvalue = &uid->values[3];
for (int i = 0; i < xfregs.numTexGens; ++i) {
TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo;
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
@ -78,16 +78,16 @@ static char text[16384];
#define LIGHTS_POS ""
char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha);
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char *dest, int coloralpha);
const char *GenerateVertexShader(u32 components, bool D3D)
const char *GenerateVertexShaderCode(u32 components, bool D3D)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
DVSTARTPROFILE();
_assert_( bpmem.genMode.numtexgens == xfregs.numTexGens);
_assert_( bpmem.genMode.numcolchans == xfregs.nNumChans);
_assert_(bpmem.genMode.numtexgens == xfregs.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfregs.nNumChans);
u32 lightMask = 0;
if (xfregs.nNumChans > 0)
@ -125,9 +125,8 @@ const char *GenerateVertexShader(u32 components, bool D3D)
WRITE(p, "};\n");
// uniforms
// bool bTexMtx = ((components & VB_HAS_TEXMTXIDXALL)<<VB_HAS_UVTEXMTXSHIFT)!=0; unused TODO: keep?
WRITE(p, "uniform s_"I_TRANSFORMMATRICES" "I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES);
WRITE(p, "uniform s_"I_TRANSFORMMATRICES" "I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES);
WRITE(p, "uniform s_"I_TEXMATRICES" "I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); // also using tex matrices
WRITE(p, "uniform s_"I_NORMALMATRICES" "I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES);
WRITE(p, "uniform s_"I_POSNORMALMATRIX" "I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX);
@ -406,7 +405,7 @@ const char *GenerateVertexShader(u32 components, bool D3D)
break;
}
if(xfregs.bEnableDualTexTransform && texinfo.texgentype == XF_TEXGEN_REGULAR) { // only works for regular tex gen types?
if (xfregs.bEnableDualTexTransform && texinfo.texgentype == XF_TEXGEN_REGULAR) { // only works for regular tex gen types?
int postidx = xfregs.texcoords[i].postmtxinfo.index;
WRITE(p, "float4 P0 = "I_POSTTRANSFORMMATRICES".T[%d].t;\n"
"float4 P1 = "I_POSTTRANSFORMMATRICES".T[%d].t;\n"
@ -461,7 +460,7 @@ const char *GenerateVertexShader(u32 components, bool D3D)
}
// coloralpha - 1 if color, 2 if alpha
char* GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha)
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char *dest, int coloralpha)
{
const char* swizzle = "xyzw";
if (coloralpha == 1 ) swizzle = "xyz";

View File

@ -99,8 +99,10 @@ public:
}
};
const char *GenerateVertexShader(u32 components, bool D3D);
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components);
// components is included in the uid.
const char *GenerateVertexShaderCode(u32 components, bool D3D);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
extern VERTEXSHADERUID last_vertex_shader_uid;
#endif // GCOGL_VERTEXSHADER_H