Merge branch 'master' into zcomploc-support

Conflicts:
	Source/Core/VideoCommon/Src/PixelShaderGen.cpp
This commit is contained in:
Pierre Bourdon
2012-03-22 15:21:52 +01:00
854 changed files with 42647 additions and 21611 deletions

View File

@ -15,6 +15,10 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#if defined(__FreeBSD__)
#define __STDC_CONSTANT_MACROS 1
#endif
#include "AVIDump.h"
#include "HW/VideoInterface.h" //for TargetRefreshRate
#include "VideoConfig.h"
@ -259,12 +263,10 @@ bool AVIDump::CreateFile()
s_Stream->codec->bit_rate = 400000;
s_Stream->codec->width = s_width;
s_Stream->codec->height = s_height;
s_Stream->codec->time_base = (AVRational){1, VideoInterface::TargetRefreshRate};
s_Stream->codec->time_base = (AVRational){1, static_cast<int>(VideoInterface::TargetRefreshRate)};
s_Stream->codec->gop_size = 12;
s_Stream->codec->pix_fmt = g_Config.bUseFFV1 ? PIX_FMT_BGRA : PIX_FMT_YUV420P;
av_set_parameters(s_FormatContext, NULL);
if (!(codec = avcodec_find_encoder(s_Stream->codec->codec_id)) ||
(avcodec_open(s_Stream->codec, codec) < 0))
{
@ -283,14 +285,14 @@ bool AVIDump::CreateFile()
s_OutBuffer = new uint8_t[s_size];
NOTICE_LOG(VIDEO, "Opening file %s for dumping", s_FormatContext->filename);
if (url_fopen(&s_FormatContext->pb, s_FormatContext->filename, URL_WRONLY) < 0)
if (avio_open(&s_FormatContext->pb, s_FormatContext->filename, AVIO_FLAG_WRITE) < 0)
{
WARN_LOG(VIDEO, "Could not open %s", s_FormatContext->filename);
CloseFile();
return false;
}
av_write_header(s_FormatContext);
avformat_write_header(s_FormatContext, NULL);
return true;
}
@ -370,7 +372,7 @@ void AVIDump::CloseFile()
if (s_FormatContext)
{
if (s_FormatContext->pb)
url_fclose(s_FormatContext->pb);
avio_close(s_FormatContext->pb);
av_free(s_FormatContext);
s_FormatContext = NULL;
}

View File

@ -44,9 +44,24 @@ void SetGenerationMode(const BPCmd &bp)
g_renderer->SetGenerationMode();
}
void SetScissor(const BPCmd &bp)
void SetScissor()
{
g_renderer->SetScissorRect();
const int xoff = bpmem.scissorOffset.x * 2 - 342;
const int yoff = bpmem.scissorOffset.y * 2 - 342;
EFBRectangle rc (bpmem.scissorTL.x - xoff - 342, bpmem.scissorTL.y - yoff - 342,
bpmem.scissorBR.x - xoff - 341, bpmem.scissorBR.y - yoff - 341);
if (rc.left < 0) rc.left = 0;
if (rc.top < 0) rc.top = 0;
if (rc.right > EFB_WIDTH) rc.right = EFB_WIDTH;
if (rc.bottom > EFB_HEIGHT) rc.bottom = EFB_HEIGHT;
if (rc.left > rc.right) rc.right = rc.left;
if (rc.top > rc.bottom) rc.bottom = rc.top;
TargetRectangle trc = g_renderer->ConvertEFBRectangle(rc);
g_renderer->SetScissorRect(trc);
}
void SetLineWidth(const BPCmd &bp)

View File

@ -38,7 +38,7 @@ enum
void FlushPipeline();
void SetGenerationMode(const BPCmd &bp);
void SetScissor(const BPCmd &bp);
void SetScissor();
void SetLineWidth(const BPCmd &bp);
void SetDepthMode(const BPCmd &bp);
void SetBlendMode(const BPCmd &bp);

View File

@ -61,7 +61,7 @@ void BPReload()
// Cases in which we DON'T want to reload the BP
continue;
default:
BPCmd bp = {i, 0xFFFFFF, ((u32*)&bpmem)[i]};
BPCmd bp = {i, 0xFFFFFF, static_cast<int>(((u32*)&bpmem)[i])};
BPWritten(bp);
}
}

View File

@ -65,10 +65,10 @@
#define BPMEM_UNKOWN_57 0x57
#define BPMEM_REVBITS 0x58
#define BPMEM_SCISSOROFFSET 0x59
#define BPMEM_UNKNOWN_60 0x60
#define BPMEM_UNKNOWN_61 0x61
#define BPMEM_UNKNOWN_62 0x62
#define BPMEM_TEXMODESYNC 0x63
#define BPMEM_PRELOAD_ADDR 0x60
#define BPMEM_PRELOAD_TMEMEVEN 0x61
#define BPMEM_PRELOAD_TMEMODD 0x62
#define BPMEM_PRELOAD_MODE 0x63
#define BPMEM_LOADTLUT0 0x64
#define BPMEM_LOADTLUT1 0x65
#define BPMEM_TEXINVALIDATE 0x66
@ -487,10 +487,10 @@ union TexImage1
{
struct
{
u32 tmem_offset : 15; // we ignore texture caching for now, we do it ourselves
u32 cache_width : 3;
u32 tmem_even : 15; // tmem line index for even LODs
u32 cache_width : 3;
u32 cache_height : 3;
u32 image_type : 1;
u32 image_type : 1; // 1 if this texture is managed manually (0 means we'll autofetch the texture data whenever it changes)
};
u32 hex;
};
@ -499,7 +499,7 @@ union TexImage2
{
struct
{
u32 tmem_offset : 15; // we ignore texture caching for now, we do it ourselves
u32 tmem_odd : 15; // tmem line index for odd LODs
u32 cache_width : 3;
u32 cache_height : 3;
};
@ -893,6 +893,25 @@ union UPE_Copy
}
};
union BPU_PreloadTileInfo
{
u32 hex;
struct {
u32 count : 15;
u32 type : 2;
};
};
struct BPS_TmemConfig
{
u32 preload_addr;
u32 preload_tmem_even;
u32 preload_tmem_odd;
BPU_PreloadTileInfo preload_tile_info;
u32 tlut_src;
u32 tlut_dest;
u32 texinvalidate;
};
// All of BP memory
@ -951,10 +970,8 @@ struct BPMemory
u32 boundbox1;//56
u32 unknown7[2];//57,58
X10Y10 scissorOffset; //59
u32 unknown8[10]; //5a,5b,5c,5d, 5e,5f,60,61, 62, 63 (GXTexModeSync), 0x60-0x63 have to do with preloaded textures?
u32 tlutXferSrc; //64
u32 tlutXferDest; //65
u32 texinvalidate;//66
u32 unknown8[6]; //5a,5b,5c,5d, 5e,5f
BPS_TmemConfig tmem_config; // 60-66
u32 metric; //67
FieldMode fieldmode;//68
u32 unknown10[7];//69-6F

View File

@ -30,6 +30,7 @@
#include "VertexLoader.h"
#include "VertexShaderManager.h"
#include "Thread.h"
#include "HW/Memmap.h"
using namespace BPFunctions;
@ -37,6 +38,8 @@ u32 mapTexAddress;
bool mapTexFound;
int numWrites;
extern volatile bool g_bSkipCurrentFrame;
static const float s_gammaLUT[] =
{
1.0f,
@ -162,7 +165,7 @@ void BPWritten(const BPCmd& bp)
case BPMEM_SCISSORTL: // Scissor Rectable Top, Left
case BPMEM_SCISSORBR: // Scissor Rectable Bottom, Right
case BPMEM_SCISSOROFFSET: // Scissor Offset
SetScissor(bp);
SetScissor();
break;
case BPMEM_LINEPTWIDTH: // Line Width
SetLineWidth(bp);
@ -257,9 +260,8 @@ void BPWritten(const BPCmd& bp)
// We should be able to get away with deactivating the current bbox tracking
// here. Not sure if there's a better spot to put this.
// the number of lines copied is determined by the y scale * source efb height
#ifdef BBOX_SUPPORT
PixelEngine::bbox_active = false;
#endif
float yScale;
if (PE_copy.scale_invert)
@ -300,14 +302,14 @@ void BPWritten(const BPCmd& bp)
// TODO - figure out a cleaner way.
if (GetConfig(CONFIG_ISWII))
ptr = GetPointer(bpmem.tlutXferSrc << 5);
ptr = GetPointer(bpmem.tmem_config.tlut_src << 5);
else
ptr = GetPointer((bpmem.tlutXferSrc & 0xFFFFF) << 5);
ptr = GetPointer((bpmem.tmem_config.tlut_src & 0xFFFFF) << 5);
if (ptr)
memcpy_gc(texMem + tlutTMemAddr, ptr, tlutXferCount);
else
PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tlutXferSrc, bpmem.tlutXferSrc << 5, (bpmem.tlutXferSrc & 0xFFFFF)<< 5);
PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tmem_config.tlut_src, bpmem.tmem_config.tlut_src << 5, (bpmem.tmem_config.tlut_src & 0xFFFFF)<< 5);
// TODO(ector) : kill all textures that use this palette
// Not sure if it's a good idea, though. For now, we hash texture palettes
@ -400,28 +402,31 @@ void BPWritten(const BPCmd& bp)
case BPMEM_CLEARBBOX1:
case BPMEM_CLEARBBOX2:
{
#ifdef BBOX_SUPPORT
// which is which? these are GUESSES!
if (bp.address == BPMEM_CLEARBBOX1) {
int right = bp.newvalue >> 10;
int left = bp.newvalue & 0x3ff;
// We should only set these if bbox is calculated properly.
PixelEngine::bbox[0] = left;
PixelEngine::bbox[1] = right;
PixelEngine::bbox_active = true;
// WARN_LOG(VIDEO, "ClearBBox LR: %i, %08x - %i, %i", bp.address, bp.newvalue, left, right);
} else {
int bottom = bp.newvalue >> 10;
int top = bp.newvalue & 0x3ff;
if(g_ActiveConfig.bUseBBox)
{
// Don't compute bounding box if this frame is being skipped!
// Wrong but valid values are better than bogus values...
if(g_bSkipCurrentFrame)
break;
// We should only set these if bbox is calculated properly.
PixelEngine::bbox[2] = top;
PixelEngine::bbox[3] = bottom;
PixelEngine::bbox_active = true;
// WARN_LOG(VIDEO, "ClearBBox TB: %i, %08x - %i, %i", bp.address, bp.newvalue, top, bottom);
if (bp.address == BPMEM_CLEARBBOX1) {
int right = bp.newvalue >> 10;
int left = bp.newvalue & 0x3ff;
// We should only set these if bbox is calculated properly.
PixelEngine::bbox[0] = left;
PixelEngine::bbox[1] = right;
PixelEngine::bbox_active = true;
} else {
int bottom = bp.newvalue >> 10;
int top = bp.newvalue & 0x3ff;
// We should only set these if bbox is calculated properly.
PixelEngine::bbox[2] = top;
PixelEngine::bbox[3] = bottom;
PixelEngine::bbox_active = true;
}
}
#endif
}
break;
case BPMEM_TEXINVALIDATE: // Used, if game has manual control the Texture Cache, which we don't allow
@ -462,14 +467,22 @@ void BPWritten(const BPCmd& bp)
DEBUG_LOG(VIDEO, "Uknown BP Reg 0x57: %08x", bp.newvalue);
break;
case BPMEM_UNKNOWN_60:
case BPMEM_UNKNOWN_61:
case BPMEM_UNKNOWN_62:
// Cases added due to: http://code.google.com/p/dolphin-emu/issues/detail?id=360#c90
// Are these related to BBox?
case BPMEM_PRELOAD_ADDR:
case BPMEM_PRELOAD_TMEMEVEN:
case BPMEM_PRELOAD_TMEMODD: // Used when PRELOAD_MODE is set
break;
case BPMEM_TEXMODESYNC: // Always set to 0 when GX_TexModeSync() is called.
case BPMEM_PRELOAD_MODE: // Set to 0 when GX_TexModeSync() is called.
// if this is different from 0, manual TMEM management is used.
if (bp.newvalue != 0)
{
// NOTE(neobrain): Apparently tmemodd doesn't affect hardware behavior at all (libogc uses it just as a buffer and switches its contents with tmemeven whenever this is called)
BPS_TmemConfig& tmem_cfg = bpmem.tmem_config;
u8* ram_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5);
u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;
u32 size = tmem_cfg.preload_tile_info.count * 32;
memcpy(texMem + tmem_addr, ram_ptr, size);
}
break;
// ------------------------------------------------

View File

@ -56,11 +56,12 @@ static bool bProcessFifoToLoWatermark = false;
static bool bProcessFifoAllDistance = false;
volatile bool isPossibleWaitingSetDrawDone = false;
volatile bool isHiWatermarkActive = false;
volatile bool interruptSet= false;
volatile bool interruptWaiting= false;
volatile bool interruptTokenWaiting = false;
volatile bool interruptFinishWaiting = false;
volatile bool OnOverflow = false;
volatile bool waitingForPEInterruptDisable = false;
bool IsOnThread()
{
@ -86,13 +87,12 @@ void DoState(PointerWrap &p)
p.Do(bProcessFifoToLoWatermark);
p.Do(bProcessFifoAllDistance);
p.Do(isHiWatermarkActive);
p.Do(isPossibleWaitingSetDrawDone);
p.Do(interruptSet);
p.Do(interruptWaiting);
p.Do(interruptTokenWaiting);
p.Do(interruptFinishWaiting);
p.Do(OnOverflow);
}
inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
@ -109,6 +109,8 @@ void Init()
m_CPCtrlReg.Hex = 0;
m_CPClearReg.Hex = 0;
m_bboxleft = 0;
m_bboxtop = 0;
m_bboxright = 640;
@ -130,14 +132,17 @@ void Init()
interruptFinishWaiting = false;
interruptTokenWaiting = false;
bProcessFifoToLoWatermark = false;
bProcessFifoAllDistance = false;
isPossibleWaitingSetDrawDone = false;
isHiWatermarkActive = false;
et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
}
void Read16(u16& _rReturnValue, const u32 _Address)
{
INFO_LOG(COMMANDPROCESSOR, "(r): 0x%08x", _Address);
ProcessFifoEvents();
switch (_Address & 0xFFF)
{
case STATUS_REGISTER:
@ -166,11 +171,23 @@ void Read16(u16& _rReturnValue, const u32 _Address)
case FIFO_LO_WATERMARK_HI: _rReturnValue = ReadHigh(fifo.CPLoWatermark); return;
case FIFO_RW_DISTANCE_LO:
_rReturnValue = ReadLow (fifo.CPReadWriteDistance);
if (IsOnThread())
if(fifo.CPWritePointer >= fifo.SafeCPReadPointer)
_rReturnValue = ReadLow (fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
_rReturnValue = ReadLow (fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - fifo.CPBase + 32);
else
_rReturnValue = ReadLow (fifo.CPReadWriteDistance);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_LO : %04x", _rReturnValue);
return;
case FIFO_RW_DISTANCE_HI:
_rReturnValue = ReadHigh(fifo.CPReadWriteDistance);
if (IsOnThread())
if(fifo.CPWritePointer >= fifo.SafeCPReadPointer)
_rReturnValue = ReadHigh (fifo.CPWritePointer - fifo.SafeCPReadPointer);
else
_rReturnValue = ReadHigh (fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - fifo.CPBase + 32);
else
_rReturnValue = ReadHigh(fifo.CPReadWriteDistance);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_HI : %04x", _rReturnValue);
return;
case FIFO_WRITE_POINTER_LO:
@ -304,6 +321,7 @@ void Write16(const u16 _Value, const u32 _Address)
UCPClearReg tmpCtrl(_Value);
m_CPClearReg.Hex = tmpCtrl.Hex;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to CLEAR_REGISTER : %04x", _Value);
SetCpClearRegister();
}
break;
@ -350,6 +368,7 @@ void Write16(const u16 _Value, const u32 _Address)
break;
case FIFO_READ_POINTER_HI:
WriteHigh((u32 &)fifo.CPReadPointer, _Value);
fifo.SafeCPReadPointer = fifo.CPReadPointer;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_HI : %04x", _Value);
break;
@ -382,10 +401,6 @@ void Write16(const u16 _Value, const u32 _Address)
case FIFO_RW_DISTANCE_HI:
WriteHigh((u32 &)fifo.CPReadWriteDistance, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_HI : %04x", _Value);
break;
case FIFO_RW_DISTANCE_LO:
WriteLow((u32 &)fifo.CPReadWriteDistance, _Value & 0xFFE0);
if (fifo.CPReadWriteDistance == 0)
{
GPFifo::ResetGatherPipe();
@ -395,6 +410,10 @@ void Write16(const u16 _Value, const u32 _Address)
ResetVideoBuffer();
}
IncrementCheckContextId();
DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_HI : %04x", _Value);
break;
case FIFO_RW_DISTANCE_LO:
WriteLow((u32 &)fifo.CPReadWriteDistance, _Value & 0xFFE0);
DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_LO : %04x", _Value);
break;
@ -404,7 +423,6 @@ void Write16(const u16 _Value, const u32 _Address)
if (!IsOnThread())
RunGpu();
ProcessFifoEvents();
}
void Read32(u32& _rReturnValue, const u32 _Address)
@ -426,6 +444,19 @@ void STACKALIGN GatherPipeBursted()
{
if (!IsOnThread())
RunGpu();
else
{
// In multibuffer mode is not allowed write in the same fifo attached to the GPU.
// Fix Pokemon XD in DC mode.
if((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) && (ProcessorInterface::Fifo_CPUBase == fifo.CPBase)
&& fifo.CPReadWriteDistance > 0)
{
waitingForPEInterruptDisable = true;
ProcessFifoAllDistance();
waitingForPEInterruptDisable = false;
}
}
return;
}
@ -441,26 +472,7 @@ void STACKALIGN GatherPipeBursted()
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
if (!IsOnThread())
{
RunGpu();
}
else
{
if(fifo.CPReadWriteDistance == fifo.CPEnd - fifo.CPBase - 32)
{
if(!OnOverflow)
NOTICE_LOG(COMMANDPROCESSOR,"FIFO is almost in overflown, BreakPoint: %i", fifo.bFF_Breakpoint);
OnOverflow = true;
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance > fifo.CPEnd - fifo.CPBase - 64)
Common::YieldCPU();
}
else
{
OnOverflow = false;
}
}
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflown by GatherPipe !\nCPU thread is too fast!");
@ -501,17 +513,15 @@ void AbortFrame()
void SetOverflowStatusFromGatherPipe()
{
if (!fifo.bFF_HiWatermarkInt) return;
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
bool interrupt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt &&
m_CPCtrlReg.GPLinkEnable && m_CPCtrlReg.GPReadEnable;
isHiWatermarkActive = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt && m_CPCtrlReg.GPReadEnable;
if (interrupt != interruptSet && interrupt)
CommandProcessor::UpdateInterrupts(true);
if (isHiWatermarkActive)
{
interruptSet = true;
INFO_LOG(COMMANDPROCESSOR,"Interrupt set");
ProcessorInterface::SetInterrupt(INT_CAUSE_CP, true);
}
}
void SetCpStatus()
@ -519,14 +529,12 @@ void SetCpStatus()
// overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
// breakpoint
// breakpoint
if (fifo.bFF_BPEnable)
{
if (fifo.CPBreakpoint == fifo.CPReadPointer)
{
{
if (!fifo.bFF_Breakpoint)
{
INFO_LOG(COMMANDPROCESSOR, "Hit breakpoint at %i", fifo.CPReadPointer);
@ -554,13 +562,18 @@ void SetCpStatus()
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
isHiWatermarkActive = ovfInt && m_CPCtrlReg.GPReadEnable;
if (interrupt != interruptSet && !interruptWaiting)
{
u64 userdata = interrupt?1:0;
if (IsOnThread())
{
interruptWaiting = true;
CommandProcessor::UpdateInterruptsFromVideoBackend(userdata);
if(!interrupt || bpInt || undfInt)
{
interruptWaiting = true;
CommandProcessor::UpdateInterruptsFromVideoBackend(userdata);
}
}
else
CommandProcessor::UpdateInterrupts(userdata);
@ -583,7 +596,7 @@ void ProcessFifoAllDistance()
if (IsOnThread())
{
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance && !AtBreakpoint())
fifo.CPReadWriteDistance && !AtBreakpoint() && !PixelEngine::WaitingForPEInterrupt())
Common::YieldCPU();
}
bProcessFifoAllDistance = false;
@ -603,13 +616,16 @@ void Shutdown()
void SetCpStatusRegister()
{
// Here always there is one fifo attached to the GPU
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
m_CPStatusReg.ReadIdle = (fifo.CPReadPointer == fifo.CPWritePointer) || (fifo.CPReadPointer == fifo.CPBreakpoint);
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer) || (fifo.CPReadPointer == fifo.CPBreakpoint) ;
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance;
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
// HACK to compensate for slow response to PE interrupts in Time Splitters: Future Perfect
if (IsOnThread())
PixelEngine::ResumeWaitingForPEInterrupt();
INFO_LOG(COMMANDPROCESSOR,"\t Read from STATUS_REGISTER : %04x", m_CPStatusReg.Hex);
DEBUG_LOG(COMMANDPROCESSOR, "(r) status: iBP %s | fReadIdle %s | fCmdIdle %s | iOvF %s | iUndF %s"
, m_CPStatusReg.Breakpoint ? "ON" : "OFF"
@ -622,14 +638,14 @@ void SetCpStatusRegister()
void SetCpControlRegister()
{
// If the new fifo is being attached We make sure there wont be SetFinish event pending.
// This protection fix eternal darkness booting, because the second SetFinish event when it is booting
// seems invalid or has a bug and hang the game.
if (!fifo.bFF_GPReadEnable && m_CPCtrlReg.GPReadEnable && !m_CPCtrlReg.BPEnable)
{
PixelEngine::ResetSetFinish();
ProcessFifoEvents();
PixelEngine::ResetSetFinish();
}
fifo.bFF_BPInt = m_CPCtrlReg.BPInt;
@ -644,9 +660,6 @@ void SetCpControlRegister()
ProcessorInterface::Fifo_CPUBase = fifo.CPBase;
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd;
}
// If overflown happens process the fifo to LoWatemark
if (bProcessFifoToLoWatermark)
ProcessFifoToLoWatermark();
if(fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
{
@ -658,7 +671,6 @@ void SetCpControlRegister()
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
}
DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | BP %s | Int %s | OvF %s | UndF %s | LINK %s"
, fifo.bFF_GPReadEnable ? "ON" : "OFF"
, fifo.bFF_BPEnable ? "ON" : "OFF"
@ -670,13 +682,15 @@ void SetCpControlRegister()
}
// NOTE: The implementation of this function should be correct, but we intentionally aren't using it at the moment.
// We don't emulate proper GP timing anyway at the moment, so this code would just slow down emulation.
void SetCpClearRegister()
{
if (IsOnThread())
{
if (!m_CPClearReg.ClearFifoUnderflow && m_CPClearReg.ClearFifoOverflow)
bProcessFifoToLoWatermark = true;
}
// if (IsOnThread())
// {
// if (!m_CPClearReg.ClearFifoUnderflow && m_CPClearReg.ClearFifoOverflow)
// bProcessFifoToLoWatermark = true;
// }
}
} // end of namespace CommandProcessor

View File

@ -25,18 +25,18 @@ class PointerWrap;
extern bool MT;
namespace CommandProcessor
{
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
extern volatile bool isHiWatermarkActive;
extern volatile bool interruptSet;
extern volatile bool interruptWaiting;
extern volatile bool interruptTokenWaiting;
extern volatile bool interruptFinishWaiting;
extern volatile bool OnOverflow;
extern volatile bool waitingForPEInterruptDisable;
// internal hardware addresses
enum
{
@ -158,6 +158,7 @@ void UpdateInterruptsFromVideoBackend(u64 userdata);
bool AllowIdleSkipping();
void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();
void SetOverflowStatusFromGatherPipe();

View File

@ -36,6 +36,9 @@ WNDCLASSEX wndClass;
const TCHAR m_szClassName[] = _T("DolphinEmuWnd");
int g_winstyle;
static volatile bool s_sizing;
static const int TITLE_TEXT_BUF_SIZE = 1024;
TCHAR m_titleTextBuffer[TITLE_TEXT_BUF_SIZE];
static const int WM_SETTEXT_CUSTOM = WM_USER + WM_SETTEXT;
bool IsSizing()
{
@ -225,6 +228,10 @@ LRESULT CALLBACK WndProc( HWND hWnd, UINT iMsg, WPARAM wParam, LPARAM lParam )
PostMessage(m_hParent, WM_USER, WM_USER_SETCURSOR, 0);
return true;
case WM_SETTEXT_CUSTOM:
SendMessage(hWnd, WM_SETTEXT, wParam, lParam);
break;
default:
return DefWindowProc(hWnd, iMsg, wParam, lParam);
}
@ -357,4 +364,27 @@ void SetSize(int width, int height)
MoveWindow(m_hWnd, rc.left, rc.top, rc.right-rc.left, rc.bottom-rc.top, TRUE);
}
void SetWindowText(const TCHAR* text)
{
// the simple way.
// we don't do this because it's a blocking call and the GUI thread might be waiting for us.
//::SetWindowText(m_hWnd, text);
// copy to m_titleTextBuffer in such a way that
// it remains null-terminated and without garbage data at every point in time,
// in case another thread reads it while we're doing this.
for (int i = 0; i < TITLE_TEXT_BUF_SIZE-1; ++i)
{
m_titleTextBuffer[i+1] = 0;
TCHAR c = text[i];
m_titleTextBuffer[i] = c;
if (!c)
break;
}
// the OS doesn't allow posting WM_SETTEXT,
// so we post our own message and convert it to that in WndProc
PostMessage(m_hWnd, WM_SETTEXT_CUSTOM, 0, (LPARAM)m_titleTextBuffer);
}
}

View File

@ -14,6 +14,7 @@ void Close();
void SetSize(int displayWidth, int displayHeight);
bool IsSizing();
void OSDMenu(WPARAM wParam);
void SetWindowText(const TCHAR* text);
}

View File

@ -22,6 +22,7 @@
#include "Atomic.h"
#include "OpcodeDecoding.h"
#include "CommandProcessor.h"
#include "PixelEngine.h"
#include "ChunkFile.h"
#include "Fifo.h"
#include "HW/Memmap.h"
@ -137,8 +138,7 @@ void RunGpuLoop()
CommandProcessor::SetCpStatus();
// check if we are able to run this buffer
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance && (!AtBreakpoint() || CommandProcessor::OnOverflow))
while (GpuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && !PixelEngine::WaitingForPEInterrupt())
{
if (!GpuRunningState) break;

View File

@ -21,7 +21,6 @@
#include "Common.h"
bool SaveTGA(const char* filename, int width, int height, void* pdata);
bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height);
bool SaveData(const char* filename, const char* pdata);
#endif // _IMAGEWRITE_H

View File

@ -21,6 +21,21 @@
#define WRITE p+=sprintf
int GetLightingShaderId(u32* out)
{
for (int i = 0; i < xfregs.numChan.numColorChans; ++i)
{
out[i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
out[i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
_assert_(xfregs.numChan.numColorChans <= 2);
return xfregs.numChan.numColorChans;
}
// coloralpha - 1 if color, 2 if alpha
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha)
{

View File

@ -18,6 +18,9 @@
#ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_
#include "CommonTypes.h"
int GetLightingShaderId(u32* out);
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest);
#endif // _LIGHTINGSHADERGEN_H_

View File

@ -171,6 +171,18 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
static volatile u32 s_doStateRequested = false;
void VideoBackendHardware::InitializeShared()
{
VideoCommon_Init();
s_swapRequested = 0;
s_efbAccessRequested = 0;
s_FifoShuttingDown = 0;
memset((void*)&s_beginFieldArgs, 0, sizeof(s_beginFieldArgs));
memset(&s_accessEFBArgs, 0, sizeof(s_accessEFBArgs));
s_AccessEFBResult = 0;
}
static volatile struct
{
unsigned char **ptr;
@ -238,6 +250,11 @@ bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone()
return CommandProcessor::isPossibleWaitingSetDrawDone;
}
bool VideoBackendHardware::Video_IsHiWatermarkActive()
{
return CommandProcessor::isHiWatermarkActive;
}
void VideoBackendHardware::Video_AbortFrame()
{
CommandProcessor::AbortFrame();

View File

@ -31,6 +31,7 @@
#include "CommandProcessor.h"
#include "HW/ProcessorInterface.h"
#include "DLCache.h"
#include "State.h"
namespace PixelEngine
{
@ -154,6 +155,16 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate);
void Init()
{
m_Control.Hex = 0;
m_ZConf.Hex = 0;
m_AlphaConf.Hex = 0;
m_DstAlphaConf.Hex = 0;
m_AlphaModeConf.Hex = 0;
m_AlphaRead.Hex = 0;
g_bSignalTokenInterrupt = false;
g_bSignalFinishInterrupt = false;
interruptSetToken = false;
interruptSetFinish = false;
et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread);
et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread);
@ -169,7 +180,6 @@ void Init()
void Read16(u16& _uReturnValue, const u32 _iAddress)
{
DEBUG_LOG(PIXELENGINE, "(r16) 0x%08x", _iAddress);
CommandProcessor::ProcessFifoEvents();
switch (_iAddress & 0xFFF)
{
// CPU Direct Access EFB Raster State Config
@ -205,14 +215,45 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
INFO_LOG(PIXELENGINE, "(r16) TOKEN_REG : %04x", _uReturnValue);
break;
// The return values for these BBOX registers need to be gotten from the bounding box of the object.
// See http://code.google.com/p/dolphin-emu/issues/detail?id=360#c74 for more details.
case PE_BBOX_LEFT:
{
// Left must be even and 606px max
_uReturnValue = std::min((u16) 606, bbox[0]) & ~1;
// 0x80, 0xa0, 0x80, 0xa0 makes Paper Mario happy.
case PE_BBOX_LEFT: _uReturnValue = bbox[0]; INFO_LOG(PIXELENGINE, "R: BBOX_LEFT = %i", bbox[0]); bbox_active = false; break;
case PE_BBOX_RIGHT: _uReturnValue = bbox[1]; INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT = %i", bbox[1]); bbox_active = false; break;
case PE_BBOX_TOP: _uReturnValue = bbox[2]; INFO_LOG(PIXELENGINE, "R: BBOX_TOP = %i", bbox[2]); bbox_active = false; break;
case PE_BBOX_BOTTOM: _uReturnValue = bbox[3]; INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", bbox[3]); bbox_active = false; break;
INFO_LOG(PIXELENGINE, "R: BBOX_LEFT = %i", _uReturnValue);
bbox_active = false;
break;
}
case PE_BBOX_RIGHT:
{
// Right must be odd and 607px max
_uReturnValue = std::min((u16) 607, bbox[1]) | 1;
INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT = %i", _uReturnValue);
bbox_active = false;
break;
}
case PE_BBOX_TOP:
{
// Top must be even and 478px max
_uReturnValue = std::min((u16) 478, bbox[2]) & ~1;
INFO_LOG(PIXELENGINE, "R: BBOX_TOP = %i", _uReturnValue);
bbox_active = false;
break;
}
case PE_BBOX_BOTTOM:
{
// Bottom must be odd and 479px max
_uReturnValue = std::min((u16) 479, bbox[3]) | 1;
INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", _uReturnValue);
bbox_active = false;
break;
}
case PE_PERF_0L:
case PE_PERF_0H:
@ -227,6 +268,10 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
case PE_PERF_5L:
case PE_PERF_5H:
INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
// git r90a2096a24f4 (svn r3663) added the PE_PERF cases, without setting
// _uReturnValue to anything, this reverts to the previous behaviour which allows
// The timer in SMS:Scrubbing Serena Beach to countdown correctly
_uReturnValue = 1;
break;
default:
@ -281,7 +326,6 @@ void Write16(const u16 _iValue, const u32 _iAddress)
break;
case PE_TOKEN_REG:
//LOG(PIXELENGINE,"WEIRD: program wrote token: %i",_iValue);
PanicAlert("(w16) WTF? PowerPC program wrote token: %i", _iValue);
//only the gx pipeline is supposed to be able to write here
//g_token = _iValue;
@ -292,7 +336,6 @@ void Write16(const u16 _iValue, const u32 _iAddress)
break;
}
CommandProcessor::ProcessFifoEvents();
}
void Write32(const u32 _iValue, const u32 _iAddress)
@ -316,20 +359,16 @@ void UpdateInterrupts()
void UpdateTokenInterrupt(bool active)
{
if(interruptSetToken != active)
{
ProcessorInterface::SetInterrupt(INT_CAUSE_PE_TOKEN, active);
interruptSetToken = active;
}
}
void UpdateFinishInterrupt(bool active)
{
if(interruptSetFinish != active)
{
ProcessorInterface::SetInterrupt(INT_CAUSE_PE_FINISH, active);
interruptSetFinish = active;
}
if (active)
State::ProcessRequestedStates(0);
}
// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate).
@ -348,8 +387,6 @@ void SetToken_OnMainThread(u64 userdata, int cyclesLate)
CommandProcessor::interruptTokenWaiting = false;
IncrementCheckContextId();
//}
//else
// LOGV(PIXELENGINE, 1, "VIDEO Backend wrote token: %i", CommandProcessor::fifo.PEToken);
}
void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
@ -426,4 +463,17 @@ void ResetSetToken()
}
CommandProcessor::interruptTokenWaiting = false;
}
bool WaitingForPEInterrupt()
{
return !CommandProcessor::waitingForPEInterruptDisable && (CommandProcessor::interruptFinishWaiting || CommandProcessor::interruptTokenWaiting || interruptSetFinish || interruptSetToken);
}
void ResumeWaitingForPEInterrupt()
{
interruptSetFinish = false;
interruptSetToken = false;
CommandProcessor::interruptFinishWaiting = false;
CommandProcessor::interruptTokenWaiting = false;
}
} // end of namespace PixelEngine

View File

@ -80,7 +80,8 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge);
void SetFinish(void);
void ResetSetFinish(void);
void ResetSetToken(void);
bool AllowIdleSkipping();
bool WaitingForPEInterrupt();
void ResumeWaitingForPEInterrupt();
// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it.
extern u16 bbox[4];

View File

@ -27,129 +27,264 @@
#include "VideoConfig.h"
#include "NativeVertexFormat.h"
PIXELSHADERUID last_pixel_shader_uid;
static int AlphaPreTest();
static void StageHash(int stage, u32* out)
{
out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
out[0] |= (alphaC&0xF0) << 24; // 8
out[1] |= alphaC >> 8; // 16
// reserve 3 bits for bpmem.tevorders[stage/2].getTexMap
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3
out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1
// reserve 3 bits for bpmem.tevorders[stage/2].getColorChan
bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages;
out[2] |= bHasIndStage << 2; // 1
bool needstexcoord = false;
if (bHasIndStage)
{
out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation
needstexcoord = true;
}
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC;
if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
|| cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
out[0] |= bpmem.combiners[stage].alphaC.rswap;
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2
out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23;
out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1;
}
out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1);
if (bpmem.tevorders[stage/2].getEnable(stage&1))
{
if (bHasIndStage) needstexcoord = true;
out[0] |= bpmem.combiners[stage].alphaC.tswap;
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2
out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16;
}
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
|| ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5
out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5
}
if (needstexcoord)
{
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16;
}
}
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
u32 numstages = bpmem.genMode.numtevstages + 1;
u32 projtexcoords = 0;
for (u32 i = 0; i < numstages; i++)
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] |= bpmem.genMode.numtevstages; // 4
uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= dstAlphaMode << 8; // 2
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1
bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
uid->values[0] |= enablePL << 11; // 1
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
u32 alphaPreTest = AlphaPreTest()+1;
uid->values[0] |= alphaPreTest << 16; // 2
if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS))
{
if (bpmem.tevorders[i/2].getEnable(i & 1))
{
int texcoord = bpmem.tevorders[i / 2].getTexCoord(i & 1);
if (xfregs.texMtxInfo[i].projection)
projtexcoords |= 1 << texcoord;
}
}
uid->values[0] = (u32)bpmem.genMode.numtevstages |
((u32)bpmem.genMode.numindstages << 4) |
((u32)bpmem.genMode.numtexgens << 7) |
((u32)dstAlphaMode << 11) |
((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 13) |
(projtexcoords << 21) |
((u32)bpmem.ztex2.op << 29);
// swap table
for (int i = 0; i < 8; i += 2)
((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
u32 enableZTexture = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ? 1 : 0;
uid->values[2] = (u32)bpmem.fog.c_proj_fsel.fsel |
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4) | ((u32)bpmem.fogRange.Base.Enabled << 5) |
((u32)bpmem.zcontrol.zcomploc << 6);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 2; ++i) {
uid->values[3 + i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[3 + i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
}
uid->values[4] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
int hdr = 5;
u32 *pcurvalue = &uid->values[hdr];
for (u32 i = 0; i < numstages; ++i)
{
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC;
u32 val0 = cc.hex & 0xffffff;
u32 val1 = ac.hex & 0xffffff;
val0 |= bpmem.tevksel[i / 2].getKC(i & 1) << 24;
val1 |= bpmem.tevksel[i / 2].getKA(i & 1) << 24;
pcurvalue[0] = val0;
pcurvalue[1] = val1;
pcurvalue += 2;
// Courtesy of PreAlphaTest, we're done already ;)
// NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though.
uid->num_values = 1;
return;
}
for (u32 i = 0; i < numstages / 2; ++i)
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
{
u32 val0, val1;
if (bpmem.tevorders[i].hex & 0x40)
val0 = bpmem.tevorders[i].hex & 0x3ff;
if (18+i < 32)
uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
else
val0 = bpmem.tevorders[i].hex & 0x380;
if (bpmem.tevorders[i].hex & 0x40000)
val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12;
else
val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12;
switch (i % 3) {
case 0: pcurvalue[0] = val0|(val1<<10); break;
case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break;
case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages / 2: %08x", (i % 3));
}
uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1
}
if (numstages & 1) { // odd
u32 val0;
if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40)
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x3ff;
else
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380;
uid->values[1] = bpmem.genMode.numindstages << 2; // 3
u32 indirectStagesUsed = 0;
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
indirectStagesUsed |= (1 << bpmem.tevind[i].bt);
switch (bpmem.genMode.numtevstages % 3)
{
case 0: pcurvalue[0] = val0; break;
case 1: pcurvalue[0] |= val0 << 20; break;
case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages: %08x", bpmem.genMode.numtevstages % 3);
}
}
assert(indirectStagesUsed == (indirectStagesUsed & 0xF));
if ((bpmem.genMode.numtevstages % 3) != 2)
++pcurvalue;
uid->values[1] |= indirectStagesUsed << 5; // 4;
uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr);
for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
{
u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits
switch (i % 3)
if (indirectStagesUsed & (1 << i))
{
case 0: pcurvalue[0] = val; break;
case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break;
case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break;
default: PanicAlert("Unknown case for Ind Stages: %08x", (i % 3));
uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1
if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2
}
}
// yeah, well ....
uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages);
u32* ptr = &uid->values[2];
for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1; ++i)
{
StageHash(i, ptr);
ptr += 4; // max: ptr = &uid->values[66]
}
ptr[0] |= bpmem.alphaFunc.comp0; // 3
ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3
ptr[0] |= bpmem.alphaFunc.logic << 6; // 2
if (alphaPreTest == 0 || alphaPreTest == 2)
{
ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3
if (DepthTextureEnable)
{
ptr[0] |= bpmem.ztex2.op << 11; // 2
ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1
ptr[0] |= bpmem.zmode.testenable << 14; // 1
ptr[0] |= bpmem.zmode.updateenable << 15; // 1
}
}
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
if (bpmem.fog.c_proj_fsel.fsel != 0)
{
ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
}
}
++ptr;
if (enablePL)
{
ptr += GetLightingShaderId(ptr);
*ptr++ = components;
}
uid->num_values = ptr - uid->values;
}
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
memset(uid->values, 0, sizeof(uid->values));
u32* ptr = uid->values;
*ptr++ = dstAlphaMode; // 0
*ptr++ = bpmem.genMode.hex; // 1
*ptr++ = bpmem.ztex2.hex; // 2
*ptr++ = bpmem.zcontrol.hex; // 3
*ptr++ = bpmem.zmode.hex; // 4
*ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6
*ptr++ = xfregs.numTexGen.hex; // 7
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
*ptr++ = xfregs.color[0].hex;
*ptr++ = xfregs.alpha[0].hex;
*ptr++ = xfregs.color[1].hex;
*ptr++ = xfregs.alpha[1].hex;
*ptr++ = components;
}
for (unsigned int i = 0; i < 8; ++i)
*ptr++ = xfregs.texMtxInfo[i].hex; // 8-15
for (unsigned int i = 0; i < 16; ++i)
*ptr++ = bpmem.tevind[i].hex; // 16-31
*ptr++ = bpmem.tevindref.hex; // 32
for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times
{
*ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i
*ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i
*ptr++ = bpmem.tevind[i].hex; // 35+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i
}
ptr = &uid->values[113];
*ptr++ = bpmem.alphaFunc.hex; // 113
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 114
*ptr++ = bpmem.fogRange.Base.hex; // 115
_assert_((ptr - uid->values) == uid->GetNumValues());
}
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
PIXELSHADERUIDSAFE new_id;
GetSafePixelShaderId(&new_id, dstAlphaMode, components);
if (!(old_id == new_id))
{
std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
char msg[8192];
char* ptr = msg;
ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
const int N = new_id.GetNumValues();
for (int i = 0; i < N/2; ++i)
ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
new_id.values[2*i], new_id.values[2*i+1]);
if (N % 2)
ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file(szTemp);
file << msg;
file << "\n\nOld shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code;
file.close();
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
}
}
// old tev->pixelshader notes
@ -166,7 +301,6 @@ static void SampleTexture(char *&p, const char *destination, const char *texcoor
// static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode);
static void WriteFog(char *&p);
static int AlphaPreTest();
static const char *tevKSelTableC[] = // KCSEL
{
@ -334,12 +468,6 @@ static const char *tevRasTable[] =
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
};
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
@ -354,23 +482,14 @@ static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
#define WRITE p+=sprintf
static const char *swapColors = "rgba";
static char swapModeTable[4][5];
static char text[16384];
static bool DepthTextureEnable;
struct RegisterState
{
bool ColorNeedOverflowControl;
bool AlphaNeedOverflowControl;
bool AuxStored;
};
static RegisterState RegisterStates[4];
static void BuildSwapModeTable()
{
static const char *swapColors = "rgba";
for (int i = 0; i < 4; i++)
{
swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
@ -386,14 +505,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
BuildSwapModeTable();
BuildSwapModeTable(); // Needed for WriteStage
int numStages = bpmem.genMode.numtevstages + 1;
int numTexgen = bpmem.genMode.numtexgens;
char *p = text;
WRITE(p, "//Pixel Shader for TEV stages\n");
WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n",
numStages, numTexgen, bpmem.genMode.numindstages);
WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n",
numStages, numTexgen/*, bpmem.genMode.numindstages*/);
int nIndirectStagesUsed = 0;
if (bpmem.genMode.numindstages > 0)
@ -441,6 +560,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, "uniform float4 "I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS);
WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA);
WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS);
if (ApiType & API_D3D9)
{
WRITE(p, "uniform float4 "I_VTEXSCALE"[4] : register(c%d);\n", C_VTEXSCALE);
}
WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS);
WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE);
WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX);
@ -506,10 +629,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
// alpha test will always fail, so restart the shader and just make it an empty function
WRITE(p, "ocol0 = 0;\n");
if(DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
if(dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "ocol1 = 0;\n");
WRITE(p, "discard;\n");
if(ApiType != API_D3D11)
WRITE(p, "return;\n");
@ -524,10 +643,12 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, " float4 c0 = "I_COLORS"[1], c1 = "I_COLORS"[2], c2 = "I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
" float4 alphabump=0;\n"
" float3 tevcoord;\n"
" float2 wrappedcoord, tempcoord;\n"
" float4 cc0, cc1, cc2, cprev,crastemp,ckonsttemp;\n\n");
" float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
" float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
" float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
@ -593,16 +714,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
}
}
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
RegisterStates[0].AuxStored = false;
for(int i = 1; i < 4; i++)
{
RegisterStates[i].AlphaNeedOverflowControl = true;
RegisterStates[i].ColorNeedOverflowControl = true;
RegisterStates[i].AuxStored = false;
}
for (int i = 0; i < numStages; i++)
WriteStage(p, i, ApiType); //build the equation for this stage
@ -610,23 +721,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
// The results of the last texenv stage are put onto the screen,
// regardless of the used destination register
if(bpmem.combiners[numStages - 1].colorC.dest != 0)
{
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
WRITE(p, "prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
}
if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
{
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
WRITE(p, "prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
}
WRITE(p, "prev.rgb = %s;\n",tevCOutputTable[bpmem.combiners[numStages-1].colorC.dest]);
WRITE(p, "prev.a = %s;\n",tevAOutputTable[bpmem.combiners[numStages-1].alphaC.dest]);
}
// emulation of unisgned 8 overflow when casting if needed
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
WRITE(p, "prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
// emulation of unsigned 8 overflow when casting
WRITE(p, "prev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
// TODO: Why are we doing a second alpha pretest here?
if (!WriteAlphaTest(p, ApiType, dstAlphaMode))
{
// alpha test will always fail, so restart the shader and just make it an empty function
@ -741,10 +842,6 @@ static const char *TEVCMPAlphaOPTable[16] =
static void WriteStage(char *&p, int n, API_TYPE ApiType)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
@ -753,8 +850,11 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (!bHasTexCoord)
texcoord = 0;
WRITE(p, "// TEV stage %d\n", n);
if (bHasIndStage)
{
WRITE(p, "// indirect op\n");
// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
if (bpmem.tevind[n].bs != ITBA_OFF)
{
@ -781,11 +881,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix
_assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix
_assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
}
@ -824,11 +926,15 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
bool bCRas = cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC;
bool bARas = ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA;
if(bCRas || bARas)
// blah1
if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
|| cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
}
@ -836,7 +942,6 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (bpmem.tevorders[n/2].getEnable(n&1))
{
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
if(!bHasIndStage)
{
// calc tevcord
@ -846,20 +951,20 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n");
}
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType);
}
else
WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
bool bCKonst = cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST;
bool bAKonst = ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST;
if (bCKonst || bAKonst )
// blah2
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
|| ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
if(kc > 7 || ka > 7)
{
@ -871,100 +976,35 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
}
}
if(cc.a == TEVCOLORARG_CPREV
|| cc.a == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_CPREV
|| cc.b == TEVCOLORARG_APREV
|| cc.c == TEVCOLORARG_CPREV
|| cc.c == TEVCOLORARG_APREV
|| ac.a == TEVALPHAARG_APREV
|| ac.b == TEVALPHAARG_APREV
|| ac.c == TEVALPHAARG_APREV)
{
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
{
WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cprev = prev;\n");
}
RegisterStates[0].AuxStored = true;
}
if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV
|| cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV
|| ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV)
WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C0
|| cc.a == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C0
|| cc.b == TEVCOLORARG_A0
|| cc.c == TEVCOLORARG_C0
|| cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0
|| ac.b == TEVALPHAARG_A0
|| ac.c == TEVALPHAARG_A0)
{
if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
{
WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[1].AlphaNeedOverflowControl = false;
RegisterStates[1].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc0 = c0;\n");
}
RegisterStates[1].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C1
|| cc.a == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_C1
|| cc.b == TEVCOLORARG_A1
|| cc.c == TEVCOLORARG_C1
|| cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1
|| ac.b == TEVALPHAARG_A1
|| ac.c == TEVALPHAARG_A1)
{
if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
{
WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[2].AlphaNeedOverflowControl = false;
RegisterStates[2].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc1 = c1;\n");
}
RegisterStates[2].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0
|| cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0)
WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C2
|| cc.a == TEVCOLORARG_A2
|| cc.b == TEVCOLORARG_C2
|| cc.b == TEVCOLORARG_A2
|| cc.c == TEVCOLORARG_C2
|| cc.c == TEVCOLORARG_A2
|| ac.a == TEVALPHAARG_A2
|| ac.b == TEVALPHAARG_A2
|| ac.c == TEVALPHAARG_A2)
{
if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
{
if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1
|| cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1)
WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2
|| cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2
|| cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2
|| ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2)
WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[3].AlphaNeedOverflowControl = false;
RegisterStates[3].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc2 = c2;\n");
}
RegisterStates[3].AuxStored = true;
}
RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
RegisterStates[cc.dest].AuxStored = false;
WRITE(p, "// color combine\n");
if (cc.clamp)
WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]);
else
@ -995,7 +1035,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "%s", tevBiasTable[cc.bias]);
if (cc.shift > 0)
if (cc.shift > TEVSCALE_1)
WRITE(p, ")");
}
else
@ -1011,8 +1051,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, ")");
WRITE(p,";\n");
RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
RegisterStates[ac.dest].AuxStored = false;
WRITE(p, "// alpha combine\n");
// combine the alpha channel
if (ac.clamp)
WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]);
@ -1058,14 +1097,21 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (ac.clamp)
WRITE(p, ")");
WRITE(p, ";\n\n");
WRITE(p, "// TEV done\n");
}
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
{
if (ApiType == API_D3D11)
WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
WRITE(p, "%s=Tex%d.Sample(samp%d, %s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
else if (ApiType & API_D3D9)
{
// D3D9 uses different pixel to texel mapping, so we need to offset our sampling address by half a pixel (assuming native and virtual texture dimensions match each other, otherwise some math is involved).
// Read the MSDN article "Directly Mapping Texels to Pixels (Direct3D 9)" for further info.
WRITE(p, "%s=tex2D(samp%d, (%s.xy + 0.5f*"I_VTEXSCALE"[%d].%s) * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap/2, (texmap&1)?"zw":"xy", texmap, texswap);
}
else
WRITE(p, "%s=tex2D(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap);
WRITE(p, "%s=tex2D(samp%d, %s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap);
}
static const char *tevAlphaFuncsTable[] =
@ -1123,6 +1169,11 @@ static int AlphaPreTest()
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
{
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
int Pretest = AlphaPreTest();
if(Pretest >= 0)

View File

@ -24,64 +24,63 @@
#define I_KCOLORS "k"
#define I_ALPHA "alphaRef"
#define I_TEXDIMS "texdim"
#define I_VTEXSCALE "vtexscale"
#define I_ZBIAS "czbias"
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
#define I_FOG "cfog"
#define I_PLIGHTS "cLights"
#define I_PMATERIALS "cmtrl"
#define I_PMATERIALS "cmtrl"
#define C_COLORMATRIX 0 // 0
#define C_COLORS 0 // 0
#define C_KCOLORS (C_COLORS + 4) // 4
#define C_ALPHA (C_KCOLORS + 4) // 8
#define C_TEXDIMS (C_ALPHA + 1) // 9
#define C_ZBIAS (C_TEXDIMS + 8) //17
#define C_INDTEXSCALE (C_ZBIAS + 2) //19
#define C_INDTEXMTX (C_INDTEXSCALE + 2) //21
#define C_FOG (C_INDTEXMTX + 6) //27
#define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2)
#define C_VTEXSCALE (C_TEXDIMS + 8) //17 - virtual texture scaling factor (e.g. custom textures, scaled EFB copies)
#define C_ZBIAS (C_VTEXSCALE + 4) //21
#define C_INDTEXSCALE (C_ZBIAS + 2) //23
#define C_INDTEXMTX (C_INDTEXSCALE + 2) //25
#define C_FOG (C_INDTEXMTX + 6) //31
#define C_PLIGHTS (C_FOG + 3) //34
#define C_PMATERIALS (C_PLIGHTS + 40) //74
#define C_PENVCONST_END (C_PMATERIALS + 4) //78
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 120
// DO NOT make anything in this class virtual.
class PIXELSHADERUID
template<bool safe>
class _PIXELSHADERUID
{
public:
u32 values[PIXELSHADERUID_MAX_VALUES];
u16 tevstages, indstages;
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
int num_values;
PIXELSHADERUID()
_PIXELSHADERUID()
{
memset(values, 0, PIXELSHADERUID_MAX_VALUES * 4);
tevstages = indstages = 0;
}
PIXELSHADERUID(const PIXELSHADERUID& r)
_PIXELSHADERUID(const _PIXELSHADERUID& r)
{
tevstages = r.tevstages;
indstages = r.indstages;
int N = GetNumValues();
_assert_(N <= PIXELSHADERUID_MAX_VALUES);
for (int i = 0; i < N; ++i)
values[i] = r.values[i];
num_values = r.num_values;
if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
}
int GetNumValues() const
{
return tevstages + indstages + 4;
if (safe) return (sizeof(values) / sizeof(u32));
else return num_values;
}
bool operator <(const PIXELSHADERUID& _Right) const
bool operator <(const _PIXELSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
@ -91,12 +90,12 @@ public:
return false;
}
bool operator ==(const PIXELSHADERUID& _Right) const
bool operator ==(const _PIXELSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
if (N != _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
@ -104,6 +103,8 @@ public:
return true;
}
};
typedef _PIXELSHADERUID<false> PIXELSHADERUID;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE;
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
@ -114,8 +115,11 @@ enum DSTALPHA_MODE
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode);
extern PIXELSHADERUID last_pixel_shader_uid;
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
#endif // GCOGL_PIXELSHADER_H

View File

@ -36,9 +36,11 @@ static bool s_bFogRangeAdjustChanged;
static int nLightsChanged[2]; // min,max
static float lastRGBAfull[2][4][4];
static u8 s_nTexDimsChanged;
static u8 s_nVirtualTexScalesChanged;
static u8 s_nIndTexScaleChanged;
static u32 lastAlpha;
static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30
static float lastVirtualTexScales[16]; // even fields: width ratio; odd fields: height ratio
static u32 lastZBias;
static int nMaterialsChanged;
@ -61,6 +63,7 @@ void PixelShaderManager::Init()
{
lastAlpha = 0;
memset(lastTexDims, 0, sizeof(lastTexDims));
memset(lastVirtualTexScales, 0, sizeof(lastVirtualTexScales));
lastZBias = 0;
memset(lastRGBAfull, 0, sizeof(lastRGBAfull));
Dirty();
@ -70,6 +73,7 @@ void PixelShaderManager::Dirty()
{
s_nColorsChanged[0] = s_nColorsChanged[1] = 15;
s_nTexDimsChanged = 0xFF;
s_nVirtualTexScalesChanged = 0xFF;
s_nIndTexScaleChanged = 0xFF;
s_nIndTexMtxChanged = 15;
s_bAlphaChanged = s_bZBiasChanged = s_bZTextureTypeChanged = s_bDepthRangeChanged = true;
@ -83,7 +87,7 @@ void PixelShaderManager::Shutdown()
}
void PixelShaderManager::SetConstants()
void PixelShaderManager::SetConstants(API_TYPE api_type)
{
for (int i = 0; i < 2; ++i)
{
@ -109,6 +113,16 @@ void PixelShaderManager::SetConstants()
s_nTexDimsChanged = 0;
}
if ((api_type & API_D3D9) && s_nVirtualTexScalesChanged)
{
for (int i = 0; i < 8; i += 2)
{
if (s_nVirtualTexScalesChanged & (3<<i))
SetPSVirtualTexScalePair(i/2);
}
s_nVirtualTexScalesChanged = 0;
}
if (s_bAlphaChanged)
{
SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
@ -338,6 +352,13 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
}
void PixelShaderManager::SetPSVirtualTexScalePair(int texpairid)
{
PRIM_LOG("vtexscale%d: %f %f %f %f\n", texpairid, lastVirtualTexScales[texpairid*4], lastVirtualTexScales[texpairid*4+1],
lastVirtualTexScales[texpairid*4+2], lastVirtualTexScales[texpairid*4+3]);
SetPSConstant4fv(C_VTEXSCALE + texpairid, &lastVirtualTexScales[texpairid*4]);
}
// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value
// and update it when the shader constant is set, only.
void PixelShaderManager::SetColorChanged(int type, int num, bool high)
@ -376,14 +397,25 @@ void PixelShaderManager::SetDestAlpha(const ConstantAlpha& alpha)
}
}
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt)
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height, u32 virtual_width, u32 virtual_height, u32 wraps, u32 wrapt, API_TYPE api_type)
{
u32 wh = width | (height << 16) | (wraps << 28) | (wrapt << 30);
if (lastTexDims[texmapid] != wh)
bool refresh = lastTexDims[texmapid] != wh;
if (api_type & API_D3D9)
{
lastTexDims[texmapid] = wh;
refresh |= (lastVirtualTexScales[texmapid*2] != (float)width / (float)virtual_width);
refresh |= (lastVirtualTexScales[texmapid*2+1] != (float)height / (float)virtual_height);
}
if (refresh)
{
lastTexDims[texmapid] = wh;
lastVirtualTexScales[texmapid*2] = (float)width / (float)virtual_width;
lastVirtualTexScales[texmapid*2+1] = (float)height / (float)virtual_height;
s_nTexDimsChanged |= 1 << texmapid;
}
s_nVirtualTexScalesChanged |= 1 << texmapid;
}
}
void PixelShaderManager::SetZTextureBias(u32 bias)

View File

@ -26,18 +26,20 @@
class PixelShaderManager
{
static void SetPSTextureDims(int texid);
static void SetPSVirtualTexScalePair(int texpairid);
public:
static void Init();
static void Dirty();
static void Shutdown();
static void SetConstants(); // sets pixel shader constants
static void SetConstants(API_TYPE api_type); // sets pixel shader constants
// constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high);
static void SetAlpha(const AlphaFunc& alpha);
static void SetDestAlpha(const ConstantAlpha& alpha);
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
static void SetTexDims(int texmapid, u32 width, u32 height, u32 virtual_width, u32 virtual_height, u32 wraps, u32 wrapt, API_TYPE api_type);
static void SetZTextureBias(u32 bias);
static void SetViewportChanged();
static void SetIndMatrixChanged(int matrixidx);

View File

@ -42,6 +42,7 @@
#include "Host.h"
#include "XFMemory.h"
#include "FifoPlayer/FifoRecorder.h"
#include "AVIDump.h"
#include <cmath>
#include <string>
@ -52,7 +53,6 @@ int OSDChoice, OSDTime;
Renderer *g_renderer = NULL;
bool s_bLastFrameDumped = false;
std::mutex Renderer::s_criticalScreenshot;
std::string Renderer::s_sScreenshotName;
@ -81,15 +81,28 @@ bool Renderer::s_EnableDLCachingAfterRecording;
unsigned int Renderer::prev_efb_format = (unsigned int)-1;
Renderer::Renderer()
Renderer::Renderer() : frame_data(NULL), bLastFrameDumped(false)
{
UpdateActiveConfig();
#if defined _WIN32 || defined HAVE_LIBAV
bAVIDumping = false;
#endif
}
Renderer::~Renderer()
{
// invalidate previous efb format
prev_efb_format = (unsigned int)-1;
#if defined _WIN32 || defined HAVE_LIBAV
if (g_ActiveConfig.bDumpFrames && bLastFrameDumped && bAVIDumping)
AVIDump::Stop();
#else
if (pFrameDump.IsOpen())
pFrameDump.Close();
#endif
delete[] frame_data;
}
void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)

View File

@ -41,7 +41,7 @@
extern int frameCount;
extern int OSDChoice, OSDTime;
extern bool s_bLastFrameDumped;
extern bool bLastFrameDumped;
// Renderer really isn't a very good name for this class - it's more like "Misc".
// The long term goal is to get rid of this class and replace it with others that make
@ -54,7 +54,7 @@ public:
virtual void SetColorMask() = 0;
virtual void SetBlendMode(bool forceUpdate) = 0;
virtual bool SetScissorRect() = 0;
virtual void SetScissorRect(const TargetRectangle& rc) = 0;
virtual void SetGenerationMode() = 0;
virtual void SetDepthMode() = 0;
virtual void SetLogicOpMode() = 0;
@ -132,9 +132,6 @@ public:
protected:
static std::mutex s_criticalScreenshot;
static std::string s_sScreenshotName;
static void CalculateTargetScale(int x, int y, int &scaledX, int &scaledY);
static bool CalculateTargetSize(int multiplier = 1);
static void CalculateXYScale(const TargetRectangle& dst_rect);
@ -143,6 +140,16 @@ protected:
static void RecordVideoMemory();
static volatile bool s_bScreenshot;
static std::mutex s_criticalScreenshot;
static std::string s_sScreenshotName;
#if defined _WIN32 || defined HAVE_LIBAV
bool bAVIDumping;
#else
File::IOFile pFrameDump;
#endif
char* frame_data;
bool bLastFrameDumped;
// The framebuffer size
static int s_target_width;
@ -175,16 +182,4 @@ extern Renderer *g_renderer;
void UpdateViewport(Matrix44& vpCorrection);
template <typename R>
void GetScissorRect(MathUtil::Rectangle<R> &rect)
{
const int xoff = bpmem.scissorOffset.x * 2 - 342;
const int yoff = bpmem.scissorOffset.y * 2 - 342;
rect.left = (R)(bpmem.scissorTL.x - xoff - 342);
rect.top = (R)(bpmem.scissorTL.y - yoff - 342);
rect.right = (R)(bpmem.scissorBR.x - xoff - 341);
rect.bottom = (R)(bpmem.scissorBR.y - yoff - 341);
}
#endif // _COMMON_RENDERBASE_H_

View File

@ -1,3 +1,19 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "MemoryUtil.h"
@ -23,28 +39,19 @@ enum
TextureCache *g_texture_cache;
GC_ALIGNED16(u8 *TextureCache::temp) = NULL;
GC_ALIGNED16(u8 *TextureCache::temp) = NULL;
TextureCache::TexCache TextureCache::textures;
bool TextureCache::DeferredInvalidate;
TextureCache::TCacheEntryBase::~TCacheEntryBase()
{
if (0 == addr)
return;
if (!isRenderTarget && !g_ActiveConfig.bSafeTextureCache)
{
u32 *const ptr = (u32*)Memory::GetPointer(addr);
if (ptr && *ptr == hash)
*ptr = oldpixel;
}
}
TextureCache::TextureCache()
{
if (!temp)
temp =(u8*) AllocateAlignedMemory(TEMP_SIZE,16);
temp = (u8*)AllocateAlignedMemory(TEMP_SIZE,16);
TexDecoder_SetTexFmtOverlayOptions(g_ActiveConfig.bTexFmtOverlayEnable, g_ActiveConfig.bTexFmtOverlayCenter);
if(g_ActiveConfig.bHiresTextures && !g_ActiveConfig.bDumpTextures)
HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
@ -88,12 +95,11 @@ TextureCache::~TextureCache()
void TextureCache::Cleanup()
{
TexCache::iterator
iter = textures.begin(),
tcend = textures.end();
TexCache::iterator iter = textures.begin();
TexCache::iterator tcend = textures.end();
while (iter != tcend)
{
if (frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount)
if (frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount) // TODO: Deleting EFB copies might not be a good idea here...
{
delete iter->second;
textures.erase(iter++);
@ -135,7 +141,7 @@ void TextureCache::MakeRangeDynamic(u32 start_address, u32 size)
const int rangePosition = iter->second->IntersectsMemoryRange(start_address, size);
if (0 == rangePosition)
{
iter->second->hash = 0;
iter->second->SetHashes(TEXHASH_INVALID);
}
}
}
@ -166,20 +172,19 @@ void TextureCache::ClearRenderTargets()
TexCache::iterator
iter = textures.begin(),
tcend = textures.end();
for (; iter!=tcend; ++iter)
iter->second->isRenderTarget = false;
if (iter->second->type != TCET_EC_DYNAMIC)
iter->second->type = TCET_NORMAL;
}
TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
u32 address, unsigned int width, unsigned int height, int texformat,
unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel)
unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel, bool from_tmem)
{
// necessary?
if (0 == address)
return NULL;
u8* ptr = Memory::GetPointer(address);
// TexelSizeInNibbles(format)*width*height/16;
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat) - 1;
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat) - 1;
@ -188,111 +193,85 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
unsigned int expandedHeight = (height + bsh) & (~bsh);
const unsigned int nativeW = width;
const unsigned int nativeH = height;
bool isPow2;
u64 hash_value = 0;
u64 texHash = 0;
u32 texID = address;
u64 tex_hash = TEXHASH_INVALID; // Hash assigned to texcache entry (also used to generate filenames used for texture dumping and custom texture lookup)
u64 tlut_hash = TEXHASH_INVALID;
u32 full_format = texformat;
const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
const u32 palette_size = TexDecoder_GetPaletteSize(texformat);
bool texture_is_dynamic = false;
unsigned int texLevels;
PC_TexFormat pcfmt = PC_TEX_FMT_NONE;
const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
if (isPaletteTexture)
full_format = texformat | (tlutfmt << 16);
// hires texture loading and texture dumping require accurate hashes
if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures)
const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u8* src_data;
if (from_tmem) src_data = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE];
else src_data = Memory::GetPointer(address);
tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isPaletteTexture)
{
texHash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
const u32 palette_size = TexDecoder_GetPaletteSize(texformat);
tlut_hash = GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isPaletteTexture)
{
// WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up)
// tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower.
// This trick (to change the texID depending on the TLUT addr) is a trick to get around
// an issue with metroid prime's fonts, where it has multiple sets of fonts on top of
// each other stored in a single texture, and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that texture with different tluts get different IDs.
const u64 tlutHash = GetHash64(texMem + tlutaddr, palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples);
texHash ^= tlutHash;
if (g_ActiveConfig.bSafeTextureCache)
texID ^= ((u32)tlutHash) ^ (u32)(tlutHash >> 32);
}
if (g_ActiveConfig.bSafeTextureCache)
hash_value = texHash;
// NOTE: For non-paletted textures, texID is equal to the texture address.
// A paletted texture, however, may have multiple texIDs assigned though depending on the currently used tlut.
// This (changing texID depending on the tlut_hash) is a trick to get around
// an issue with Metroid Prime's fonts (it has multiple sets of fonts on each other
// stored in a single texture and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that a paletted texture gets assigned multiple IDs for each tlut used.
//
// TODO: Because texID isn't always the same as the address now, CopyRenderTargetToTexture might be broken now
texID ^= ((u32)tlut_hash) ^(u32)(tlut_hash >> 32);
tex_hash ^= tlut_hash;
}
TCacheEntryBase *entry = textures[texID];
if (entry)
{
if (!g_ActiveConfig.bSafeTextureCache)
{
if (entry->isRenderTarget || entry->isDynamic)
{
if (!g_ActiveConfig.bCopyEFBToTexture)
{
hash_value = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
// 1. Calculate reference hash:
// calculated from RAM texture data for normal textures. Hashes for paletted textures are modified by tlut_hash. 0 for virtual EFB copies.
if (g_ActiveConfig.bCopyEFBToTexture && entry->IsEfbCopy())
tex_hash = TEXHASH_INVALID;
if (isPaletteTexture)
{
hash_value ^= GetHash64(&texMem[tlutaddr], palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples);
}
}
else
{
hash_value = 0;
}
}
else
{
hash_value = *(u32*)ptr;
}
}
else if ((entry->isRenderTarget || entry->isDynamic) && g_ActiveConfig.bCopyEFBToTexture)
// 2. a) For EFB copies, only the hash and the texture address need to match
if (entry->IsEfbCopy() && tex_hash == entry->hash && address == entry->addr)
{
hash_value = 0;
}
if (entry->type != TCET_EC_VRAM)
entry->type = TCET_NORMAL;
if (((entry->isRenderTarget || entry->isDynamic) && hash_value == entry->hash && address == entry->addr)
|| ((address == entry->addr) && (hash_value == entry->hash) && full_format == entry->format && entry->mipLevels == maxlevel))
{
entry->isDynamic = false;
// TODO: Print a warning if the format changes! In this case, we could reinterpret the internal texture object data to the new pixel format (similiar to what is already being done in Renderer::ReinterpretPixelFormat())
goto return_entry;
}
// 2. b) For normal textures, all texture parameters need to match
if (address == entry->addr && tex_hash == entry->hash && full_format == entry->format &&
entry->num_mipmaps == maxlevel && entry->native_width == nativeW && entry->native_height == nativeH)
{
goto return_entry;
}
// 3. If we reach this line, we'll have to upload the new texture data to VRAM.
// If we're lucky, the texture parameters didn't change and we can reuse the internal texture object instead of destroying and recreating it.
//
// TODO: Don't we need to force texture decoding to RGBA8 for dynamic EFB copies?
// TODO: Actually, it should be enough if the internal texture format matches...
if ((entry->type == TCET_NORMAL && width == entry->native_width && height == entry->native_height && full_format == entry->format && entry->num_mipmaps == maxlevel)
|| (entry->type == TCET_EC_DYNAMIC && entry->native_width == width && entry->native_height == height))
{
// reuse the texture
}
else
{
// Let's reload the new texture data into the same texture,
// instead of destroying it and having to create a new one.
// Might speed up movie playback very, very slightly.
texture_is_dynamic = (entry->isRenderTarget || entry->isDynamic) && !g_ActiveConfig.bCopyEFBToTexture;
if (!entry->isRenderTarget &&
((!entry->isDynamic && width == entry->realW && height == entry->realH && full_format == entry->format && entry->mipLevels == maxlevel)
|| (entry->isDynamic && entry->realW == width && entry->realH == height)))
{
// reuse the texture
}
else
{
// delete the texture and make a new one
delete entry;
entry = NULL;
}
// delete the texture and make a new one
delete entry;
entry = NULL;
}
}
if (g_ActiveConfig.bHiresTextures)
{
// Load Custom textures
@ -301,23 +280,24 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
unsigned int newWidth = width;
unsigned int newHeight = height;
sprintf(texPathTemp, "%s_%08x_%i", SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str(), (u32) (texHash & 0x00000000FFFFFFFFLL), texformat);
sprintf(texPathTemp, "%s_%08x_%i", SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str(), (u32) (tex_hash & 0x00000000FFFFFFFFLL), texformat);
pcfmt = HiresTextures::GetHiresTex(texPathTemp, &newWidth, &newHeight, texformat, temp);
if (pcfmt != PC_TEX_FMT_NONE)
{
expandedWidth = width = newWidth;
expandedHeight = height = newHeight;
// TODO: shouldn't generating mips be forced on now?
// native mips with a custom texture wouldn't make sense
}
}
// TODO: RGBA8 textures are stored non-continuously in tmem, that might cause problems when preloading is enabled
if (pcfmt == PC_TEX_FMT_NONE)
pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth,
expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth,
expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
bool isPow2;
unsigned int texLevels;
UseNativeMips = UseNativeMips && (width == nativeW && height == nativeH); // Only load native mips if their dimensions fit to our virtual texture dimensions
isPow2 = !((width & (width - 1)) || (height & (height - 1)));
texLevels = (isPow2 && UseNativeMips && maxlevel) ?
GetPow2(std::max(width, height)) : !isPow2;
@ -329,40 +309,27 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
if (NULL == entry) {
textures[texID] = entry = g_texture_cache->CreateTexture(width, height, expandedWidth, texLevels, pcfmt);
// Sometimes, we can get around recreating a texture if only the number of mip levels gets changes
// Sometimes, we can get around recreating a texture if only the number of mip levels changes
// e.g. if our texture cache entry got too many mipmap levels we can limit the number of used levels by setting the appropriate render states
// Thus, we don't update this member for every Load, but just whenever the texture gets recreated
entry->mipLevels = maxlevel;
//
// TODO: Won't we end up recreating textures all the time because maxlevel doesn't necessarily equal texLevels?
entry->num_mipmaps = maxlevel; // TODO: Does this actually work? We can't really adjust mipmap settings per-stage...
entry->type = TCET_NORMAL;
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
}
entry->addr = address;
entry->format = full_format;
entry->size_in_bytes = texture_size;
entry->virtualW = width;
entry->virtualH = height;
entry->realW = nativeW;
entry->realH = nativeH;
entry->isRenderTarget = false;
entry->isNonPow2 = false;
entry->isDynamic = texture_is_dynamic;
entry->oldpixel = *(u32*)ptr;
if (g_ActiveConfig.bSafeTextureCache || entry->isDynamic)
entry->hash = hash_value;
else
// don't like rand() here
entry->hash = *(u32*)ptr = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF);
entry->SetGeneralParameters(address, texture_size, full_format, entry->num_mipmaps);
entry->SetDimensions(nativeW, nativeH, width, height);
entry->hash = tex_hash;
if (entry->IsEfbCopy() && !g_ActiveConfig.bCopyEFBToTexture) entry->type = TCET_EC_DYNAMIC;
else entry->type = TCET_NORMAL;
// load texture
entry->Load(width, height, expandedWidth, 0, (texLevels == 0));
// load mips
// load mips - TODO: Loading mipmaps from tmem is untested!
if (texLevels > 1 && pcfmt != PC_TEX_FMT_NONE)
{
const unsigned int bsdepth = TexDecoder_GetTexelSizeInNibbles(texformat);
@ -370,20 +337,31 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
unsigned int level = 1;
unsigned int mipWidth = (width + 1) >> 1;
unsigned int mipHeight = (height + 1) >> 1;
ptr += texture_size;
u8* ptr_even = NULL, *ptr_odd = NULL;
if (from_tmem)
{
ptr_even = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE + texture_size];
ptr_odd = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE];
}
src_data += texture_size;
while ((mipHeight || mipWidth) && (level < texLevels))
{
u8** ptr;
if (from_tmem) ptr = (level % 2) ? &ptr_odd : &ptr_even;
else ptr = &src_data;
const unsigned int currentWidth = (mipWidth > 0) ? mipWidth : 1;
const unsigned int currentHeight = (mipHeight > 0) ? mipHeight : 1;
expandedWidth = (currentWidth + bsw) & (~bsw);
expandedHeight = (currentHeight + bsh) & (~bsh);
TexDecoder_Decode(temp, ptr, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
entry->Load(currentWidth, currentHeight, expandedWidth, level);
TexDecoder_Decode(temp, *ptr, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
entry->Load(currentWidth, currentHeight, expandedWidth, level, false);
ptr += ((std::max(mipWidth, bsw) * std::max(mipHeight, bsh) * bsdepth) >> 1);
*ptr += ((std::max(mipWidth, bsw) * std::max(mipHeight, bsh) * bsdepth) >> 1);
mipWidth >>= 1;
mipHeight >>= 1;
++level;
@ -404,7 +382,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
sprintf(szTemp, "%s/%s_%08x_%i.png", szDir.c_str(),
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str(),
(u32) (texHash & 0x00000000FFFFFFFFLL), texformat);
(u32) (tex_hash & 0x00000000FFFFFFFFLL), texformat);
if (false == File::Exists(szTemp))
entry->Save(szTemp);
@ -426,10 +404,49 @@ return_entry:
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
// Emulation methods:
// - EFB to RAM:
// Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
// Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
// Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
// Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
// - EFB to texture:
// Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
// Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
// Since we don't do any further encoding or decoding here, this method is much faster.
// It also allows enhancing the visual quality by doing scaled EFB copies.
// - hybrid EFB copies:
// 1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
// 1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
// If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
// 2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
// 2a) Entry doesn't exist:
// - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
// - Create a texture cache entry for the target (type = TCET_EC_VRAM)
// - Store a hash of the encoded RAM data in the texcache entry.
// 2b) Entry exists AND type is TCET_EC_VRAM:
// - Like case 2a, but reuse the old texcache entry instead of creating a new one.
// 2c) Entry exists AND type is TCET_EC_DYNAMIC:
// - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
// - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
// 3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
// 3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
// 3b) If the two hashes differ AND type is TCET_EC_VRAM, screw your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
// Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
// 3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
// Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
// Compatibility is as good as EFB to RAM.
// Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
// EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
//
// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
//
// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.
float colmat[28] = {0};
float *const fConstAdd = colmat + 16;
float *const ColorMask = colmat + 20;
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
unsigned int cbufid = -1;
@ -437,7 +454,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
{
switch (dstFormat)
{
case 0: // Z4
case 0: // Z4
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 0;
break;
@ -447,7 +464,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
cbufid = 1;
break;
case 3: // Z16
case 3: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
cbufid = 24;
break;
@ -472,8 +489,10 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
cbufid = 5;
break;
case 12: // Z16L
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f;
case 12: // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
// Used e.g. in Zelda: Skyward Sword
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 6;
break;
@ -483,7 +502,6 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
cbufid = 7;
break;
}
}
else if (isIntensity)
{
@ -627,15 +645,18 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;
bool texture_is_dynamic = false;
TCacheEntryBase *entry = textures[dstAddr];
if (entry)
{
if ((entry->isRenderTarget && entry->virtualW == scaled_tex_w && entry->virtualH == scaled_tex_h)
|| (entry->isDynamic && entry->realW == tex_w && entry->realH == tex_h))
if ((entry->type == TCET_EC_VRAM && entry->virtual_width == scaled_tex_w && entry->virtual_height == scaled_tex_h)
|| (entry->type == TCET_EC_DYNAMIC && entry->native_width == tex_w && entry->native_height == tex_h))
{
texture_is_dynamic = entry->isDynamic;
if (entry->type == TCET_EC_DYNAMIC)
{
scaled_tex_w = tex_w;
scaled_tex_h = tex_h;
}
}
else
{
@ -645,32 +666,16 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
}
}
if (texture_is_dynamic)
{
scaled_tex_w = tex_w;
scaled_tex_h = tex_h;
}
if (NULL == entry)
{
// create the texture
textures[dstAddr] = entry = g_texture_cache->CreateRenderTargetTexture(scaled_tex_w, scaled_tex_h);
entry->addr = dstAddr;
entry->hash = 0;
entry->realW = tex_w;
entry->realH = tex_h;
entry->virtualW = scaled_tex_w;
entry->virtualH = scaled_tex_h;
entry->format = dstFormat;
entry->mipLevels = 0;
entry->isRenderTarget = true;
entry->isNonPow2 = true;
entry->isDynamic = false;
// TODO: Using the wrong dstFormat, dumb...
entry->SetGeneralParameters(dstAddr, 0, dstFormat, 0);
entry->SetDimensions(tex_w, tex_h, scaled_tex_w, scaled_tex_h);
entry->SetHashes(TEXHASH_INVALID);
entry->type = TCET_EC_VRAM;
}
entry->frameCount = frameCount;

View File

@ -1,3 +1,19 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _TEXTURECACHEBASE_H
#define _TEXTURECACHEBASE_H
@ -14,47 +30,56 @@
class TextureCache
{
public:
enum TexCacheEntryType
{
TCET_NORMAL,
TCET_EC_VRAM, // EFB copy which sits in VRAM and is ready to be used
TCET_EC_DYNAMIC, // EFB copy which sits in RAM and needs to be decoded before being used
};
struct TCacheEntryBase
{
// TODO: organize
#define TEXHASH_INVALID 0
// common members
u32 addr;
u32 size_in_bytes;
u64 hash;
//u32 paletteHash;
u32 oldpixel;
//u32 pal_hash;
u32 format;
enum TexCacheEntryType type;
unsigned int num_mipmaps;
unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view
unsigned int virtual_width, virtual_height; // Texture dimensions from OUR point of view - for hires textures or scaled EFB copies
// used to delete textures which haven't been used for TEXTURE_KILL_THRESHOLD frames
int frameCount;
unsigned int realW, realH; // Texture dimensions from the GameCube's point of view
unsigned int virtualW, virtualH; // Texture dimensions from OUR point of view
// Real and virtual dimensions are usually the same, but may be
// different if e.g. we use high-res textures. Then, realW,realH will
// be the dimensions of the original GameCube texture and
// virtualW,virtualH will be the dimensions of the high-res texture.
unsigned int mipLevels;
void SetGeneralParameters(u32 addr, u32 size, u32 format, unsigned int num_mipmaps)
{
this->addr = addr;
this->size_in_bytes = size;
this->format = format;
this->num_mipmaps = num_mipmaps;
}
bool isRenderTarget; // copied from EFB
bool isDynamic; // Used for hybrid EFB copies to enable checks for CPU modifications
bool isNonPow2; // doesn't seem to be used anywhere
void SetDimensions(unsigned int native_width, unsigned int native_height, unsigned int virtual_width, unsigned int virtual_height)
{
this->native_width = native_width;
this->native_height = native_height;
this->virtual_width = virtual_width;
this->virtual_height = virtual_height;
}
void SetHashes(u64 hash/*, u32 pal_hash*/)
{
this->hash = hash;
//this->pal_hash = pal_hash;
}
//TCacheEntryBase()
//{
// // TODO: remove these
// isRenderTarget = 0;
// hash = 0;
// //paletteHash = 0;
// oldpixel = 0;
// addr = 0;
// size_in_bytes = 0;
// frameCount = 0;
// isNonPow2 = true;
// w = 0;
// h = 0;
// scaledW = 0;
// scaledH = 0;
//}
virtual ~TCacheEntryBase();
@ -62,13 +87,15 @@ public:
virtual bool Save(const char filename[]) = 0;
virtual void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level, bool autogen_mips = false) = 0;
unsigned int expanded_width, unsigned int level, bool autogen_mips) = 0;
virtual void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
unsigned int srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat) = 0;
int IntersectsMemoryRange(u32 range_address, u32 range_size) const;
bool IsEfbCopy() { return (type == TCET_EC_VRAM || type == TCET_EC_DYNAMIC); }
};
virtual ~TextureCache(); // needs virtual for DX11 dtor
@ -87,7 +114,7 @@ public:
virtual TCacheEntryBase* CreateRenderTargetTexture(unsigned int scaled_tex_w, unsigned int scaled_tex_h) = 0;
static TCacheEntryBase* Load(unsigned int stage, u32 address, unsigned int width, unsigned int height,
int format, unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel);
int format, unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel, bool from_tmem);
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf);

View File

@ -20,8 +20,8 @@
#include "Hash.h"
enum
{
TMEM_SIZE = 1024*1024,
HALFTMEM_SIZE = 512*1024
TMEM_SIZE = 1024*1024,
TMEM_LINE_SIZE = 32,
};
extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]);

View File

@ -101,7 +101,7 @@ void LOADERDECL PosMtx_Write()
void LOADERDECL UpdateBoundingBox()
{
if (!PixelEngine::bbox_active)
if (!PixelEngine::bbox_active)
return;
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
@ -111,7 +111,7 @@ void LOADERDECL UpdateBoundingBox()
// Then convert to screen space and update the bounding box.
float p[3] = {data[0], data[1], data[2]};
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *proj_matrix = &g_fProjectionMatrix[0];
float t[3];
@ -119,40 +119,23 @@ void LOADERDECL UpdateBoundingBox()
t[1] = p[0] * world_matrix[4] + p[1] * world_matrix[5] + p[2] * world_matrix[6] + world_matrix[7];
t[2] = p[0] * world_matrix[8] + p[1] * world_matrix[9] + p[2] * world_matrix[10] + world_matrix[11];
float o[4];
o[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
// Depth culling
if (o[2] < 0.0) {
// No pixels are likely to be drawn - don't update bounding box.
return;
}
float o[3];
o[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
o[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
o[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
o[0] /= o[3];
o[1] /= o[3];
o[2] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
// should possibly adjust for viewport?
o[0] = (o[0] + 1.0f) * 320.0f;
o[1] = (o[1] + 1.0f) * 240.0f;
o[0] /= o[2];
o[1] /= o[2];
if (o[0] < PixelEngine::bbox[0]) PixelEngine::bbox[0] = (u16)std::max(0.0f, o[0]);
if (o[0] > PixelEngine::bbox[1]) PixelEngine::bbox[1] = (u16)std::min(640.0f, o[0]);
if (o[1] < PixelEngine::bbox[2]) PixelEngine::bbox[2] = (u16)std::max(0.0f, o[1]);
if (o[1] > PixelEngine::bbox[3]) PixelEngine::bbox[3] = (u16)std::min(480.0f, o[1]);
// Max width seems to be 608, while max height is 480
// Here height is set to 484 as BBox bottom always seems to be off by a few pixels
o[0] = (o[0] + 1.0f) * 304.0f;
o[1] = (1.0f - o[1]) * 242.0f;
// Hardware tests bounding boxes in 2x2 blocks => left and top are even, right and bottom are odd
PixelEngine::bbox[0] &= ~1;
PixelEngine::bbox[1] |= 1;
PixelEngine::bbox[2] &= ~1;
PixelEngine::bbox[3] |= 1;
/*
if (GetAsyncKeyState(VK_LSHIFT)) {
ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]);
ERROR_LOG(VIDEO, "%i %i %i %i", g_VideoInitialize.pBBox[0], g_VideoInitialize.pBBox[1], g_VideoInitialize.pBBox[2], g_VideoInitialize.pBBox[3]);
}*/
if (o[0] < PixelEngine::bbox[0]) PixelEngine::bbox[0] = (u16) std::max(0.0f, o[0]);
if (o[0] > PixelEngine::bbox[1]) PixelEngine::bbox[1] = (u16) o[0];
if (o[1] < PixelEngine::bbox[2]) PixelEngine::bbox[2] = (u16) std::max(0.0f, o[1]);
if (o[1] > PixelEngine::bbox[3]) PixelEngine::bbox[3] = (u16) o[1];
}
void LOADERDECL TexMtx_ReadDirect_UByte()
@ -290,9 +273,8 @@ void VertexLoader::CompileVertexTranslator()
// OK, so we just got a point. Let's go back and read it for the bounding box.
#ifdef BBOX_SUPPORT
WriteCall(UpdateBoundingBox);
#endif
if(g_ActiveConfig.bUseBBox)
WriteCall(UpdateBoundingBox);
// Normals
vtx_decl.num_normals = 0;

View File

@ -179,8 +179,8 @@ void Pos_ReadIndex_Float_SSSE3(int Index)
if(Index < MaxSize)
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
const __m128i a = _mm_loadu_si128((__m128i*)pData);
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;

View File

@ -353,8 +353,8 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
const __m128i a = _mm_loadl_epi64((__m128i*)pData);
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32);
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
u8* p = VertexManager::s_pCurBufferPointer;
_mm_storel_epi64((__m128i*)p, b);
LOG_TEX2();

View File

@ -26,25 +26,19 @@
#include "VertexShaderGen.h"
#include "VideoConfig.h"
VERTEXSHADERUID last_vertex_shader_uid;
// Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
{
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] = components |
(xfregs.numTexGen.numTexGens << 23) |
(xfregs.numChan.numColorChans << 27) |
(xfregs.dualTexTrans.enabled << 29);
for (int i = 0; i < 2; ++i) {
uid->values[1+i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[1+i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
// TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here?
GetLightingShaderId(&uid->values[1]);
uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3];
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) {
@ -69,6 +63,69 @@ void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
}
}
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components)
{
// Just store all used registers here without caring whether we need all bits or less.
memset(uid->values, 0, sizeof(uid->values));
u32* ptr = uid->values;
*ptr++ = components;
*ptr++ = xfregs.numTexGen.hex;
*ptr++ = xfregs.numChan.hex;
*ptr++ = xfregs.dualTexTrans.hex;
for (int i = 0; i < 2; ++i) {
*ptr++ = xfregs.color[i].hex;
*ptr++ = xfregs.alpha[i].hex;
}
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
for (unsigned int i = 0; i < 8; ++i) {
*ptr++ = xfregs.texMtxInfo[i].hex;
*ptr++ = xfregs.postMtxInfo[i].hex;
}
_assert_((ptr - uid->values) == uid->GetNumValues());
}
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
VERTEXSHADERUIDSAFE new_id;
GetSafeVertexShaderId(&new_id, components);
if (!(old_id == new_id))
{
std::string new_code(GenerateVertexShaderCode(components, api));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
char msg[8192];
char* ptr = msg;
ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
const int N = new_id.GetNumValues();
for (int i = 0; i < N/2; ++i)
ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
new_id.values[2*i], new_id.values[2*i+1]);
if (N % 2)
ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file(szTemp);
file << msg;
file << "\n\nOld shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code;
file.close();
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
}
}
static char text[16384];
#define WRITE p+=sprintf
@ -244,7 +301,8 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
else
WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// TODO: This probably isn't necessary if pixel lighting is enabled.
p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
if(xfregs.numChan.numColorChans < 2)

View File

@ -48,17 +48,18 @@
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 4)
class VERTEXSHADERUID
template<bool safe>
class _VERTEXSHADERUID
{
#define NUM_VSUID_VALUES_SAFE 25
public:
u32 values[9];
u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9];
VERTEXSHADERUID()
_VERTEXSHADERUID()
{
memset(values, 0, sizeof(values));
}
VERTEXSHADERUID(const VERTEXSHADERUID& r)
_VERTEXSHADERUID(const _VERTEXSHADERUID& r)
{
for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i];
@ -66,10 +67,11 @@ public:
int GetNumValues() const
{
return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
if (safe) return NUM_VSUID_VALUES_SAFE;
else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
}
bool operator <(const VERTEXSHADERUID& _Right) const
bool operator <(const _VERTEXSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
@ -86,7 +88,7 @@ public:
return false;
}
bool operator ==(const VERTEXSHADERUID& _Right) const
bool operator ==(const _VERTEXSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
@ -99,14 +101,18 @@ public:
return true;
}
};
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID;
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;
// components is included in the uid.
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
extern VERTEXSHADERUID last_vertex_shader_uid;
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
#endif // GCOGL_VERTEXSHADER_H

View File

@ -150,6 +150,12 @@ void VertexShaderManager::Init()
memset(&xfregs, 0, sizeof(xfregs));
memset(xfmem, 0, sizeof(xfmem));
ResetView();
// TODO: should these go inside ResetView()?
Matrix44::LoadIdentity(s_viewportCorrection);
memset(g_fProjectionMatrix, 0, sizeof(g_fProjectionMatrix));
for (int i = 0; i < 4; ++i)
g_fProjectionMatrix[i*5] = 1.0f;
}
void VertexShaderManager::Shutdown()

View File

@ -56,12 +56,6 @@ enum
MAX_XFB_HEIGHT = 574
};
// If this is enabled, bounding boxes will be computed for everything drawn.
// This can theoretically have a big speed hit in some geom heavy games. Needs more work.
// Helps some effects in Paper Mario (but they aren't quite right yet).
// Do testing to figure out if the speed hit is bad?
// #define BBOX_SUPPORT
// Logging
// ----------
void HandleGLError();
@ -70,8 +64,7 @@ void HandleGLError();
// This structure should only be used to represent a rectangle in EFB
// coordinates, where the origin is at the upper left and the frame dimensions
// are 640 x 528.
struct EFBRectangle : public MathUtil::Rectangle<int>
{};
typedef MathUtil::Rectangle<int> EFBRectangle;
// This structure should only be used to represent a rectangle in standard target
// coordinates, where the origin is at the lower left and the frame dimensions

View File

@ -57,12 +57,8 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "Crop", &bCrop, false);
iniFile.Get("Settings", "UseXFB", &bUseXFB, 0);
iniFile.Get("Settings", "UseRealXFB", &bUseRealXFB, 0);
iniFile.Get("Settings", "UseNativeMips", &bUseNativeMips, false);
iniFile.Get("Settings", "SafeTextureCache", &bSafeTextureCache, true); // Settings
//Safe texture cache params
iniFile.Get("Settings", "UseNativeMips", &bUseNativeMips, false);
iniFile.Get("Settings", "SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples,128);
iniFile.Get("Settings", "ShowFPS", &bShowFPS, false); // Settings
iniFile.Get("Settings", "ShowInputDisplay", &bShowInputDisplay, false);
iniFile.Get("Settings", "OverlayStats", &bOverlayStats, false);
@ -96,6 +92,8 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "EnableOpenCL", &bEnableOpenCL, false);
iniFile.Get("Settings", "OMPDecoder", &bOMPDecoder, false);
iniFile.Get("Settings", "EnableShaderDebugging", &bEnableShaderDebugging, false);
iniFile.Get("Enhancements", "ForceFiltering", &bForceFiltering, 0);
iniFile.Get("Enhancements", "MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x)
iniFile.Get("Enhancements", "PostProcessingShader", &sPostProcessingShader, "");
@ -132,7 +130,6 @@ void VideoConfig::GameIniLoad(const char *ini_file)
iniFile.GetIfExists("Video_Settings", "UseXFB", &bUseXFB);
iniFile.GetIfExists("Video_Settings", "UseRealXFB", &bUseRealXFB);
iniFile.GetIfExists("Video_Settings", "UseNativeMips", &bUseNativeMips);
iniFile.GetIfExists("Video_Settings", "SafeTextureCache", &bSafeTextureCache);
iniFile.GetIfExists("Video_Settings", "SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples);
iniFile.GetIfExists("Video_Settings", "DLOptimize", &iCompileDLsLevel);
iniFile.GetIfExists("Video_Settings", "HiresTextures", &bHiresTextures);
@ -170,6 +167,7 @@ void VideoConfig::GameIniLoad(const char *ini_file)
iniFile.GetIfExists("Video", "PH_ZNear", &sPhackvalue[0]);
iniFile.GetIfExists("Video", "PH_ZFar", &sPhackvalue[1]);
iniFile.GetIfExists("Video", "ZTPSpeedupHack", &bZTPSpeedHack);
iniFile.GetIfExists("Video", "UseBBox", &bUseBBox);
}
void VideoConfig::VerifyValidity()
@ -193,11 +191,7 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "UseXFB", bUseXFB);
iniFile.Set("Settings", "UseRealXFB", bUseRealXFB);
iniFile.Set("Settings", "UseNativeMips", bUseNativeMips);
iniFile.Set("Settings", "SafeTextureCache", bSafeTextureCache);
//safe texture cache params
iniFile.Set("Settings", "SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
iniFile.Set("Settings", "ShowFPS", bShowFPS);
iniFile.Set("Settings", "ShowInputDisplay", bShowInputDisplay);
iniFile.Set("Settings", "OverlayStats", bOverlayStats);
@ -231,6 +225,8 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "EnableOpenCL", bEnableOpenCL);
iniFile.Set("Settings", "OMPDecoder", bOMPDecoder);
iniFile.Set("Settings", "EnableShaderDebugging", bEnableShaderDebugging);
iniFile.Set("Enhancements", "ForceFiltering", bForceFiltering);
iniFile.Set("Enhancements", "MaxAnisotropy", iMaxAnisotropy);
iniFile.Set("Enhancements", "PostProcessingShader", sPostProcessingShader);
@ -274,7 +270,6 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini)
SET_IF_DIFFERS("Video_Settings", "UseXFB", bUseXFB);
SET_IF_DIFFERS("Video_Settings", "UseRealXFB", bUseRealXFB);
SET_IF_DIFFERS("Video_Settings", "UseNativeMips", bUseNativeMips);
SET_IF_DIFFERS("Video_Settings", "SafeTextureCache", bSafeTextureCache);
SET_IF_DIFFERS("Video_Settings", "SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
SET_IF_DIFFERS("Video_Settings", "DLOptimize", iCompileDLsLevel);
SET_IF_DIFFERS("Video_Settings", "HiresTextures", bHiresTextures);

View File

@ -129,12 +129,12 @@ struct VideoConfig
bool bOSDHotKey;
bool bCopyEFBToTexture;
bool bCopyEFBScaled;
bool bSafeTextureCache;
int iSafeTextureCache_ColorSamples;
int iPhackvalue[4];
std::string sPhackvalue[2];
float fAspectRatioHackW, fAspectRatioHackH;
bool bZTPSpeedHack; // The Legend of Zelda: Twilight Princess
bool bUseBBox;
bool bEnablePixelLighting;
bool bEnablePerPixelDepth;
@ -147,6 +147,9 @@ struct VideoConfig
// D3D only config, mostly to be merged into the above
int iAdapter;
// Debugging
bool bEnableShaderDebugging;
// Static config per API
// TODO: Move this out of VideoConfig
struct

View File

@ -60,3 +60,14 @@ void VideoCommon_RunLoop(bool enable)
{
EmulatorState(enable);
}
void VideoCommon_Init()
{
memset(arraybases, 0, sizeof(arraybases));
memset(arraystrides, 0, sizeof(arraystrides));
memset(&MatrixIndexA, 0, sizeof(MatrixIndexA));
memset(&MatrixIndexB, 0, sizeof(MatrixIndexB));
memset(&g_VtxDesc, 0, sizeof(g_VtxDesc));
memset(g_VtxAttr, 0, sizeof(g_VtxAttr));
memset(texMem, 0, TMEM_SIZE);
}

View File

@ -23,5 +23,6 @@
void VideoCommon_DoState(PointerWrap &p);
void VideoCommon_RunLoop(bool enable);
void VideoCommon_Init();
#endif // _VIDEOSTATE_H

View File

@ -154,6 +154,8 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETTEXMTXINFO+5:
case XFMEM_SETTEXMTXINFO+6:
case XFMEM_SETTEXMTXINFO+7:
VertexManager::Flush();
nextAddress = XFMEM_SETTEXMTXINFO + 8;
break;
@ -165,6 +167,8 @@ void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData)
case XFMEM_SETPOSMTXINFO+5:
case XFMEM_SETPOSMTXINFO+6:
case XFMEM_SETPOSMTXINFO+7:
VertexManager::Flush();
nextAddress = XFMEM_SETPOSMTXINFO + 8;
break;