Moving pixel engine and command processor from core to video common. This will break previous save states. Adds decoding single pixels to texture decoder.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4391 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
donkopunchstania
2009-10-10 21:19:39 +00:00
parent 5049fcf9f5
commit 56214e9103
34 changed files with 1889 additions and 1460 deletions

View File

@ -875,7 +875,7 @@ struct BPMemory
u32 clearcolorAR; //4f
u32 clearcolorGB; //50
u32 clearZValue; //51
u32 triggerEFBCopy; //52
UPE_Copy triggerEFBCopy; //52
u32 copyfilter[2]; //53,54
u32 boundbox0;//55
u32 boundbox1;//56

View File

@ -22,6 +22,7 @@
#include "Render.h"
#include "VideoCommon.h"
#include "PixelShaderManager.h"
#include "PixelEngine.h"
#include "BPFunctions.h"
#include "BPStructs.h"
#include "TextureDecoder.h"
@ -162,7 +163,7 @@ void BPWritten(const BPCmd& bp)
switch (bp.newvalue & 0xFF)
{
case 0x02:
g_VideoInitialize.pSetPEFinish(); // may generate interrupt
PixelEngine::SetFinish(); // may generate interrupt
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
break;
@ -172,11 +173,11 @@ void BPWritten(const BPCmd& bp)
}
break;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
g_VideoInitialize.pSetPEToken(static_cast<u16>(bp.newvalue & 0xFFFF), FALSE);
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), FALSE);
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
break;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
g_VideoInitialize.pSetPEToken(static_cast<u16>(bp.newvalue & 0xFFFF), TRUE);
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), TRUE);
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
break;
// ------------------------
@ -194,8 +195,7 @@ void BPWritten(const BPCmd& bp)
rc.right = (int)(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1);
rc.bottom = (int)(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1);
UPE_Copy PE_copy;
PE_copy.Hex = bpmem.triggerEFBCopy;
UPE_Copy PE_copy = bpmem.triggerEFBCopy;
// Check if we are to copy from the EFB or draw to the XFB
if (PE_copy.copy_to_xfb == 0)

View File

@ -0,0 +1,748 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// NOTES (mb2):
// * GP/CPU sync can be done by several way:
// - MP1 use BP (breakpoint) in movie-menus and mostly PEtoken in 3D
// - ZWW as Crazy Taxi: PEfinish (GXSetDrawDone)
// - SMS: BP, PEToken, PEfinish
// - ZTP: seems to use PEfinish only
// - Animal Crossing: PEfinish at start but there's a bug...
// There's tons of HiWmk/LoWmk ping pong -> Another sync fashion?
// - Super Monkey Ball Adventures: PEToken. Oddity: read&check-PEToken-value-loop stays
// in its JITed block (never fall in Advance() until the game-watchdog's stuff).
// That's why we can't let perform the AdvanceCallBack as usual.
// The PEToken is volatile now and in the fifo struct.
// - Super Monkey Ball: PEFinish. This game has the lamest way to deal with fifo sync for our MT's stuff.
// A hack is mandatory. DONE and should be ok for other games.
// *What I guess (thx to asynchronous DualCore mode):
// PPC has a frame-finish watchdog, handled by system timing code such as the decrementer.
// (DualCore mode): I have observed, after the ZTP logos, a fifo recovery starting when DECREMENTER_EXCEPTION is thrown.
// The frame setting (by GP) took too much time and didn't finish properly due to this watchdog.
// Faster GX plugins required, indeed :p
// * BPs are needed for some game GP/CPU sync.
// But it could slowdown (MP1 at least) because our GP in DC is faster than "expected" in some area.
// eg: in movie-menus in MP1, BP are reached quickly.
// The bad thing is that involve too much PPC work (int ack, lock GP, reset BP, new BP addr, unlock BP...) hence the slowdown.
// Anyway, emulation should be more accurate like this, and it emulates some sort of better load balancing.
// Either way, in those areas a more accurate GP timing could be achieved by slowing down the GP or something less stupid.
// Not functional and not used atm (breaks MP2).
// * funny, in revs before those with this note, BP irq wasn't cleared (a bug indeed) and MP1 menus was faster.
// BP irq was raised and ack just once but never cleared. However it's sufficient for MP1 to work.
// This hack is used atm. Known BP handling doesn't work well (btw, BP irq clearing might be done by a CPIntEnable rising edge).
// The hack seems to be responsible for the movie stuttering in MP1 menus.
// TODO (mb2):
// * raise watermark Ov/Un irq: POINTLESS since emulated GP timings can't be accurately set.
// Only 3 choices IMHO for a correct emulated load balancing in DC mode:
// - make our own GP watchdog hack that can lock CPU if GP too slow. STARTED
// - hack directly something in PPC timings (dunno how)
// - boost GP so we can consider it as infinitely fast compared to CPU.
// * raise ReadIdle/CmdIdle flags and observe behaviour of MP1 & ZTP (at least)
// * Clean useless comments and debug stuff in Read16, Write16, GatherPipeBursted when sync will be fixed for DC
// * (reminder) do the same in:
// PeripheralInterface.cpp, PixelEngine.cpp, OGL->BPStructs.cpp, fifo.cpp... ok just check change log >>
// TODO
// * Kick GPU from dispatcher, not from writes
// * Thunking framework
// * Cleanup of messy now unnecessary safety code in jit
#include "Common.h"
#include "VideoCommon.h"
#include "MathUtil.h"
#include "Thread.h"
#include "Atomic.h"
#include "Fifo.h"
#include "ChunkFile.h"
#include "CommandProcessor.h"
namespace CommandProcessor
{
// look for 1002 verts, breakpoint there, see why next draw is flushed
// TODO(ector): Warn on bbox read/write
// Fifo Status Register
union UCPStatusReg
{
struct
{
unsigned OverflowHiWatermark : 1;
unsigned UnderflowLoWatermark : 1;
unsigned ReadIdle : 1;
unsigned CommandIdle : 1;
unsigned Breakpoint : 1;
unsigned : 11;
};
u16 Hex;
UCPStatusReg() {Hex = 0; }
UCPStatusReg(u16 _hex) {Hex = _hex; }
};
// Fifo Control Register
union UCPCtrlReg
{
struct
{
unsigned GPReadEnable : 1;
unsigned CPIntEnable : 1;
unsigned FifoOverflowIntEnable : 1;
unsigned FifoUnderflowIntEnable : 1;
unsigned GPLinkEnable : 1;
unsigned BPEnable : 1;
unsigned : 10;
};
u16 Hex;
UCPCtrlReg() {Hex = 0; }
UCPCtrlReg(u16 _hex) {Hex = _hex; }
};
// Fifo Control Register
union UCPClearReg
{
struct
{
unsigned ClearFifoOverflow : 1;
unsigned ClearFifoUnderflow : 1;
unsigned ClearMetrices : 1;
unsigned : 13;
};
u16 Hex;
UCPClearReg() {Hex = 0; }
UCPClearReg(u16 _hex) {Hex = _hex; }
};
// STATE_TO_SAVE
// variables
// Everything from m_CPStatusReg down to fifo is serialized by DoState().
// Shadow copies of the status, control and clear registers.
UCPStatusReg m_CPStatusReg;
UCPCtrlReg m_CPCtrlReg;
UCPClearReg m_CPClearReg;
// Bounding box values returned by the FIFO_BOUNDING_BOX_* reads in Read16();
// Init() sets them to 0,0 - 640,480.
int m_bboxleft;
int m_bboxtop;
int m_bboxright;
int m_bboxbottom;
// Value of FIFO_TOKEN_REGISTER as last written through Write16().
u16 m_tokenReg;
SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread
// Last fifo.Fake_GPWDToken value seen by WaitForFrameFinish(); not part of DoState().
static u32 fake_GPWatchdogLastToken = 0;
// Set by SetFifoIdleFromVideoPlugin(); the wait loops in this file block on it via MsgWait().
static Common::Event s_fifoIdleEvent;
// File-local numeric constants.
enum
{
    // Number of bytes the write pointer advances per GatherPipeBursted() call.
    GATHER_PIPE_SIZE = 32,

    // Cause mask handed to pSetInterrupt() by UpdateInterrupts().
    INT_CAUSE_CP = 0x800
};
// Passes the command processor state through the given PointerWrap.
// The fields are handed to p.Do() in a fixed order: the three register
// shadows, the four bounding box values, the token register and finally the
// whole fifo struct. Do not reorder these calls without also considering
// existing save states (the commit message already warns that this move
// breaks older ones).
// NOTE(review): fake_GPWatchdogLastToken and et_UpdateInterrupts are not
// passed to p.Do() here.
void DoState(PointerWrap &p)
{
p.Do(m_CPStatusReg);
p.Do(m_CPCtrlReg);
p.Do(m_CPClearReg);
p.Do(m_bboxleft);
p.Do(m_bboxtop);
p.Do(m_bboxright);
p.Do(m_bboxbottom);
p.Do(m_tokenReg);
p.Do(fifo);
}
// function
// Forward declarations: both functions are defined near the end of this
// file but are called earlier (e.g. from Write16 and GatherPipeBursted).
void UpdateFifoRegister();
void UpdateInterrupts();
//inline void WriteLow (u32& _reg, u16 lowbits) {_reg = (_reg & 0xFFFF0000) | lowbits;}
//inline void WriteHigh(u32& _reg, u16 highbits) {_reg = (_reg & 0x0000FFFF) | ((u32)highbits << 16);}
// Replaces the low 16 bits of _reg with lowbits and keeps the high half.
// The merged word is written back with one Common::AtomicStore call.
inline void WriteLow(volatile u32& _reg, u16 lowbits)
{
    Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
}

// Replaces the high 16 bits of _reg with highbits and keeps the low half.
// The merged word is written back with one Common::AtomicStore call.
inline void WriteHigh(volatile u32& _reg, u16 highbits)
{
    Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16));
}

// Returns the low 16 bits of _reg.
inline u16 ReadLow(u32 _reg)
{
    return static_cast<u16>(_reg & 0xFFFF);
}

// Returns the high 16 bits of _reg.
inline u16 ReadHigh(u32 _reg)
{
    return static_cast<u16>(_reg >> 16);
}
// Event handle returned by pRegisterEvent() in Init(); passed to
// pScheduleEvent_Threadsafe() by UpdateInterruptsFromVideoPlugin().
int et_UpdateInterrupts;
// GP watchdog hack: atomically bumps fifo.Fake_GPWDToken.
// WaitForFrameFinish() compares that token with the value it saw last time.
void IncrementGPWDToken()
{
    Common::AtomicIncrement(fifo.Fake_GPWDToken);
}
// GP watchdog hack: blocks the caller on s_fifoIdleEvent for as long as
// all of the following hold:
//   - fifo.Fake_GPWDToken has not changed since the previous call,
//   - GP reads are enabled,
//   - the fifo still holds unread data,
//   - the GP is not stopped on an enabled breakpoint.
// On exit the current token value is remembered for the next call.
void WaitForFrameFinish()
{
    for (;;)
    {
        if (fake_GPWatchdogLastToken != fifo.Fake_GPWDToken)
            break; // token moved since the last call
        if (!fifo.bFF_GPReadEnable)
            break; // GP is not reading at all
        if (!(fifo.CPReadWriteDistance > 0))
            break; // nothing left in the fifo
        if (fifo.bFF_BPEnable && fifo.bFF_Breakpoint)
            break; // GP is halted on a breakpoint

        s_fifoIdleEvent.MsgWait();
    }

    fake_GPWatchdogLastToken = fifo.Fake_GPWDToken;
}
// Event callback registered in Init() under the name "UpdateInterrupts".
// Both arguments are ignored; the call is simply forwarded.
void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
{
    UpdateInterrupts();
}
// Resets the command processor state and registers the UpdateInterrupts
// event with the host.
void Init()
{
    // Status register: all clear except the two idle flags.
    m_CPStatusReg.Hex = 0;
    m_CPStatusReg.CommandIdle = 1;
    m_CPStatusReg.ReadIdle = 1;

    m_CPCtrlReg.Hex = 0;
    m_tokenReg = 0;

    // Default bounding box.
    m_bboxleft   = 0;
    m_bboxtop    = 0;
    m_bboxright  = 640;
    m_bboxbottom = 480;

    fake_GPWatchdogLastToken = 0;

    // Zero the shared fifo struct first, then mark it idle.
    memset(&fifo, 0, sizeof(fifo));
    fifo.CPCmdIdle = 1;
    fifo.CPReadIdle = 1;

    s_fifoIdleEvent.Init();

    et_UpdateInterrupts = g_VideoInitialize.pRegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
}
// Counterpart of Init(): shuts down the fifo idle event.
void Shutdown()
{
    s_fifoIdleEvent.Shutdown();
}
// Handles a 16-bit read from a command processor register.
//   _rReturnValue  receives the value read.
//   _Address       register address; only the low 12 bits select the register.
// Several registers are deliberately faked so that the CPU always sees an
// empty fifo: the read/write distance reads as 0 and the read pointer reads
// as the current write pointer. The perf counters read as 0 with a warning.
// Unknown offsets are logged and read as 0.
void Read16(u16& _rReturnValue, const u32 _Address)
{
DEBUG_LOG(COMMANDPROCESSOR, "(r): 0x%08x", _Address);
switch (_Address & 0xFFF)
{
case STATUS_REGISTER:
//TODO?: if really needed
//m_CPStatusReg.CommandIdle = fifo.CPCmdIdle;
// uncomment: change a bit the behaviour MP1. Not very useful though
// Only ReadIdle is refreshed from the shared fifo state before the read.
m_CPStatusReg.ReadIdle = fifo.CPReadIdle;
//m_CPStatusReg.CommandIdle = fifo.CPReadIdle;
// hack: CPU will always believe fifo is empty and on idle
//m_CPStatusReg.ReadIdle = 1;
//m_CPStatusReg.CommandIdle = 1;
_rReturnValue = m_CPStatusReg.Hex;
INFO_LOG(COMMANDPROCESSOR, "\t iBP %s | fREADIDLE %s | fCMDIDLE %s | iOvF %s | iUndF %s"
, m_CPStatusReg.Breakpoint ? "ON" : "OFF"
, m_CPStatusReg.ReadIdle ? "ON" : "OFF"
, m_CPStatusReg.CommandIdle ? "ON" : "OFF"
, m_CPStatusReg.OverflowHiWatermark ? "ON" : "OFF"
, m_CPStatusReg.UnderflowLoWatermark ? "ON" : "OFF"
);
return;
case CTRL_REGISTER: _rReturnValue = m_CPCtrlReg.Hex; return;
case CLEAR_REGISTER: _rReturnValue = m_CPClearReg.Hex; return;
case FIFO_TOKEN_REGISTER: _rReturnValue = m_tokenReg; return;
// The bounding box values are stored as int and narrowed to u16 here.
case FIFO_BOUNDING_BOX_LEFT: _rReturnValue = m_bboxleft; return;
case FIFO_BOUNDING_BOX_RIGHT: _rReturnValue = m_bboxright; return;
case FIFO_BOUNDING_BOX_TOP: _rReturnValue = m_bboxtop; return;
case FIFO_BOUNDING_BOX_BOTTOM: _rReturnValue = m_bboxbottom; return;
// 32-bit fifo registers are exposed as LO/HI 16-bit halves.
case FIFO_BASE_LO: _rReturnValue = ReadLow (fifo.CPBase); return;
case FIFO_BASE_HI: _rReturnValue = ReadHigh(fifo.CPBase); return;
case FIFO_END_LO: _rReturnValue = ReadLow (fifo.CPEnd); return;
case FIFO_END_HI: _rReturnValue = ReadHigh(fifo.CPEnd); return;
case FIFO_HI_WATERMARK_LO: _rReturnValue = ReadLow (fifo.CPHiWatermark); return;
case FIFO_HI_WATERMARK_HI: _rReturnValue = ReadHigh(fifo.CPHiWatermark); return;
case FIFO_LO_WATERMARK_LO: _rReturnValue = ReadLow (fifo.CPLoWatermark); return;
case FIFO_LO_WATERMARK_HI: _rReturnValue = ReadHigh(fifo.CPLoWatermark); return;
// TODO: cases cleanup
case FIFO_RW_DISTANCE_LO:
//_rReturnValue = ReadLow (fifo.CPReadWriteDistance);
// hack: CPU will always believe fifo is empty and on idle
_rReturnValue = 0;
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_LO : %04x", _rReturnValue);
return;
case FIFO_RW_DISTANCE_HI:
//_rReturnValue = ReadHigh(fifo.CPReadWriteDistance);
// hack: CPU will always believe fifo is empty and on idle
_rReturnValue = 0;
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_HI : %04x", _rReturnValue);
return;
case FIFO_WRITE_POINTER_LO:
_rReturnValue = ReadLow (fifo.CPWritePointer);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_LO : %04x", _rReturnValue);
return;
case FIFO_WRITE_POINTER_HI:
_rReturnValue = ReadHigh(fifo.CPWritePointer);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_HI : %04x", _rReturnValue);
return;
case FIFO_READ_POINTER_LO:
//_rReturnValue = ReadLow (fifo.CPReadPointer);
// hack: CPU will always believe fifo is empty and on idle
// (the WRITE pointer is returned on purpose, so read == write)
_rReturnValue = ReadLow (fifo.CPWritePointer);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_LO : %04x", _rReturnValue);
return;
case FIFO_READ_POINTER_HI:
//_rReturnValue = ReadHigh(fifo.CPReadPointer);
// hack: CPU will always believe fifo is empty and on idle
// (the WRITE pointer is returned on purpose, so read == write)
_rReturnValue = ReadHigh(fifo.CPWritePointer);
DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_HI : %04x", _rReturnValue);
return;
case FIFO_BP_LO: _rReturnValue = ReadLow (fifo.CPBreakpoint); return;
case FIFO_BP_HI: _rReturnValue = ReadHigh(fifo.CPBreakpoint); return;
// Perf counters are not emulated: they read as 0. These cases use break
// rather than return, which simply falls out of the switch to the end of
// the function.
case CP_PERF0_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_L: %04x", _rReturnValue); break; // XF counters
case CP_PERF0_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_H: %04x", _rReturnValue); break;
case CP_PERF1_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_L: %04x", _rReturnValue); break;
case CP_PERF1_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_H: %04x", _rReturnValue); break;
case CP_PERF2_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_L: %04x", _rReturnValue); break;
case CP_PERF2_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_H: %04x", _rReturnValue); break;
case CP_PERF3_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_L: %04x", _rReturnValue); break;
case CP_PERF3_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_H: %04x", _rReturnValue); break;
// case 0x64:
// return 4; //Number of clocks per vertex.. todo: calculate properly
//add all the other regs here? are they ever read?
default:
WARN_LOG(COMMANDPROCESSOR, "(r16) unknown CP reg @ %08x", _Address);
_rReturnValue = 0;
return;
}
}
// Returns true when not running in dual core mode. In dual core mode it
// returns true only while both CPIntEnable and BPEnable are clear in the
// control register.
bool AllowIdleSkipping()
{
    if (!g_VideoInitialize.bUseDualCore)
        return true;

    const bool cpSyncActive = m_CPCtrlReg.CPIntEnable || m_CPCtrlReg.BPEnable;
    return !cpSyncActive;
}
// Handles a 16-bit write to a command processor register.
//   _Value    value being written.
//   _Address  register address; only the low 12 bits select the register.
// Sequence:
//   1. In dual core mode, a write of 0 to CTRL_REGISTER first blocks on
//      s_fifoIdleEvent until the fifo is drained, GP reads are disabled, or
//      the GP sits on an enabled breakpoint (the "API hack" below).
//   2. The write is dispatched on the register offset. 32-bit fifo
//      registers are written as LO/HI halves; base, end, write pointer and
//      read pointer are additionally masked to 32-byte alignment. Writes to
//      the read/write distance are logged and otherwise ignored.
//   3. UpdateFifoRegister() is called when not in dual core mode, or when GP
//      reads are disabled and the fifo reports read-idle.
void Write16(const u16 _Value, const u32 _Address)
{
INFO_LOG(COMMANDPROCESSOR, "(write16): 0x%04x @ 0x%08x",_Value,_Address);
//Spin until queue is empty - it WILL become empty because this is the only thread
//that submits data
if (g_VideoInitialize.bUseDualCore)
{
// Force complete fifo flush if we attempt to set/reset the fifo (API GXSetGPFifo or equivalent)
// It's kind of an API hack but it works for lots of games... and I hope it's the same way for every games.
// TODO: HLE for GX fifo's APIs?
// Here is the hack:
// - if (attempt to overwrite CTRL_REGISTER by 0x0000)
// // then we assume CPReadWriteDistance will be overwrited very soon.
// - if (fifo is not empty)
// // (not 100% sure): shouln't happen unless PPC think having trouble with the sync
// // and it attempt a fifo recovery (look for PI_FIFO_RESET in log).
// // If we want to emulate self fifo recovery we need proper GX metrics emulation... yeah sure :p
// - spin until fifo is empty
// - else
// - normal write16
if (((_Address&0xFFF) == CTRL_REGISTER) && (_Value == 0)) // API hack
{
// weird MP1 redo that right after linking fifo with GP... hmmm
/*_dbg_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance == 0,
"WTF! Something went wrong with GP/PPC the sync! -> CPReadWriteDistance: 0x%08X\n"
" - The fifo is not empty but we are going to lock it anyway.\n"
" - \"Normaly\", this is due to fifo-hang-so-lets-attempt-recovery.\n"
" - The bad news is dolphin don't support special recovery features like GXfifo's metric yet.\n"
" - The good news is, the time you read that message, the fifo should be empty now :p\n"
" - Anyway, fifo flush will be forced if you press OK and dolphin might continue to work...\n"
" - We aren't betting on that :)", fifo.CPReadWriteDistance);
*/
DEBUG_LOG(COMMANDPROCESSOR, "*********************** GXSetGPFifo very soon? ***********************");
// (mb2) We don't sleep here since it could be a perf issue for super monkey ball (yup only this game IIRC)
// Touching that game is a no-go so I don't want to take the risk :p
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance > 0 && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) )
s_fifoIdleEvent.MsgWait();
}
}
switch (_Address & 0xFFF)
{
case STATUS_REGISTER:
{
// NOTE(review): tmpStatus is only referenced by the commented-out code below.
UCPStatusReg tmpStatus(_Value);
// set the flags to "all is okay"
m_CPStatusReg.OverflowHiWatermark = 0;
m_CPStatusReg.UnderflowLoWatermark = 0;
// TOCHECK (mb2): could BP irq be cleared here too?
//if (tmpStatus.Breakpoint!=m_CPStatusReg.Breakpoint) _asm int 3
// breakpoint
/*if (tmpStatus.Breakpoint)
{
m_CPStatusReg.Breakpoint = 0;
}
//fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint;
fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint ? true : false;
//LOG(COMMANDPROCESSOR,"fifo.bFF_Breakpoint : %i",fifo.bFF_Breakpoint);
*/
// update interrupts
UpdateInterrupts();
INFO_LOG(COMMANDPROCESSOR,"\t write to STATUS_REGISTER : %04x", _Value);
}
break;
case CTRL_REGISTER:
{
UCPCtrlReg tmpCtrl(_Value);
// Mirror the three enables the video thread looks at into the shared fifo struct.
Common::AtomicStore(fifo.bFF_GPReadEnable, tmpCtrl.GPReadEnable);
Common::AtomicStore(fifo.bFF_GPLinkEnable, tmpCtrl.GPLinkEnable);
Common::AtomicStore(fifo.bFF_BPEnable, tmpCtrl.BPEnable);
// TOCHECK (mb2): could BP irq be cleared with w16 to STATUS_REGISTER?
// funny hack: eg in MP1 if we disable the clear breakpoint ability by commenting this block
// the game is of course faster but looks stable too.
// Well, the hack is more stable than the "proper" way actualy :p ... it breaks MP2 when ship lands
// So I let the hack for now.
// Checkmate re-enabled it, so please test
// TODO (mb2): fix this!
// BP interrupt is cleared here
//if (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex) // falling edge
// raising edge or falling egde
// The second half of the condition tests the whole new register value
// (tmpCtrl.Hex == 0), not just the new CPIntEnable bit.
if ((!m_CPCtrlReg.CPIntEnable && tmpCtrl.CPIntEnable) || (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex))
{
m_CPStatusReg.Breakpoint = 0;
Common::AtomicStore(fifo.bFF_Breakpoint, 0);
}
m_CPCtrlReg.Hex = tmpCtrl.Hex;
UpdateInterrupts();
DEBUG_LOG(COMMANDPROCESSOR,"\t write to CTRL_REGISTER : %04x", _Value);
DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | CPULINK %s | BP %s || CPIntEnable %s | OvF %s | UndF %s"
, fifo.bFF_GPReadEnable ? "ON" : "OFF"
, fifo.bFF_GPLinkEnable ? "ON" : "OFF"
, fifo.bFF_BPEnable ? "ON" : "OFF"
, m_CPCtrlReg.CPIntEnable ? "ON" : "OFF"
, m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF"
, m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF"
);
}
break;
case PERF_SELECT:
{
WARN_LOG(COMMANDPROCESSOR, "write to PERF_SELECT: %04x", _Value);
// Seems to select which set of perf counters should be exposed.
}
break;
case CLEAR_REGISTER:
{
// ????
// NOTE(review): tmpClearReg is never read; the stored clear register is
// set to 0 regardless of _Value.
UCPClearReg tmpClearReg(_Value);
m_CPClearReg.Hex = 0;
INFO_LOG(COMMANDPROCESSOR,"\t write to CLEAR_REGISTER : %04x",_Value);
}
break;
// Fifo Registers
case FIFO_TOKEN_REGISTER:
m_tokenReg = _Value;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_TOKEN_REGISTER : %04x", _Value);
break;
case FIFO_BASE_LO:
WriteLow ((u32 &)fifo.CPBase, _Value);
fifo.CPBase &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_LO : %04x", _Value);
break;
case FIFO_BASE_HI:
WriteHigh((u32 &)fifo.CPBase, _Value);
fifo.CPBase &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_HI : %04x", _Value);
break;
case FIFO_END_LO:
WriteLow ((u32 &)fifo.CPEnd, _Value);
fifo.CPEnd &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_LO : %04x", _Value);
break;
case FIFO_END_HI:
WriteHigh((u32 &)fifo.CPEnd, _Value);
fifo.CPEnd &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_HI : %04x", _Value);
break;
// Hm. Should we really & these with FFFFFFE0?
// (mb2): never seen 32B not aligned values for those following regs.
// fifo.CPEnd is the only value that could be not 32B aligned so far.
case FIFO_WRITE_POINTER_LO:
WriteLow ((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_LO : %04x", _Value);
break;
case FIFO_WRITE_POINTER_HI:
WriteHigh((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_HI : %04x", _Value);
break;
case FIFO_READ_POINTER_LO:
WriteLow ((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_LO : %04x", _Value);
break;
case FIFO_READ_POINTER_HI:
WriteHigh((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_HI : %04x", _Value);
break;
// Watermarks and the breakpoint address are stored without alignment masking.
case FIFO_HI_WATERMARK_LO:
WriteLow ((u32 &)fifo.CPHiWatermark, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_LO : %04x", _Value);
break;
case FIFO_HI_WATERMARK_HI:
WriteHigh((u32 &)fifo.CPHiWatermark, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_HI : %04x", _Value);
break;
case FIFO_LO_WATERMARK_LO:
WriteLow ((u32 &)fifo.CPLoWatermark, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_LO : %04x", _Value);
break;
case FIFO_LO_WATERMARK_HI:
WriteHigh((u32 &)fifo.CPLoWatermark, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_HI : %04x", _Value);
break;
case FIFO_BP_LO:
WriteLow ((u32 &)fifo.CPBreakpoint, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_LO : %04x", _Value);
break;
case FIFO_BP_HI:
WriteHigh((u32 &)fifo.CPBreakpoint, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_HI : %04x", _Value);
break;
// Super monkey try to overwrite CPReadWriteDistance by an old saved RWD value. Which is lame for us.
// hack: We have to force CPU to think fifo is alway empty and on idle.
// When we fall here CPReadWriteDistance should be always null and the game should always want to overwrite it by 0.
// So, we can skip it.
case FIFO_RW_DISTANCE_HI:
//WriteHigh((u32 &)fifo.CPReadWriteDistance, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_HI : %04x", _Value);
break;
case FIFO_RW_DISTANCE_LO:
//WriteLow((u32 &)fifo.CPReadWriteDistance, _Value);
DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_LO : %04x", _Value);
break;
default:
WARN_LOG(COMMANDPROCESSOR, "(w16) unknown CP reg write %04x @ %08x", _Value, _Address);
}
// TODO(mb2): better. Check if it help: avoid CPReadPointer overwrites when stupidly done like in Super Monkey Ball
if ((!fifo.bFF_GPReadEnable && fifo.CPReadIdle) || !g_VideoInitialize.bUseDualCore) // TOCHECK(mb2): check again if thread safe?
UpdateFifoRegister();
}
// 32-bit reads are not supported on the command processor: the result is
// forced to 0 and a debug assertion reports the offending address.
void Read32(u32& _rReturnValue, const u32 _Address)
{
    _rReturnValue = 0;
    _dbg_assert_msg_(COMMANDPROCESSOR, 0, "Read32 from CommandProccessor at 0x%08x", _Address);
}
// 32-bit writes are not supported on the command processor: _Data is
// discarded and a debug assertion reports the offending address.
void Write32(const u32 _Data, const u32 _Address)
{
    _dbg_assert_msg_(COMMANDPROCESSOR, 0, "Write32 at CommandProccessor at 0x%08x", _Address);
}
// Accounts for one GATHER_PIPE_SIZE-byte burst written by the CPU.
// Does nothing unless the fifo is linked (bFF_GPLinkEnable). Otherwise the
// write pointer is advanced by one burst, wrapping to CPBase at CPEnd, and:
//   - dual core:   the read/write distance is bumped atomically; if it then
//                  exceeds the high watermark the caller blocks until the
//                  distance drops to the low watermark or the GP stops on an
//                  enabled breakpoint.
//   - single core: UpdateFifoRegister() recomputes the distance (and lets
//                  the GP catch up).
// In both modes the local pointers are asserted to match the CPU-side fifo.
void STACKALIGN GatherPipeBursted()
{
    // if we aren't linked, we don't care about gather pipe data
    if (!fifo.bFF_GPLinkEnable)
        return;

    // update the fifo-pointer (identical in both modes)
    fifo.CPWritePointer += GATHER_PIPE_SIZE;
    if (fifo.CPWritePointer >= fifo.CPEnd)
        fifo.CPWritePointer = fifo.CPBase;

    if (g_VideoInitialize.bUseDualCore)
    {
        Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);

        // High watermark overflow handling (hacked way).
        // An accurate emulation would raise an overflow interrupt and let the
        // PPC deal with it: the CPU stops writing, enables the underflow
        // interrupt, does other work, and resumes writing once the underflow
        // (low watermark) interrupt fires. Doing that for every 32-byte
        // transaction is slow, so instead the CPU thread is simply parked
        // here, like a critical section inside the emulated GX thread.
        if (fifo.CPReadWriteDistance > fifo.CPHiWatermark)
        {
            INFO_LOG(COMMANDPROCESSOR, "(GatherPipeBursted): CPHiWatermark reached");

            // Wait for GPU to catch up
            while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > fifo.CPLoWatermark)
            {
                s_fifoIdleEvent.MsgWait();
            }
        }
    }

    // check if we are in sync
    _assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer == *(g_VideoInitialize.Fifo_CPUWritePointer), "FIFOs linked but out of sync");
    _assert_msg_(COMMANDPROCESSOR, fifo.CPBase == *(g_VideoInitialize.Fifo_CPUBase), "FIFOs linked but out of sync");
    _assert_msg_(COMMANDPROCESSOR, fifo.CPEnd == *(g_VideoInitialize.Fifo_CPUEnd), "FIFOs linked but out of sync");

    // Single core only: recompute the distance after the sync checks.
    if (!g_VideoInitialize.bUseDualCore)
        UpdateFifoRegister();
}
// This is mostly used in single core mode
void CatchUpGPU()
{
// check if we are able to run this buffer
if ((fifo.bFF_GPReadEnable) && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
{
// HyperIris: Memory_GetPtr is an expensive call, call it less, run faster
u8 *ptr = Memory_GetPtr(fifo.CPReadPointer);
while (fifo.CPReadWriteDistance > 0)
{
// check if we are on a breakpoint
if (fifo.bFF_BPEnable)
{
//MessageBox(0,"Breakpoint enabled",0,0);
if ((fifo.CPReadPointer & ~0x1F) == (fifo.CPBreakpoint & ~0x1F))
{
//_assert_msg_(POWERPC,0,"BP: %08x",fifo.CPBreakpoint);
//LOG(COMMANDPROCESSOR,"!!! BP irq raised");
fifo.bFF_Breakpoint = 1;
m_CPStatusReg.Breakpoint = 1;
UpdateInterrupts();
break;
}
}
// read the data and send it to the VideoPlugin
fifo.CPReadPointer += 32;
// We are going to do FP math on the main thread so have to save the current state
SaveSSEState();
LoadDefaultSSEState();
Fifo_SendFifoData(ptr,32);
LoadSSEState();
// adjust
ptr += 32;
fifo.CPReadWriteDistance -= 32;
// increase the ReadPtr
if (fifo.CPReadPointer >= fifo.CPEnd)
{
fifo.CPReadPointer = fifo.CPBase;
// adjust, take care
ptr = Memory_GetPtr(fifo.CPReadPointer);
INFO_LOG(COMMANDPROCESSOR, "BUFFER LOOP");
}
}
}
}
// __________________________________________________________________________________________________
// !!! Temporary (I hope): re-used in DC mode
// UpdateFifoRegister
// It's no problem if the gfx falls behind a little bit. Better make sure to stop the cpu thread
// when the distance is way huge, though.
// So:
// CPU thread
/// 0. Write data (done before entering this)
// 1. Compute distance
// 2. If distance > threshold, sleep and goto 1
// GPU thread
// 1. Compute distance
// 2. If distance < threshold, sleep and goto 1 (or wait for trigger?)
// 3. Read and use a bit of data, goto 1
void UpdateFifoRegister()
{
    // Sample both pointers once.
    int writePos = fifo.CPWritePointer;
    int readPos = fifo.CPReadPointer;

    // Number of bytes between the read and write positions.
    int pending;
    if (writePos < readPos)
    {
        // The writer has wrapped: base..write plus read..end.
        pending = (writePos - fifo.CPBase) + (fifo.CPEnd - readPos);
    }
    else
    {
        pending = writePos - readPos;
    }

    Common::AtomicStore(fifo.CPReadWriteDistance, pending);

    // Without a separate video thread the data is consumed right away.
    if (!g_VideoInitialize.bUseDualCore)
    {
        CatchUpGPU();
    }
}
// Drives the CP interrupt line: it is asserted only while CP interrupts are
// enabled in the control register and the fifo is stopped on an enabled
// breakpoint; otherwise it is cleared.
void UpdateInterrupts()
{
    const bool breakpointPending =
        m_CPCtrlReg.CPIntEnable && (fifo.bFF_BPEnable && fifo.bFF_Breakpoint);

    g_VideoInitialize.pSetInterrupt(INT_CAUSE_CP, breakpointPending);
}
// Entry point for the fifo loop (see fifo.cpp) after it has hit a
// breakpoint: copies the breakpoint flag into the status register and
// schedules the UpdateInterrupts event through the thread-safe scheduler
// rather than calling UpdateInterrupts() directly.
void UpdateInterruptsFromVideoPlugin()
{
    // implicit since only BP trigger (see fifo.cpp) can call this
    if (fifo.bFF_Breakpoint)
    {
        m_CPStatusReg.Breakpoint = 1;
    }

    g_VideoInitialize.pScheduleEvent_Threadsafe(0, et_UpdateInterrupts, 0);
}
// Signals s_fifoIdleEvent, releasing any caller blocked in one of this
// file's MsgWait() loops so it can re-check its condition.
void SetFifoIdleFromVideoPlugin()
{
    s_fifoIdleEvent.Set();
}
} // end of namespace CommandProcessor

View File

@ -0,0 +1,96 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _COMMANDPROCESSOR_H
#define _COMMANDPROCESSOR_H
#include "Common.h"
#include "pluginspecs_video.h"
class PointerWrap;
extern bool MT;
namespace CommandProcessor
{
// Internal hardware addresses: offsets of the 16-bit command processor
// registers. Read16/Write16 match them against (address & 0xFFF).
// 32-bit quantities are exposed as a LO/HI (or L/H) pair of 16-bit registers.
enum
{
    // status / control
    STATUS_REGISTER          = 0x00,
    CTRL_REGISTER            = 0x02,
    CLEAR_REGISTER           = 0x04,
    PERF_SELECT              = 0x06,
    FIFO_TOKEN_REGISTER      = 0x0E,

    // bounding box
    FIFO_BOUNDING_BOX_LEFT   = 0x10,
    FIFO_BOUNDING_BOX_RIGHT  = 0x12,
    FIFO_BOUNDING_BOX_TOP    = 0x14,
    FIFO_BOUNDING_BOX_BOTTOM = 0x16,

    // fifo geometry and watermarks
    FIFO_BASE_LO             = 0x20,
    FIFO_BASE_HI             = 0x22,
    FIFO_END_LO              = 0x24,
    FIFO_END_HI              = 0x26,
    FIFO_HI_WATERMARK_LO     = 0x28,
    FIFO_HI_WATERMARK_HI     = 0x2A,
    FIFO_LO_WATERMARK_LO     = 0x2C,
    FIFO_LO_WATERMARK_HI     = 0x2E,

    // fifo pointers, distance and breakpoint
    FIFO_RW_DISTANCE_LO      = 0x30,
    FIFO_RW_DISTANCE_HI      = 0x32,
    FIFO_WRITE_POINTER_LO    = 0x34,
    FIFO_WRITE_POINTER_HI    = 0x36,
    FIFO_READ_POINTER_LO     = 0x38,
    FIFO_READ_POINTER_HI     = 0x3A,
    FIFO_BP_LO               = 0x3C,
    FIFO_BP_HI               = 0x3E,

    // performance counters
    CP_PERF0_L               = 0x40,
    CP_PERF0_H               = 0x42,
    CP_PERF1_L               = 0x44,
    CP_PERF1_H               = 0x46,
    CP_PERF2_L               = 0x48,
    CP_PERF2_H               = 0x4A,
    CP_PERF3_L               = 0x4C,
    CP_PERF3_H               = 0x4E,
};
// Fifo state shared between the emulator (CPU) thread and the gfx thread.
extern SCPFifoStruct fifo;
// Init
// Init() resets the registers and the fifo and registers the
// UpdateInterrupts event; Shutdown() tears down the fifo idle event.
void Init();
void Shutdown();
// Passes the register shadows, bounding box, token and fifo through p.
void DoState(PointerWrap &p);
// Read
// 16-bit register access; the register is selected by (_Address & 0xFFF).
void Read16(u16& _rReturnValue, const u32 _Address);
void Write16(const u16 _Data, const u32 _Address);
// 32-bit access is unsupported: both trigger a debug assertion, and Read32
// returns 0.
void Read32(u32& _rReturnValue, const u32 _Address);
void Write32(const u32 _Data, const u32 _Address);
// for CGPFIFO
// Consumes pending fifo data on the calling thread (mostly single core mode).
void CatchUpGPU();
// Accounts for one 32-byte gather pipe burst written by the CPU.
void GatherPipeBursted();
// Raises or clears the CP interrupt from the current breakpoint state.
void UpdateInterrupts();
// Called by the fifo loop: schedules UpdateInterrupts through the host's
// thread-safe event scheduler.
void UpdateInterruptsFromVideoPlugin();
// Called by the fifo loop: signals the fifo idle event.
void SetFifoIdleFromVideoPlugin();
// True when not in dual core mode, or when both CPIntEnable and BPEnable
// are clear.
bool AllowIdleSkipping();
// for DC GP watchdog hack
// Atomically increments fifo.Fake_GPWDToken.
void IncrementGPWDToken();
// Blocks while the token is unchanged and the GP still has fifo data to read.
void WaitForFrameFinish();
} // namespace CommandProcessor
#endif // _COMMANDPROCESSOR_H

View File

@ -23,6 +23,7 @@
#include "Thread.h"
#include "Atomic.h"
#include "OpcodeDecoding.h"
#include "CommandProcessor.h"
#include "Fifo.h"
@ -127,7 +128,7 @@ void Fifo_SendFifoData(u8* _uData, u32 len)
void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
{
fifoStateRun = true;
SCPFifoStruct &_fifo = *video_initialize.pCPFifo;
SCPFifoStruct &_fifo = CommandProcessor::fifo;
s32 distToSend;
while (fifoStateRun)
@ -178,7 +179,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
if (_fifo.bFF_BPEnable && (readPtr == _fifo.CPBreakpoint))
{
Common::AtomicStore(_fifo.bFF_Breakpoint, 1);
video_initialize.pUpdateInterrupts();
CommandProcessor::UpdateInterruptsFromVideoPlugin();
}
// Update CPReadPointer and RWDistance
@ -189,7 +190,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
} while (_fifo.bFF_GPReadEnable && _fifo.CPReadWriteDistance && !(_fifo.bFF_BPEnable && _fifo.bFF_Breakpoint));
Common::AtomicStore(_fifo.CPReadIdle, 1);
video_initialize.pSetFifoIdle();
CommandProcessor::SetFifoIdleFromVideoPlugin();
}
else
{

View File

@ -29,6 +29,7 @@
#include "VideoCommon.h"
#include "Profiler.h"
#include "OpcodeDecoding.h"
#include "CommandProcessor.h"
#include "VertexLoaderManager.h"
@ -170,7 +171,7 @@ bool FifoCommandRunnable()
g_VideoInitialize.pSysMessage(szTemp);
g_VideoInitialize.pLog(szTemp, TRUE);
{
SCPFifoStruct &fifo = *g_VideoInitialize.pCPFifo;
SCPFifoStruct &fifo = CommandProcessor::fifo;
char szTmp[256];
// sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr());

View File

@ -0,0 +1,380 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// http://developer.nvidia.com/object/General_FAQ.html#t6 !!!!!
#include "Common.h"
#include "VideoCommon.h"
#include "ChunkFile.h"
#include "Atomic.h"
#include "PixelEngine.h"
#include "CommandProcessor.h"
namespace PixelEngine
{
// Image of the PE_ZCONF register: the raw 16-bit value (Hex) overlaid with
// its individual bitfields. Read16/Write16 access it through Hex.
union UPEZConfReg
{
u16 Hex;
struct
{
unsigned ZCompEnable : 1; // Z Comparator Enable
unsigned Function : 3; // NOTE(review): presumably the Z compare function selector -- confirm
unsigned ZUpdEnable : 1; // NOTE(review): presumably "Z update enable" -- confirm
unsigned : 11; // unnamed padding; the named fields plus this total 16 bits
};
};
// Image of the PE_ALPHACONF register: the raw 16-bit value (Hex) overlaid
// with its individual bitfields. Read16/Write16 access it through Hex.
union UPEAlphaConfReg
{
u16 Hex;
struct
{
unsigned BMMath : 1; // GX_BM_BLEND || GX_BM_SUBTRACT
unsigned BMLogic : 1; // GX_BM_LOGIC
unsigned Dither : 1;
unsigned ColorUpdEnable : 1;
unsigned AlphaUpdEnable : 1;
unsigned DstFactor : 3;
unsigned SrcFactor : 3;
unsigned Substract : 1; // Additive mode by default
unsigned BlendOperator : 4;
};
};
// Image of the PE_DSTALPHACONF register: the raw 16-bit value (Hex) overlaid
// with its individual bitfields. Read16/Write16 access it through Hex.
union UPEDstAlphaConfReg
{
u16 Hex;
struct
{
unsigned DstAlpha : 8; // NOTE(review): presumably the constant destination alpha value -- confirm
unsigned Enable : 1;
unsigned : 7; // unnamed padding; the named fields plus this total 16 bits
};
};
// Image of the PE_ALPHAMODE register: the raw 16-bit value (Hex) overlaid
// with its two 8-bit fields. Read16/Write16 access it through Hex.
union UPEAlphaModeConfReg
{
u16 Hex;
struct
{
unsigned Threshold : 8;
unsigned CompareMode : 8;
};
};
// Image of the PE_ALPHAREAD register: the raw 16-bit value (Hex) overlaid
// with its bitfields. The original author was not sure about this register's
// layout, so treat the field description as unverified.
union UPEAlphaReadReg
{
u16 Hex;
struct
{
unsigned ReadMode : 3;
unsigned : 13; // unnamed padding; together with ReadMode this totals 16 bits
};
};
// Pixel engine control register (PE_CTRL_REGISTER): interrupt enables plus
// write-only acknowledge bits. Write16 only stores the two enable bits and
// always forces PEToken/PEFinish back to 0.
union UPECtrlReg
{
struct
{
unsigned PETokenEnable : 1; // token interrupt enable (tested in UpdateInterrupts)
unsigned PEFinishEnable : 1; // finish interrupt enable (tested in UpdateInterrupts)
unsigned PEToken : 1; // write only; writing 1 clears the pending token interrupt flag
unsigned PEFinish : 1; // write only; writing 1 clears the pending finish interrupt flag
unsigned : 12; // unnamed padding; the named fields plus this total 16 bits
};
u16 Hex; // raw 16-bit register value
// Default-constructs with all bits cleared.
UPECtrlReg() {Hex = 0; }
// Constructs from a raw 16-bit register value.
UPECtrlReg(u16 _hex) {Hex = _hex; }
};
// STATE_TO_SAVE
// Register images backing the PE_* addresses handled by Read16/Write16.
static UPEZConfReg m_ZConf;
static UPEAlphaConfReg m_AlphaConf;
static UPEDstAlphaConfReg m_DstAlphaConf;
static UPEAlphaModeConfReg m_AlphaModeConf;
static UPEAlphaReadReg m_AlphaRead;
static UPECtrlReg m_Control;
// The token value itself lives in CommandProcessor::fifo.PEToken.
//static u16 m_Token; // token value most recently encountered
// Pending-interrupt flags: set by the *_OnMainThread event handlers and
// cleared when the CPU writes the matching acknowledge bit to PE_CTRL_REGISTER.
static bool g_bSignalTokenInterrupt;
static bool g_bSignalFinishInterrupt;
// Event ids returned by pRegisterEvent in Init(); not part of DoState.
static int et_SetTokenOnMainThread;
static int et_SetFinishOnMainThread;
// Bounding box values returned by the PE_BBOX_* registers, indexed
// [0]=left, [1]=right, [2]=top, [3]=bottom. Declared extern in PixelEngine.h.
u16 bbox[4];
// Cleared by every PE_BBOX_* read in Read16; nothing in this file sets it to true.
bool bbox_active;
// Interrupt cause masks passed to g_VideoInitialize.pSetInterrupt by UpdateInterrupts().
enum
{
INT_CAUSE_PE_TOKEN = 0x200, // GP Token
INT_CAUSE_PE_FINISH = 0x400, // GP Finished
};
// Passes every piece of pixel engine state through the PointerWrap, including
// the token value that is stored in CommandProcessor::fifo.
// NOTE(review): presumably PointerWrap serializes sequentially, so the order of
// these calls defines the save-state layout -- confirm before reordering.
void DoState(PointerWrap &p)
{
p.Do(m_ZConf);
p.Do(m_AlphaConf);
p.Do(m_DstAlphaConf);
p.Do(m_AlphaModeConf);
p.Do(m_AlphaRead);
p.Do(m_Control);
p.Do(CommandProcessor::fifo.PEToken);
p.Do(g_bSignalTokenInterrupt);
p.Do(g_bSignalFinishInterrupt);
p.Do(bbox);
p.Do(bbox_active);
}
// Forward declarations: these are defined further down in this file but are
// referenced earlier (the event handlers by Init(), UpdateInterrupts by Write16()).
void UpdateInterrupts();
void SetToken_OnMainThread(u64 userdata, int cyclesLate);
void SetFinish_OnMainThread(u64 userdata, int cyclesLate);
void Init()
{
m_Control.Hex = 0;
et_SetTokenOnMainThread = g_VideoInitialize.pRegisterEvent("SetToken", SetToken_OnMainThread);
et_SetFinishOnMainThread = g_VideoInitialize.pRegisterEvent("SetFinish", SetFinish_OnMainThread);
bbox[0] = 0x80;
bbox[1] = 0xA0;
bbox[2] = 0x80;
bbox[3] = 0xA0;
bbox_active = false;
}
// Handles a 16-bit CPU read from a pixel engine register.
//   _uReturnValue - receives the register value; assigned on every path
//   _iAddress     - address of the access; only the low 12 bits select the register
// Reading any PE_BBOX_* register also clears bbox_active.
void Read16(u16& _uReturnValue, const u32 _iAddress)
{
	DEBUG_LOG(PIXELENGINE, "(r16) 0x%08x", _iAddress);

	switch (_iAddress & 0xFFF)
	{
	// CPU Direct Access EFB Raster State Config
	case PE_ZCONF:
		_uReturnValue = m_ZConf.Hex;
		INFO_LOG(PIXELENGINE, "(r16) ZCONF");
		break;

	case PE_ALPHACONF:
		// Most games read this early. no idea why.
		_uReturnValue = m_AlphaConf.Hex;
		INFO_LOG(PIXELENGINE, "(r16) ALPHACONF");
		break;

	case PE_DSTALPHACONF:
		_uReturnValue = m_DstAlphaConf.Hex;
		INFO_LOG(PIXELENGINE, "(r16) DSTALPHACONF");
		break;

	case PE_ALPHAMODE:
		_uReturnValue = m_AlphaModeConf.Hex;
		INFO_LOG(PIXELENGINE, "(r16) ALPHAMODE");
		break;

	case PE_ALPHAREAD:
		_uReturnValue = m_AlphaRead.Hex;
		WARN_LOG(PIXELENGINE, "(r16) ALPHAREAD");
		break;

	case PE_CTRL_REGISTER:
		_uReturnValue = m_Control.Hex;
		INFO_LOG(PIXELENGINE, "(r16) CTRL_REGISTER : %04x", _uReturnValue);
		break;

	case PE_TOKEN_REG:
		_uReturnValue = CommandProcessor::fifo.PEToken;
		INFO_LOG(PIXELENGINE, "(r16) TOKEN_REG : %04x", _uReturnValue);
		break;

	// The return values for these BBOX registers need to be gotten from the bounding box of the object.
	// See http://code.google.com/p/dolphin-emu/issues/detail?id=360#c74 for more details.
	// 0x80, 0xa0, 0x80, 0xa0 makes Paper Mario happy.
	case PE_BBOX_LEFT:
		_uReturnValue = bbox[0];
		INFO_LOG(PIXELENGINE, "R: BBOX_LEFT = %i", bbox[0]);
		bbox_active = false;
		break;

	case PE_BBOX_RIGHT:
		_uReturnValue = bbox[1];
		INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT = %i", bbox[1]);
		bbox_active = false;
		break;

	case PE_BBOX_TOP:
		_uReturnValue = bbox[2];
		INFO_LOG(PIXELENGINE, "R: BBOX_TOP = %i", bbox[2]);
		bbox_active = false;
		break;

	case PE_BBOX_BOTTOM:
		_uReturnValue = bbox[3];
		INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", bbox[3]);
		bbox_active = false;
		break;

	case PE_PERF_0L:
	case PE_PERF_0H:
	case PE_PERF_1L:
	case PE_PERF_1H:
	case PE_PERF_2L:
	case PE_PERF_2H:
	case PE_PERF_3L:
	case PE_PERF_3H:
	case PE_PERF_4L:
	case PE_PERF_4H:
	case PE_PERF_5L:
	case PE_PERF_5H:
		// The perf counters are not emulated. Report a defined value (zero)
		// instead of leaving the caller's variable unassigned.
		_uReturnValue = 0;
		INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
		break;

	default:
		INFO_LOG(PIXELENGINE, "(r16) unknown @ %08x", _iAddress);
		_uReturnValue = 1;
		break;
	}
}
void Write16(const u16 _iValue, const u32 _iAddress)
{
switch (_iAddress & 0xFFF)
{
// CPU Direct Access EFB Raster State Config
case PE_ZCONF:
m_ZConf.Hex = _iValue;
INFO_LOG(PIXELENGINE, "(w16) ZCONF: %02x", _iValue);
break;
case PE_ALPHACONF:
m_AlphaConf.Hex = _iValue;
INFO_LOG(PIXELENGINE, "(w16) ALPHACONF: %02x", _iValue);
break;
case PE_DSTALPHACONF:
m_DstAlphaConf.Hex = _iValue;
INFO_LOG(PIXELENGINE, "(w16) DSTALPHACONF: %02x", _iValue);
break;
case PE_ALPHAMODE:
m_AlphaModeConf.Hex = _iValue;
INFO_LOG(PIXELENGINE, "(w16) ALPHAMODE: %02x", _iValue);
break;
case PE_ALPHAREAD:
m_AlphaRead.Hex = _iValue;
INFO_LOG(PIXELENGINE, "(w16) ALPHAREAD: %02x", _iValue);
break;
case PE_CTRL_REGISTER:
{
UPECtrlReg tmpCtrl(_iValue);
if (tmpCtrl.PEToken) g_bSignalTokenInterrupt = false;
if (tmpCtrl.PEFinish) g_bSignalFinishInterrupt = false;
m_Control.PETokenEnable = tmpCtrl.PETokenEnable;
m_Control.PEFinishEnable = tmpCtrl.PEFinishEnable;
m_Control.PEToken = 0; // this flag is write only
m_Control.PEFinish = 0; // this flag is write only
DEBUG_LOG(PIXELENGINE, "(w16) CTRL_REGISTER: 0x%04x", _iValue);
UpdateInterrupts();
}
break;
case PE_TOKEN_REG:
//LOG(PIXELENGINE,"WEIRD: program wrote token: %i",_iValue);
PanicAlert("(w16) WTF? PowerPC program wrote token: %i", _iValue);
//only the gx pipeline is supposed to be able to write here
//g_token = _iValue;
break;
default:
WARN_LOG(PIXELENGINE, "(w16) unknown %04x @ %08x", _iValue, _iAddress);
break;
}
}
// 32-bit writes to the pixel engine are not handled: the access is logged as
// a warning and otherwise ignored (no register is modified).
void Write32(const u32 _iValue, const u32 _iAddress)
{
WARN_LOG(PIXELENGINE, "(w32) 0x%08x @ 0x%08x IGNORING...",_iValue,_iAddress);
}
// Returns true when idle skipping is allowed: always when dual core is off,
// otherwise only while both the token and finish interrupts are disabled.
bool AllowIdleSkipping()
{
	if (!g_VideoInitialize.bUseDualCore)
		return true;

	const bool anyPEInterruptEnabled = m_Control.PETokenEnable || m_Control.PEFinishEnable;
	return !anyPEInterruptEnabled;
}
// Drives both pixel engine interrupt lines from the current state: a line is
// asserted only while its pending flag is set AND its enable bit is on.
// Both lines are updated on every call, token first.
void UpdateInterrupts()
{
	const bool raiseToken = g_bSignalTokenInterrupt && m_Control.PETokenEnable;
	g_VideoInitialize.pSetInterrupt(INT_CAUSE_PE_TOKEN, raiseToken ? true : false);

	const bool raiseFinish = g_bSignalFinishInterrupt && m_Control.PEFinishEnable;
	g_VideoInitialize.pSetInterrupt(INT_CAUSE_PE_FINISH, raiseFinish ? true : false);
}
// Event handler registered in Init() under the name "SetToken"; SetToken()
// schedules it only for the interrupt variant (BPMEM_PE_TOKEN_INT_ID).
// Marks the token interrupt as pending and re-evaluates the interrupt lines.
//
// userdata is scheduled as (token | acknowledge << 16) but is currently not
// inspected here; the token is read from CommandProcessor::fifo.PEToken for
// logging only. cyclesLate is unused.
//
// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate).
// Think about the right order between tokenVal and tokenINT... one day maybe.
void SetToken_OnMainThread(u64 userdata, int cyclesLate)
{
	g_bSignalTokenInterrupt = true;

	INFO_LOG(PIXELENGINE, "VIDEO Plugin raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", CommandProcessor::fifo.PEToken);

	UpdateInterrupts();
}
// Event handler registered in Init() under the name "SetFinish" and scheduled
// by SetFinish(). Marks the finish interrupt as pending and re-evaluates the
// interrupt lines. Neither argument is used.
void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
{
	g_bSignalFinishInterrupt = true;
	UpdateInterrupts();
}
// SetToken
// THIS IS EXECUTED FROM VIDEO THREAD
//   _token                - token value from the BP write
//   _bSetTokenAcknowledge - zero: only store the token value;
//                           non-zero: schedule the token interrupt on the main thread
// TODO?: set-token-value and set-token-INT could be merged since set-token-INT own the token value.
void SetToken(const u16 _token, const int _bSetTokenAcknowledge)
{
	if (!_bSetTokenAcknowledge)
	{
		// Plain token value: stored directly from the video thread because of
		// Super Monkey Ball.
		// XXX: No 16-bit atomic store available, so cheat and use 32-bit.
		// That's what we've always done. We're counting on fifo.PEToken to be
		// 4-byte padded.
		Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, _token);
		return;
	}

	// Token interrupt. This seems smelly...
	// for DC watchdog hack since PEToken seems to be a frame-finish too
	CommandProcessor::IncrementGPWDToken();

	g_VideoInitialize.pScheduleEvent_Threadsafe(
		0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16));
}
// SetFinish
// THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn
// Bumps the command processor's watchdog token, then schedules
// SetFinish_OnMainThread (with userdata 0), which marks the finish interrupt pending.
void SetFinish()
{
CommandProcessor::IncrementGPWDToken(); // for DC watchdog hack
g_VideoInitialize.pScheduleEvent_Threadsafe(
0, et_SetFinishOnMainThread, 0);
INFO_LOG(PIXELENGINE, "VIDEO Set Finish");
}
} // end of namespace PixelEngine

View File

@ -0,0 +1,78 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _PIXELENGINE_H
#define _PIXELENGINE_H
#include "CommonTypes.h"
class PointerWrap;
// Internal hardware addresses: register offsets matched against the low
// 12 bits of the access address in PixelEngine::Read16/Write16.
enum
{
PE_ZCONF = 0x00, // Z Config
PE_ALPHACONF = 0x02, // Alpha Config
PE_DSTALPHACONF = 0x04, // Destination Alpha Config
PE_ALPHAMODE = 0x06, // Alpha Mode Config
PE_ALPHAREAD = 0x08, // Alpha Read
PE_CTRL_REGISTER = 0x0a, // Control
PE_TOKEN_REG = 0x0e, // Token
PE_BBOX_LEFT = 0x10, // Bounding box left (reads return bbox[0])
PE_BBOX_RIGHT = 0x12, // Bounding box right (reads return bbox[1])
PE_BBOX_TOP = 0x14, // Bounding box top (reads return bbox[2])
PE_BBOX_BOTTOM = 0x16, // Bounding box bottom (reads return bbox[3])
// These have not yet been reverse-engineered. They are the perf counters.
PE_PERF_0L = 0x18,
PE_PERF_0H = 0x1a,
PE_PERF_1L = 0x1c,
PE_PERF_1H = 0x1e,
PE_PERF_2L = 0x20,
PE_PERF_2H = 0x22,
PE_PERF_3L = 0x24,
PE_PERF_3H = 0x26,
PE_PERF_4L = 0x28,
PE_PERF_4H = 0x2a,
PE_PERF_5L = 0x2c,
PE_PERF_5H = 0x2e,
};
// Public interface of the pixel engine emulation.
namespace PixelEngine
{
// Resets the control register and bounding box and registers the main-thread events.
void Init();
// Passes the pixel engine state through the given PointerWrap (save states).
void DoState(PointerWrap &p);
// Read
// 16-bit register read; the value is returned through _uReturnValue.
void Read16(u16& _uReturnValue, const u32 _iAddress);
// Write
// 16-bit register write.
void Write16(const u16 _iValue, const u32 _iAddress);
// 32-bit writes are only logged and otherwise ignored.
void Write32(const u32 _iValue, const u32 _iAddress);
// gfx plugin support
// Called from the video thread: stores a token value, or (non-zero
// _bSetTokenAcknowledge) schedules the token interrupt on the main thread.
void SetToken(const u16 _token, const int _bSetTokenAcknowledge);
// Called from the video thread: schedules the finish interrupt on the main thread.
void SetFinish(void);
// True when idle skipping is permitted given the current interrupt enables.
bool AllowIdleSkipping();
// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it.
// bbox is indexed [0]=left, [1]=right, [2]=top, [3]=bottom.
extern u16 bbox[4];
extern bool bbox_active;
} // end of namespace PixelEngine
#endif

View File

@ -10,6 +10,8 @@ files = [
'XFMemory.cpp',
'XFStructs.cpp',
'BPStructs.cpp',
'CommandProcessor.cpp',
'PixelEngine.cpp',
'memcpy_amd.cpp',
'OpcodeDecoding.cpp',
'TextureDecoder.cpp',

View File

@ -325,6 +325,11 @@ inline u32 makecol(int r, int g, int b, int a)
return (a << 24)|(r << 16)|(g << 8)|b;
}
// Packs four 8-bit channels into one 32-bit value with r in bits 0-7,
// g in bits 8-15, b in bits 16-23 and a in bits 24-31.
inline u32 makeRGBA(int r, int g, int b, int a)
{
	const int upperHalf = (a << 24) | (b << 16);
	const int lowerHalf = (g << 8) | r;
	return upperHalf | lowerHalf;
}
void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
{
// S3TC Decoder (Note: GCN decodes differently from PC so we can't use native support)
@ -664,6 +669,338 @@ PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, in
return retval;
}
// Expands an RGB565 value (red in bits 11-15, green in bits 5-10, blue in
// bits 0-4) to a 32-bit pixel laid out as r | g<<8 | b<<16 | a<<24 with
// alpha fixed at 0xFF.
inline u32 decode565RGBA(u16 val)
{
	const int red = Convert5To8((val >> 11) & 0x1f);
	const int green = Convert6To8((val >> 5) & 0x3f);
	const int blue = Convert5To8(val & 0x1f);
	const int alpha = 0xFF;

	return red | (green << 8) | (blue << 16) | (alpha << 24);
}
// Expands an intensity/alpha pair to a 32-bit pixel. The input is taken with
// alpha in the LOW byte and intensity in the HIGH byte; intensity is
// replicated into bits 0-23 and alpha goes to bits 24-31.
inline u32 decodeIA8Swapped(u16 val)
{
	const int alpha = val & 0xFF;
	const int intensity = val >> 8;

	return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
}
// Expands an RGB5A3 value to a 32-bit pixel laid out as r | g<<8 | b<<16 | a<<24.
// Bit 15 selects the encoding: set = opaque with 5 bits per colour channel,
// clear = 3-bit alpha with 4 bits per colour channel.
inline u32 decode5A3RGBA(u16 val)
{
	int red, green, blue, alpha;

	const bool hasAlphaBits = (val & 0x8000) == 0;
	if (hasAlphaBits)
	{
		alpha = Convert3To8((val >> 12) & 0x7);
		red = Convert4To8((val >> 8) & 0xf);
		green = Convert4To8((val >> 4) & 0xf);
		blue = Convert4To8(val & 0xf);
	}
	else
	{
		red = Convert5To8((val >> 10) & 0x1f);
		green = Convert5To8((val >> 5) & 0x1f);
		blue = Convert5To8(val & 0x1f);
		alpha = 0xFF;
	}

	return red | (green << 8) | (blue << 16) | (alpha << 24);
}
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt)
{
/* General formula for computing texture offset
//
u16 sBlk = s / blockWidth;
u16 tBlk = t / blockHeight;
u16 widthBlks = (width / blockWidth) + 1;
u32 base = (tBlk * widthBlks + sBlk) * blockWidth * blockHeight;
u16 blkS = s & (blockWidth - 1);
u16 blkT = t & (blockHeight - 1);
u32 blkOff = blkT * blockWidth + blkS;
*/
switch (texformat)
{
case GX_TF_C4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 7;
u32 blkOff = (blkT << 3) + blkS;
int rs = (blkOff & 1)?0:4;
u32 offset = base + (blkOff >> 1);
u8 val = (*(src + offset) >> rs) & 0xF;
u16 *tlut = (u16*)(texMem + tlutaddr);
switch (tlutfmt)
{
case 0:
*((u32*)dst) = decodeIA8Swapped(tlut[val]);
break;
case 1:
*((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
break;
case 2:
*((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
break;
}
}
break;
case GX_TF_I4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 7;
u32 blkOff = (blkT << 3) + blkS;
int rs = (blkOff & 1)?0:4;
u32 offset = base + (blkOff >> 1);
u8 val = (*(src + offset) >> rs) & 0xF;
val = Convert4To8(val);
dst[0] = val;
dst[1] = val;
dst[2] = val;
dst[3] = val;
}
break;
case GX_TF_I8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
dst[0] = val;
dst[1] = val;
dst[2] = val;
dst[3] = val;
}
break;
case GX_TF_C8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
u16 *tlut = (u16*)(texMem + tlutaddr);
switch (tlutfmt)
{
case 0:
*((u32*)dst) = decodeIA8Swapped(tlut[val]);
break;
case 1:
*((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
break;
case 2:
*((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
break;
}
}
break;
case GX_TF_IA4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
const u8 a = Convert4To8(val>>4);
const u8 l = Convert4To8(val&0xF);
dst[0] = l;
dst[1] = l;
dst[2] = l;
dst[3] = a;
}
break;
case GX_TF_IA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
*((u32*)dst) = decodeIA8Swapped(*valAddr);
}
break;
case GX_TF_C14X2:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
u16 val = Common::swap16(*valAddr) & 0x3FFF;
u16 *tlut = (u16*)(texMem + tlutaddr);
switch (tlutfmt)
{
case 0:
*((u32*)dst) = decodeIA8Swapped(tlut[val]);
break;
case 1:
*((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
break;
case 2:
*((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
break;
}
}
break;
case GX_TF_RGB565:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
*((u32*)dst) = decode565RGBA(Common::swap16(*valAddr));
}
break;
case GX_TF_RGB5A3:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
*((u32*)dst) = decode5A3RGBA(Common::swap16(*valAddr));
}
break;
case GX_TF_RGBA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5; // shift by 5 is correct
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1 ;
const u8* valAddr = src + offset;
dst[3] = valAddr[0];
dst[0] = valAddr[1];
dst[1] = valAddr[32];
dst[2] = valAddr[33];
}
break;
case GX_TF_CMPR:
{
u16 sDxt = s >> 2;
u16 tDxt = t >> 2;
u16 sBlk = sDxt >> 1;
u16 tBlk = tDxt >> 1;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 2;
u16 blkS = sDxt & 1;
u16 blkT = tDxt & 1;
u32 blkOff = (blkT << 1) + blkS;
u32 offset = (base + blkOff) << 3;
const DXTBlock* dxtBlock = (const DXTBlock*)(src + offset);
u16 c1 = Common::swap16(dxtBlock->color1);
u16 c2 = Common::swap16(dxtBlock->color2);
int blue1 = Convert5To8(c1 & 0x1F);
int blue2 = Convert5To8(c2 & 0x1F);
int green1 = Convert6To8((c1 >> 5) & 0x3F);
int green2 = Convert6To8((c2 >> 5) & 0x3F);
int red1 = Convert5To8((c1 >> 11) & 0x1F);
int red2 = Convert5To8((c2 >> 11) & 0x1F);
u16 ss = s & 3;
u16 tt = t & 3;
int colorSel = dxtBlock->lines[tt];
int rs = 6 - (ss << 1);
colorSel = (colorSel >> rs) & 3;
colorSel |= c1 > c2?0:4;
u32 color = 0;
switch (colorSel)
{
case 0:
case 4:
color = makeRGBA(red1, green1, blue1, 255);
break;
case 1:
case 5:
color = makeRGBA(red2, green2, blue2, 255);
break;
case 2:
color = makeRGBA(red1+(red2-red1)/3, green1+(green2-green1)/3, blue1+(blue2-blue1)/3, 255);
break;
case 3:
color = makeRGBA(red2+(red1-red2)/3, green2+(green1-green2)/3, blue2+(blue1-blue2)/3, 255);
break;
case 6:
color = makeRGBA((int)ceil((float)(red1+red2)/2), (int)ceil((float)(green1+green2)/2), (int)ceil((float)(blue1+blue2)/2), 255);
break;
case 7:
color = makeRGBA(red2, green2, blue2, 0);
break;
}
*((u32*)dst) = color;
}
break;
}
}
const char* texfmt[] = {
// pixel
"I4", "I8", "IA4", "IA8",

View File

@ -85,6 +85,8 @@ enum PC_TexFormat
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt);
u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed=0);
u32 TexDecoder_GetTlutHash(const u8* src, int len);

View File

@ -25,6 +25,7 @@
#include "StringUtil.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "PixelEngine.h"
#include "LookUpTables.h"
#include "Statistics.h"
@ -89,7 +90,7 @@ void LOADERDECL PosMtx_Write()
void LOADERDECL UpdateBoundingBox()
{
if (!*g_VideoInitialize.pBBoxActive)
if (!PixelEngine::bbox_active)
return;
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
@ -125,10 +126,10 @@ void LOADERDECL UpdateBoundingBox()
o[0] = (o[0] + 1.0f) * 320.0f;
o[1] = (o[1] + 1.0f) * 240.0f;
if (o[0] < g_VideoInitialize.pBBox[0]) g_VideoInitialize.pBBox[0] = (u16)std::max(0.0f, o[0]);
if (o[0] > g_VideoInitialize.pBBox[1]) g_VideoInitialize.pBBox[1] = (u16)std::min(640.0f, o[0]);
if (o[1] < g_VideoInitialize.pBBox[2]) g_VideoInitialize.pBBox[2] = (u16)std::max(0.0f, o[1]);
if (o[1] > g_VideoInitialize.pBBox[3]) g_VideoInitialize.pBBox[3] = (u16)std::min(480.0f, o[1]);
if (o[0] < PixelEngine::bbox[0]) PixelEngine::bbox[0] = (u16)std::max(0.0f, o[0]);
if (o[0] > PixelEngine::bbox[1]) PixelEngine::bbox[1] = (u16)std::min(640.0f, o[0]);
if (o[1] < PixelEngine::bbox[2]) PixelEngine::bbox[2] = (u16)std::max(0.0f, o[1]);
if (o[1] > PixelEngine::bbox[3]) PixelEngine::bbox[3] = (u16)std::min(480.0f, o[1]);
/*
if (GetAsyncKeyState(VK_LSHIFT)) {
ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]);

View File

@ -22,6 +22,8 @@
#include "XFMemory.h"
#include "TextureDecoder.h"
#include "Fifo.h"
#include "CommandProcessor.h"
#include "PixelEngine.h"
static void DoState(PointerWrap &p)
{
@ -43,6 +45,9 @@ static void DoState(PointerWrap &p)
// FIFO
Fifo_DoState(p);
CommandProcessor::DoState(p);
PixelEngine::DoState(p);
}
void VideoCommon_DoState(PointerWrap &p)