Finally Merge branch 'videosoftware-xfb'

This adds xfb support to the videosoftware backend, which increases its
accuracy and, more importantly, enables the usage of many homebrew apps
which write directly to the xfb on the videosoftware backend.

Conflicts:
	Source/Core/VideoBackends/Software/SWRenderer.cpp
	Source/Core/VideoBackends/Software/SWmain.cpp
This commit is contained in:
Scott Mansell
2014-01-21 00:10:00 +13:00
34 changed files with 17020 additions and 5102 deletions

View File

@ -15,20 +15,38 @@
#include "HW/Memmap.h"
#include "Core.h"
// Gamma values handed to CopyToXfb; CopyEfb picks the entry with bpmem.triggerEFBCopy.gamma.
static const float s_gammaLUT[] = { 1.0f, 1.7f, 2.2f, 1.0f };
namespace EfbCopy
{
void CopyToXfb()
void CopyToXfb(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)
{
GLInterface->Update(); // just updates the render window position and the backbuffer size
if (!g_SWVideoConfig.bHwRasterizer)
{
// copy to open gl for rendering
EfbInterface::UpdateColorTexture();
SWRenderer::DrawTexture(EfbInterface::efbColorTexture, EFB_WIDTH, EFB_HEIGHT);
}
INFO_LOG(VIDEO, "xfbaddr: %x, fbwidth: %i, fbheight: %i, source: (%i, %i, %i, %i), Gamma %f",
xfbAddr, fbWidth, fbHeight, sourceRc.top, sourceRc.left, sourceRc.bottom, sourceRc.right, Gamma);
SWRenderer::SwapBuffer();
if(!g_SWVideoConfig.bBypassXFB) {
EfbInterface::yuv422_packed* xfb_in_ram = (EfbInterface::yuv422_packed *) Memory::GetPointer(xfbAddr);
EfbInterface::CopyToXFB(xfb_in_ram, fbWidth, fbHeight, sourceRc, Gamma);
} else {
u8 *colorTexture = SWRenderer::getColorTexture(); // Ask SWRenderer for the next color texture
EfbInterface::BypassXFB(colorTexture, fbWidth, fbHeight, sourceRc, Gamma);
SWRenderer::swapColorTexture(); // Tell SWRenderer we are now finished with it.
}
}
}
void CopyToRam()
@ -62,21 +80,45 @@ namespace EfbCopy
void CopyEfb()
{
if (bpmem.triggerEFBCopy.copy_to_xfb)
DebugUtil::OnFrameEnd();
EFBRectangle rc;
rc.left = (int)bpmem.copyTexSrcXY.x;
rc.top = (int)bpmem.copyTexSrcXY.y;
// flipper represents the widths internally as last pixel minus starting pixel, so
// these are zero indexed.
rc.right = rc.left + (int)bpmem.copyTexSrcWH.x + 1;
rc.bottom = rc.top + (int)bpmem.copyTexSrcWH.y + 1;
if (!g_bSkipCurrentFrame)
{
if (bpmem.triggerEFBCopy.copy_to_xfb)
{
CopyToXfb();
Core::Callback_VideoCopiedToXFB(true);
float yScale;
if (bpmem.triggerEFBCopy.scale_invert)
yScale = 256.0f / (float)bpmem.dispcopyyscale;
else
yScale = (float)bpmem.dispcopyyscale / 256.0f;
swstats.frameCount++;
float xfbLines = ((bpmem.copyTexSrcWH.y + 1.0f) * yScale);
if (yScale != 1.0)
WARN_LOG(VIDEO, "yScale of %f is currently unsupported", yScale);
if ((u32)xfbLines > MAX_XFB_HEIGHT)
{
INFO_LOG(VIDEO, "Tried to scale EFB to too many XFB lines (%f)", xfbLines);
xfbLines = MAX_XFB_HEIGHT;
}
CopyToXfb(bpmem.copyTexDest << 5,
bpmem.copyMipMapStrideChannels << 4,
(u32)xfbLines,
rc,
s_gammaLUT[bpmem.triggerEFBCopy.gamma]);
}
else
{
CopyToRam();
CopyToRam(); // FIXME: should use the rectangle we have already created above
}
if (bpmem.triggerEFBCopy.clear)
@ -87,13 +129,5 @@ namespace EfbCopy
ClearEfb();
}
}
else
{
if (bpmem.triggerEFBCopy.copy_to_xfb)
{
// no frame rendered but tell that a frame has finished for frame skip counter
Core::Callback_VideoCopiedToXFB(false);
}
}
}
}

View File

@ -8,16 +8,13 @@
#include "BPMemLoader.h"
#include "LookUpTables.h"
#include "SWPixelEngine.h"
#include "HW/Memmap.h"
u8 efb[EFB_WIDTH*EFB_HEIGHT*6];
namespace EfbInterface
{
u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4];
inline u32 GetColorOffset(u16 x, u16 y)
{
return (x + y * EFB_WIDTH) * 3;
@ -31,7 +28,6 @@ namespace EfbInterface
void DoState(PointerWrap &p)
{
p.DoArray(efb, EFB_WIDTH*EFB_HEIGHT*6);
p.DoArray(efbColorTexture, EFB_WIDTH*EFB_HEIGHT*4);
}
void SetPixelAlphaOnly(u32 offset, u8 a)
@ -469,6 +465,19 @@ namespace EfbInterface
GetPixelColor(offset, color);
}
// For internal use only: reads the EFB colour at (x, y) and converts it to YUV.
// The result is non-normalized (no offsets are added here), which saves work later.
void GetColorYUV(u16 x, u16 y, yuv444 *out)
{
	u8 rgba[4];
	GetColor(x, y, rgba);

	const u8 red = rgba[RED_C];
	const u8 green = rgba[GRN_C];
	const u8 blue = rgba[BLU_C];

	// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
	// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
	out->Y = 0.257f * red + 0.504f * green + 0.098f * blue;
	out->U = -0.148f * red + -0.291f * green + 0.439f * blue;
	out->V = 0.439f * red + -0.368f * green + -0.071f * blue;
}
u32 GetDepth(u16 x, u16 y)
{
u32 offset = GetDepthOffset(x, y);
@ -482,21 +491,85 @@ namespace EfbInterface
return &efb[GetColorOffset(x, y)];
}
void UpdateColorTexture()
{
// Converts a rectangle of the EFB to YUYV (4:2:2) and stores it in the XFB.
//
// xfb_in_ram: destination buffer; a null pointer is rejected with a warning.
//             Entries are indexed with the EFB x coordinate (left .. right-1).
// fbWidth:    number of yuv422_packed entries the destination advances after each scanline.
// fbHeight:   currently unused.
// sourceRc:   EFB rectangle to copy; right and bottom are exclusive.
// Gamma:      currently unused (see FIXME below).
void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) {
	// FIXME: We should do Gamma correction

	if (!xfb_in_ram)
	{
		WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
		return;
	}

	int left = sourceRc.left;
	int right = sourceRc.right;
	const int width = right - left;

	// nothing to copy for an empty or inverted rectangle
	if (width <= 0)
		return;

	// the scanline buffer below only has room for EFB_WIDTH pixels plus the two borders
	if (width > EFB_WIDTH)
	{
		WARN_LOG(VIDEO, "Tried to copy an EFB rectangle wider than the EFB (%i pixels)", width);
		return;
	}

	// this assumes copies will always start on an even (YU) pixel and the
	// copy always has an even width, which might not be true.
	if (left & 1 || right & 1) {
		WARN_LOG(VIDEO, "Trying to copy XFB to from unaligned EFB source");
		// this will show up as wrongly encoded
	}

	// Scanline buffer, leave room for borders. EFB pixel x is stored at scanline[x - left + 1].
	// Our internal yuv444 type is not normalized, so black is {0, 0, 0} instead of {16, 128, 128};
	// zero-initialising the whole buffer therefore gives a black border at the start (scanline[0])
	// and at the end (every entry after scanline[width]), independent of where the rectangle starts.
	yuv444 scanline[EFB_WIDTH+2] = {};

	for (u16 y = sourceRc.top; y < sourceRc.bottom; y++)
	{
		// Get a scanline of YUV pixels in 4:4:4 format
		for (int i = 1, x = left; x < right; i++, x++)
		{
			GetColorYUV(x, y, &scanline[i]);
		}

		// And Downsample them to 4:2:2
		for (int i = 1, x = left; x < right; i+=2, x+=2)
		{
			// YU pixel
			xfb_in_ram[x].Y = scanline[i].Y + 16;
			// we mix our color differences in 10 bit space so it will round more accurately
			// U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1]
			// (multiply instead of shifting left: the samples are signed and may be negative)
			xfb_in_ram[x].UV = 128 + ((scanline[i-1].U + (scanline[i].U * 2) + scanline[i+1].U) >> 2);

			// YV pixel
			xfb_in_ram[x+1].Y = scanline[i+1].Y + 16;
			// V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
			xfb_in_ram[x+1].UV = 128 + ((scanline[i].V + (scanline[i+1].V * 2) + scanline[i+2].V) >> 2);
		}

		// move on to the next XFB line
		xfb_in_ram += fbWidth;
	}
}
// Like CopyToXFB, but we copy directly into the opengl colour texture without going via Gamecube main memory or doing a yuyv conversion
void BypassXFB(u8* texture, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) {
if(fbWidth*fbHeight > 640*568) {
ERROR_LOG(VIDEO, "Framebuffer is too large: %ix%i", fbWidth, fbHeight);
return;
}
u32 color;
u8* colorPtr = (u8*)&color;
u32* texturePtr = (u32*)efbColorTexture;
u32* texturePtr = (u32*)texture;
u32 textureAddress = 0;
u32 efbOffset = 0;
for (u16 y = 0; y < EFB_HEIGHT; y++)
int left = sourceRc.left;
int right = sourceRc.right;
for (u16 y = sourceRc.top; y < sourceRc.bottom; y++)
{
for (u16 x = 0; x < EFB_WIDTH; x++)
for (u16 x = left; x < right; x++)
{
GetPixelColor(efbOffset, colorPtr);
efbOffset += 3;
texturePtr[textureAddress++] = Common::swap32(color); // ABGR->RGBA
GetColor(x, y, colorPtr);
texturePtr[textureAddress++] = Common::swap32(color);
}
}
}

View File

@ -11,6 +11,21 @@ namespace EfbInterface
{
const int DEPTH_BUFFER_START = EFB_WIDTH * EFB_HEIGHT * 3;
// xfb color format: one luma sample plus one chroma sample per entry.
// Packed so the compiler doesn't mess with alignment.
#pragma pack(push,1)
struct yuv422_packed
{
	u8 Y;
	u8 UV;
};
#pragma pack(pop)
// Unpacked 4:4:4 sample. This struct is only used internally, so we could optimise alignment.
struct yuv444
{
	u8 Y;
	s8 U;
	s8 V;
};
enum { ALP_C, BLU_C, GRN_C, RED_C };
// color order is ABGR in order to emulate RGBA on little-endian hardware
@ -28,12 +43,14 @@ namespace EfbInterface
void SetDepth(u16 x, u16 y, u32 depth);
void GetColor(u16 x, u16 y, u8 *color);
void GetColorYUV(u16 x, u16 y, yuv444 *color);
u32 GetDepth(u16 x, u16 y);
u8* GetPixelPointer(u16 x, u16 y, bool depth);
void UpdateColorTexture();
extern u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4]; // RGBA format
void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma);
void BypassXFB(u8* texture, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma);
void DoState(PointerWrap &p);
}

View File

@ -5,10 +5,12 @@
#include "Common.h"
#include "../OGL/GLUtil.h"
#include "Core.h"
#include "ImageWrite.h"
#include "RasterFont.h"
#include "SWRenderer.h"
#include "SWStatistics.h"
#include "SWCommandProcessor.h"
#include "OnScreenDisplay.h"
@ -18,6 +20,9 @@ static GLint attr_pos = -1, attr_tex = -1;
static GLint uni_tex = -1;
static GLuint program;
// Pair of colour buffers; SWRenderer::Prepare() allocates 640*568*4 bytes for each.
static u8 *s_xfbColorTexture[2];
// Index of the buffer SWRenderer::Swap() draws; getColorTexture() returns the other one.
static int s_currentColorTexture = 0;
static volatile bool s_bScreenshot;
static std::mutex s_criticalScreenshot;
static std::string s_sScreenshotName;
@ -34,6 +39,8 @@ void SWRenderer::Init()
void SWRenderer::Shutdown()
{
// both buffers are allocated with new u8[...] in SWRenderer::Prepare(), so release them with delete[]
delete[] s_xfbColorTexture[0];
delete[] s_xfbColorTexture[1];
glDeleteProgram(program);
glDeleteTextures(1, &s_RenderTarget);
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
@ -77,6 +84,11 @@ void CreateShaders()
void SWRenderer::Prepare()
{
s_xfbColorTexture[0] = new u8[640*568*4];
s_xfbColorTexture[1] = new u8[640*568*4];
s_currentColorTexture = 0;
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
glGenTextures(1, &s_RenderTarget);
@ -140,8 +152,67 @@ void SWRenderer::DrawDebugText()
SWRenderer::RenderText(debugtext_buffer, 20, 20, 0xFFFFFF00);
}
// Returns the colour buffer that is NOT selected by s_currentColorTexture.
u8* SWRenderer::getColorTexture()
{
	const int otherIndex = s_currentColorTexture ? 0 : 1;
	return s_xfbColorTexture[otherIndex];
}
// Flips s_currentColorTexture between 0 and 1, so the buffer previously returned by
// getColorTexture() becomes the current one.
void SWRenderer::swapColorTexture()
{
	s_currentColorTexture = s_currentColorTexture ? 0 : 1;
}
// Converts a YUYV (4:2:2) XFB image to 4-byte-per-pixel RGB + opaque alpha in the colour
// buffer returned by getColorTexture(), then swaps the colour buffers.
//
// xfb:      first entry of the image; consecutive rows are fbWidth entries apart.
// fbWidth:  image width in pixels.
// fbHeight: image height in lines.
//
// Images with more than 640*568 pixels are rejected with an error and nothing is swapped.
void SWRenderer::UpdateColorTexture(EfbInterface::yuv422_packed *xfb, u32 fbWidth, u32 fbHeight)
{
	// widen before multiplying so huge dimensions cannot wrap around and slip past the check
	if((unsigned long long)fbWidth * fbHeight > 640*568) {
		ERROR_LOG(VIDEO, "Framebuffer is too large: %ix%i", fbWidth, fbHeight);
		return;
	}

	u32 offset = 0;
	u8 *TexturePointer = getColorTexture();

	// 32-bit counters: a 16-bit counter would wrap and never reach a dimension above 65535
	for (u32 y = 0; y < fbHeight; y++)
	{
		for (u32 x = 0; x < fbWidth; x+=2)
		{
			// with an odd width the last pixel of a row has no partner
			const bool hasSecondPixel = (x + 1 < fbWidth);

			// We do this one color sample (aka 2 RGB pixels) at a time
			int Y1 = xfb[x].Y - 16;
			int U = int(xfb[x].UV) - 128;
			// a lone trailing pixel has no V sample of its own; use neutral chroma
			// instead of reading the first entry of the next row
			int V = hasSecondPixel ? int(xfb[x+1].UV) - 128 : 0;

			// We do the inverse BT.601 conversion for YCbCr to RGB
			// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 + 1.596f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 - 0.392f * U - 0.813f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 + 2.017f * U ));
			TexturePointer[offset++] = 255;

			// only emit the second pixel when it is inside the row, so every row is
			// exactly fbWidth pixels and the buffer size checked above is never exceeded
			if (hasSecondPixel)
			{
				int Y2 = xfb[x+1].Y - 16;

				TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 + 1.596f * V));
				TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 - 0.392f * U - 0.813f * V));
				TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 + 2.017f * U ));
				TexturePointer[offset++] = 255;
			}
		}
		xfb += fbWidth;
	}
	swapColorTexture();
}
// Called on the GPU thread
// Draws the current colour texture (unless the hardware rasterizer is enabled), counts the
// frame, swaps the GL buffers and reports the finished frame to the core.
void SWRenderer::Swap(u32 fbWidth, u32 fbHeight)
{
	// just updates the render window position and the backbuffer size
	GLInterface->Update();

	const bool drawSoftwareOutput = !g_SWVideoConfig.bHwRasterizer;
	if (drawSoftwareOutput)
	{
		u8 *currentTexture = s_xfbColorTexture[s_currentColorTexture];
		SWRenderer::DrawTexture(currentTexture, fbWidth, fbHeight);
	}

	swstats.frameCount++;
	SWRenderer::SwapBuffer();

	// FIXME: should this function be called FrameRendered?
	Core::Callback_VideoCopiedToXFB(true);
}
void SWRenderer::DrawTexture(u8 *texture, int width, int height)
{
// FIXME: This should add black bars when the game has set the VI to render less than the full xfb.
// Save screenshot
if (s_bScreenshot)
{
@ -151,9 +222,11 @@ void SWRenderer::DrawTexture(u8 *texture, int width, int height)
s_sScreenshotName.clear();
s_bScreenshot = false;
}
GLsizei glWidth = (GLsizei)GLInterface->GetBackBufferWidth();
GLsizei glHeight = (GLsizei)GLInterface->GetBackBufferHeight();
// Update GLViewPort
glViewport(0, 0, glWidth, glHeight);
glScissor(0, 0, glWidth, glHeight);

View File

@ -6,6 +6,7 @@
#define _RENDERER_H_
#include "CommonTypes.h"
#include "EfbInterface.h"
#include "Thread.h"
namespace SWRenderer
@ -18,8 +19,12 @@ namespace SWRenderer
void RenderText(const char* pstr, int left, int top, u32 color);
void DrawDebugText();
u8* getColorTexture();
void swapColorTexture();
void UpdateColorTexture(EfbInterface::yuv422_packed *xfb, u32 fbWidth, u32 fbHeight);
void DrawTexture(u8 *texture, int width, int height);
void Swap(u32 fbWidth, u32 fbHeight);
void SwapBuffer();
}

View File

@ -15,6 +15,7 @@ SWVideoConfig::SWVideoConfig()
renderToMainframe = false;
bHwRasterizer = false;
bBypassXFB = false;
bShowStats = false;
@ -41,6 +42,7 @@ void SWVideoConfig::Load(const char* ini_file)
iniFile.Get("Hardware", "RenderToMainframe", &renderToMainframe, false);
iniFile.Get("Rendering", "HwRasterizer", &bHwRasterizer, false);
iniFile.Get("Rendering", "BypassXFB", &bBypassXFB, false);
iniFile.Get("Rendering", "ZComploc", &bZComploc, true);
iniFile.Get("Rendering", "ZFreeze", &bZFreeze, true);
@ -65,6 +67,7 @@ void SWVideoConfig::Save(const char* ini_file)
iniFile.Set("Hardware", "RenderToMainframe", renderToMainframe);
iniFile.Set("Rendering", "HwRasterizer", bHwRasterizer);
iniFile.Set("Rendering", "BypassXFB", bBypassXFB);
iniFile.Set("Rendering", "ZComploc", bZComploc);
iniFile.Set("Rendering", "ZFreeze", bZFreeze);

View File

@ -22,6 +22,7 @@ struct SWVideoConfig : NonCopyable
bool renderToMainframe;
bool bHwRasterizer;
bool bBypassXFB;
// Emulation features
bool bZComploc;

View File

@ -10,6 +10,7 @@
#endif // HAVE_WX
#include "../OGL/GLExtensions/GLExtensions.h"
#include "Atomic.h"
#include "SWCommandProcessor.h"
#include "OpcodeDecoder.h"
#include "SWVideoConfig.h"
@ -29,10 +30,22 @@
#include "OpcodeDecoder.h"
#include "SWVertexLoader.h"
#include "SWStatistics.h"
#include "HW/VideoInterface.h"
#include "HW/Memmap.h"
#include "ConfigManager.h"
#include "OnScreenDisplay.h"
#define VSYNC_ENABLED 0
// Set by Video_EndField (when bCPUThread is enabled) to request a swap;
// VideoFifo_CheckSwapRequest clears it again after calling SWRenderer::Swap.
static volatile u32 s_swapRequested = false;
// Arguments of the most recent Video_BeginField call, read back by Video_EndField
// and VideoFifo_CheckSwapRequest.
static volatile struct
{
u32 xfbAddr;
u32 fbWidth;
u32 fbHeight;
} s_beginFieldArgs;
namespace SW
{
@ -188,11 +201,41 @@ void VideoSoftware::Video_Prepare()
// Run from the CPU thread (from VideoInterface.cpp)
// Only records the field parameters in s_beginFieldArgs; the actual copy and swap are
// performed later from Video_EndField.
void VideoSoftware::Video_BeginField(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
// Video_EndField treats an xfbAddr of 0 as "nothing to display"
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
}
// Run from the CPU thread (from VideoInterface.cpp)
// Converts the XFB recorded by Video_BeginField into the colour texture (unless the hardware
// rasterizer or the XFB bypass is enabled) and then either swaps directly or asks the GPU
// thread to do so.
void VideoSoftware::Video_EndField()
{
	// Technically the XFB is continually rendered out scanline by scanline between
	// BeginField and EndField. We could possibly get away with copying out the whole thing
	// at BeginField for less lag, but for the safest emulation we run it here.

	const bool nothingToDisplay = g_bSkipCurrentFrame || s_beginFieldArgs.xfbAddr == 0;
	if (nothingToDisplay)
	{
		swstats.frameCount++;
		swstats.ResetFrame();
		Core::Callback_VideoCopiedToXFB(false);
		return;
	}

	if (!g_SWVideoConfig.bHwRasterizer && !g_SWVideoConfig.bBypassXFB)
	{
		EfbInterface::yuv422_packed *xfb = (EfbInterface::yuv422_packed *) Memory::GetPointer(s_beginFieldArgs.xfbAddr);
		SWRenderer::UpdateColorTexture(xfb, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
	}

	// Ideally we would just move all the opengl context stuff to the CPU thread, but this gets
	// messy when the Hardware Rasterizer is enabled.
	// And Neobrain loves his Hardware Rasterizer

	const bool dualCore = SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
	if (!dualCore)
	{
		SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
		return;
	}

	// If we are running dual core, signal the GPU thread about the new colour texture.
	Common::AtomicStoreRelease(s_swapRequested, true);
}
u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData)
@ -239,6 +282,16 @@ bool VideoSoftware::Video_Screenshot(const char *_szFilename)
return true;
}
// Run from the graphics thread
// Performs the swap requested through s_swapRequested (if any) and then clears the request.
static void VideoFifo_CheckSwapRequest()
{
	const bool swapPending = Common::AtomicLoadAcquire(s_swapRequested);
	if (!swapPending)
		return;

	SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
	Common::AtomicStoreRelease(s_swapRequested, false);
}
// -------------------------------
// Enter and exit the video loop
// -------------------------------
@ -249,6 +302,7 @@ void VideoSoftware::Video_EnterLoop()
while (fifoStateRun)
{
VideoFifo_CheckSwapRequest();
g_video_backend->PeekMessages();
if (!SWCommandProcessor::RunBuffer())
@ -259,6 +313,7 @@ void VideoSoftware::Video_EnterLoop()
while (!emuRunningState && fifoStateRun)
{
g_video_backend->PeekMessages();
VideoFifo_CheckSwapRequest();
m_csSWVidOccupied.unlock();
Common::SleepCurrentThread(1);
m_csSWVidOccupied.lock();

View File

@ -68,6 +68,9 @@ VideoConfigDialog::VideoConfigDialog(wxWindow* parent, const std::string& title,
// rasterizer
szr_rendering->Add(new SettingCheckBox(page_general, wxT("Hardware rasterization"), wxT(""), vconfig.bHwRasterizer));
// xfb
szr_rendering->Add(new SettingCheckBox(page_general, wxT("Bypass XFB"), wxT(""), vconfig.bBypassXFB));
}
// - info