Software Backend: Fix xfb output and add vertical scaling support

This commit is contained in:
iwubcode 2017-08-23 21:46:23 -05:00
parent e6d85b0915
commit 6e686f6ea1
14 changed files with 179 additions and 117 deletions

View File

@ -0,0 +1,33 @@
#pragma once
#include "Common/MathUtil.h"
#include <cmath>
namespace SW
{
// Nearest-neighbor copy of the srcrect region of `source` into the dstrect region of
// `destination`, scaling when the two rectangles differ in size.
// Modified from
// http://tech-algorithm.com/articles/nearest-neighbor-image-scaling/
//
// Both buffers are addressed as tightly packed rows whose pitch is the width of the
// matching rectangle: source rows are srcrect.GetWidth() elements apart and destination
// rows are dstrect.GetWidth() elements apart. The rectangles' left/top are added to the
// element coordinates before the row pitch is applied.
//
// source      - first element of the source image
// srcrect     - region of the source that is sampled
// destination - first element of the destination image
// dstrect     - region of the destination that is written
//
// Nothing is copied when either rectangle has no area.
template <typename T>
void copy_region(const T* const source, const MathUtil::Rectangle<int>& srcrect, T* destination,
                 const MathUtil::Rectangle<int>& dstrect)
{
  const int src_width = srcrect.GetWidth();
  const int src_height = srcrect.GetHeight();
  const int dst_width = dstrect.GetWidth();
  const int dst_height = dstrect.GetHeight();

  // An empty source has nothing to sample and an empty destination nothing to fill.
  if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0)
    return;

  const double x_ratio = src_width / static_cast<double>(dst_width);
  const double y_ratio = src_height / static_cast<double>(dst_height);

  for (int i = 0; i < dst_height; i++)
  {
    // Scale the offset *inside* dstrect, not the absolute destination coordinate;
    // otherwise a non-zero dstrect origin would shift the sampled source position.
    int src_row = static_cast<int>(std::round(i * y_ratio));
    // std::round can land one past the last source row (e.g. 7 * 0.5 -> 4 of 4 rows).
    if (src_row > src_height - 1)
      src_row = src_height - 1;

    const int src_y = srcrect.top + src_row;
    const int destination_y = dstrect.top + i;

    for (int j = 0; j < dst_width; j++)
    {
      int src_col = static_cast<int>(std::round(j * x_ratio));
      // Same overshoot guard as for rows, so a sample never wraps into the next row.
      if (src_col > src_width - 1)
        src_col = src_width - 1;

      const int src_x = srcrect.left + src_col;
      const int destination_x = dstrect.left + j;

      const int destination_offset = (destination_y * dst_width) + destination_x;
      const int src_offset = (src_y * src_width) + src_x;
      destination[destination_offset] = source[src_offset];
    }
  }
}
}

View File

@ -12,29 +12,8 @@
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Fifo.h"
// Gamma values for XFB copies; CopyEfb indexes this table with
// bpmem.triggerEFBCopy.gamma and forwards the selected value to CopyToXfb.
static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f};
namespace EfbCopy
{
// Logs the copy parameters, resolves xfbAddr to a host pointer and hands the copy of
// sourceRc over to EfbInterface::CopyToXFB.
static void CopyToXfb(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,
                      float Gamma)
{
  DEBUG_LOG(VIDEO, "xfbaddr: %x, fbwidth: %i, fbheight: %i, source: (%i, %i, %i, %i), Gamma %f",
            xfbAddr, fbWidth, fbHeight, sourceRc.top, sourceRc.left, sourceRc.bottom,
            sourceRc.right, Gamma);

  // The destination is treated as an array of packed YUV 4:2:2 pixels.
  auto* const xfb_destination =
      reinterpret_cast<EfbInterface::yuv422_packed*>(Memory::GetPointer(xfbAddr));

  EfbInterface::CopyToXFB(xfb_destination, fbWidth, fbHeight, sourceRc, Gamma);
}
// Runs the texture encoder on the destination address taken from bpmem.copyTexDest
// (the register value is shifted left by 5 to form the address).
static void CopyToRam()
{
  TextureEncoder::Encode(Memory::GetPointer(bpmem.copyTexDest << 5));
}
void ClearEfb()
{
@ -56,42 +35,4 @@ void ClearEfb()
}
}
void CopyEfb()
{
EFBRectangle rc;
rc.left = (int)bpmem.copyTexSrcXY.x;
rc.top = (int)bpmem.copyTexSrcXY.y;
// flipper represents the widths internally as last pixel minus starting pixel, so
// these are zero indexed.
rc.right = rc.left + (int)bpmem.copyTexSrcWH.x + 1;
rc.bottom = rc.top + (int)bpmem.copyTexSrcWH.y + 1;
if (bpmem.triggerEFBCopy.copy_to_xfb)
{
float yScale;
if (bpmem.triggerEFBCopy.scale_invert)
yScale = 256.0f / (float)bpmem.dispcopyyscale;
else
yScale = (float)bpmem.dispcopyyscale / 256.0f;
float xfbLines = ((bpmem.copyTexSrcWH.y + 1.0f) * yScale);
if (yScale != 1.0)
WARN_LOG(VIDEO, "yScale of %f is currently unsupported", yScale);
if ((u32)xfbLines > MAX_XFB_HEIGHT)
{
INFO_LOG(VIDEO, "Tried to scale EFB to too many XFB lines (%f)", xfbLines);
xfbLines = MAX_XFB_HEIGHT;
}
CopyToXfb(bpmem.copyTexDest << 5, bpmem.copyMipMapStrideChannels << 4, (u32)xfbLines, rc,
s_gammaLUT[bpmem.triggerEFBCopy.gamma]);
}
else
{
CopyToRam(); // FIXME: should use the rectangle we have already created above
}
}
}

View File

@ -6,8 +6,5 @@
// Entry points of the software backend's EFB copy code.
namespace EfbCopy
{
// Copy the EFB to RAM as a texture format or XFB
// The choice is made from bpmem.triggerEFBCopy.copy_to_xfb, and the source rectangle
// is read from bpmem.copyTexSrcXY / bpmem.copyTexSrcWH.
void CopyEfb();
// NOTE(review): presumably clears the EFB; the implementation is not fully visible
// here — confirm against the definition.
void ClearEfb();
}

View File

@ -7,11 +7,13 @@
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <vector>
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/Swap.h"
#include "VideoBackends/Software/CopyRegion.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/LookUpTables.h"
#include "VideoCommon/PerfQueryBase.h"
@ -495,19 +497,16 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth)
return &efb[GetColorOffset(x, y)];
}
void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma)
void EncodeXFB(yuv422_packed* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale)
{
// FIXME: We should do Gamma correction
if (!xfb_in_ram)
{
WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
return;
}
int left = sourceRc.left;
int right = sourceRc.right;
int left = source_rect.left;
int right = source_rect.right;
// this assumes copies will always start on an even (YU) pixel and the
// copy always has an even width, which might not be true.
@ -520,7 +519,11 @@ void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRe
// Scanline buffer, leave room for borders
yuv444 scanline[EFB_WIDTH + 2];
for (u16 y = sourceRc.top; y < sourceRc.bottom; y++)
static std::vector<yuv422_packed> source;
source.resize(EFB_WIDTH * EFB_HEIGHT);
yuv422_packed* src_ptr = &source[0];
for (float y = source_rect.top; y < source_rect.bottom; y++)
{
// Get a scanline of YUV pixels in 4:4:4 format
@ -537,20 +540,23 @@ void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRe
for (int i = 1, x = left; x < right; i += 2, x += 2)
{
// YU pixel
xfb_in_ram[x].Y = scanline[i].Y + 16;
src_ptr[x].Y = scanline[i].Y + 16;
// we mix our color differences in 10 bit space so it will round more accurately
// U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1]
xfb_in_ram[x].UV =
src_ptr[x].UV =
128 + ((scanline[i - 1].U + (scanline[i].U << 1) + scanline[i + 1].U) >> 2);
// YV pixel
xfb_in_ram[x + 1].Y = scanline[i + 1].Y + 16;
src_ptr[x + 1].Y = scanline[i + 1].Y + 16;
// V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
xfb_in_ram[x + 1].UV =
src_ptr[x + 1].UV =
128 + ((scanline[i].V + (scanline[i + 1].V << 1) + scanline[i + 2].V) >> 2);
}
xfb_in_ram += fbWidth;
src_ptr += memory_stride;
}
// Apply y scaling and copy to the xfb memory location
SW::copy_region(source.data(), source_rect, xfb_in_ram, EFBRectangle{ source_rect.left, source_rect.top, source_rect.right, static_cast<int>(static_cast<float>(source_rect.bottom) * y_scale) });
}
bool ZCompare(u16 x, u16 y, u32 z)

View File

@ -57,8 +57,7 @@ u32 GetDepth(u16 x, u16 y);
u8* GetPixelPointer(u16 x, u16 y, bool depth);
void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma);
void EncodeXFB(yuv422_packed* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale);
extern u32 perf_values[PQ_NUM_MEMBERS];
inline void IncPerfCounterQuadCount(PerfQueryType type)

View File

@ -9,7 +9,7 @@
#include "Common/Logging/Log.h"
#include "VideoBackends/Software/SWOGLWindow.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoBackends/Software/SWTexture.h"
std::unique_ptr<SWOGLWindow> SWOGLWindow::s_instance;
@ -54,9 +54,9 @@ void SWOGLWindow::Prepare()
std::string frag_shader = "in vec2 TexCoord;\n"
"out vec4 ColorOut;\n"
"uniform sampler2DArray samp;\n"
"uniform sampler2D samp;\n"
"void main() {\n"
" ColorOut = texture(samp, vec3(TexCoord, 0.0));\n"
" ColorOut = texture(samp, TexCoord);\n"
"}\n";
std::string vertex_shader = "out vec2 TexCoord;\n"
@ -76,8 +76,11 @@ void SWOGLWindow::Prepare()
glUseProgram(m_image_program);
glUniform1i(glGetUniformLocation(m_image_program, "samp"), 0);
glGenTextures(1, &m_image_texture);
glBindTexture(GL_TEXTURE_2D, m_image_texture);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glGenVertexArrays(1, &m_image_vao);
}
@ -89,6 +92,7 @@ void SWOGLWindow::PrintText(const std::string& text, int x, int y, u32 color)
void SWOGLWindow::ShowImage(AbstractTexture* image, float aspect)
{
SW::SWTexture * sw_image = static_cast<SW::SWTexture*>(image);
GLInterface->Update(); // just updates the render window position and the backbuffer size
GLsizei glWidth = (GLsizei)GLInterface->GetBackBufferWidth();
@ -96,10 +100,15 @@ void SWOGLWindow::ShowImage(AbstractTexture* image, float aspect)
glViewport(0, 0, glWidth, glHeight);
image->Bind(0);
glActiveTexture(GL_TEXTURE9);
glBindTexture(GL_TEXTURE_2D, m_image_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
glPixelStorei(GL_UNPACK_ROW_LENGTH, sw_image->GetConfig().width);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, static_cast<GLsizei>(sw_image->GetConfig().width),
static_cast<GLsizei>(sw_image->GetConfig().height), 0, GL_RGBA, GL_UNSIGNED_BYTE,
sw_image->GetData());
glUseProgram(m_image_program);

View File

@ -42,5 +42,5 @@ private:
bool m_init{false};
u32 m_image_program, m_image_vao;
u32 m_image_program, m_image_texture, m_image_vao;
};

View File

@ -45,8 +45,6 @@ void SWRenderer::RenderText(const std::string& pstr, int left, int top, u32 colo
// Called on the GPU thread
void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma)
{
SWOGLWindow::s_instance->ShowImage(texture, 1.0);
OSD::DoCallbacks(OSD::CallbackType::OnFrame);
DrawDebugText();

View File

@ -4,8 +4,13 @@
#include "VideoBackends/Software/SWTexture.h"
#include <cstring>
#include "VideoBackends/Software/CopyRegion.h"
namespace SW
{
// Forwards the texture configuration to AbstractTexture. The body is empty, so no
// pixel storage is set up here; m_data is filled later (e.g. by Load).
SWTexture::SWTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config)
{
}
@ -18,11 +23,38 @@ void SWTexture::CopyRectangleFromTexture(const AbstractTexture* source,
const MathUtil::Rectangle<int>& srcrect,
const MathUtil::Rectangle<int>& dstrect)
{
const SWTexture * software_source_texture = static_cast<const SWTexture*>(source);
if (srcrect.GetWidth() == dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight())
{
m_data.assign(software_source_texture->GetData(), software_source_texture->GetData() + m_data.size());
}
else
{
copy_region(software_source_texture->GetData(), srcrect, GetData(), dstrect);
}
}
// Replaces the texture's storage with a copy of the buffer_size bytes starting at
// buffer. level, width, height and row_length are not used by this implementation.
void SWTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
                     size_t buffer_size)
{
  const u8* const buffer_end = buffer + buffer_size;
  m_data.assign(buffer, buffer_end);
}
// Read-only access to the first byte of the texture's storage (m_data).
const u8* SWTexture::GetData() const
{
return m_data.data();
}
// Mutable access to the first byte of the texture's storage (m_data).
u8* SWTexture::GetData()
{
return m_data.data();
}
std::optional<AbstractTexture::RawTextureInfo> SWTexture::MapFullImpl()
{
return AbstractTexture::RawTextureInfo{ GetData(),
m_config.width * 4, m_config.width, m_config.height };
}
} // namespace SW

View File

@ -4,6 +4,8 @@
#pragma once
#include <vector>
#include "Common/CommonTypes.h"
#include "VideoCommon/AbstractTexture.h"
@ -23,6 +25,15 @@ public:
const MathUtil::Rectangle<int>& dstrect) override;
void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
size_t buffer_size) override;
const u8* GetData() const;
u8* GetData();
private:
std::optional<RawTextureInfo> MapFullImpl() override;
std::vector<u8> m_data;
};
} // namespace SW

View File

@ -54,6 +54,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="Clipper.h" />
<ClInclude Include="CopyRegion.h" />
<ClInclude Include="DebugUtil.h" />
<ClInclude Include="EfbCopy.h" />
<ClInclude Include="EfbInterface.h" />

View File

@ -1,9 +1,8 @@
#pragma once
#include <memory>
#include "VideoBackends/Software/EfbCopy.h"
#include "VideoBackends/Software/SWTexture.h"
#include "VideoBackends/Software/TextureEncoder.h"
#include "VideoCommon/TextureCacheBase.h"
namespace SW
@ -22,7 +21,9 @@ public:
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override
{
EfbCopy::CopyEfb();
TextureEncoder::Encode(dst, params, native_width, bytes_per_row,
num_blocks_y, memory_stride, src_rect,
scale_by_half);
}
private:
@ -34,7 +35,7 @@ private:
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override
{
EfbCopy::CopyEfb();
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}
};

View File

@ -14,6 +14,7 @@
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/LookUpTables.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h"
namespace TextureEncoder
@ -1416,37 +1417,65 @@ static void EncodeZ24halfscale(u8* dst, const u8* src, EFBCopyFormat format)
}
}
void Encode(u8* dest_ptr)
namespace
{
void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half)
{
auto pixelformat = bpmem.zcontrol.pixel_format;
bool bFromZBuffer = pixelformat == PEControl::Z24;
bool bIsIntensityFmt = bpmem.triggerEFBCopy.intensity_fmt > 0;
EFBCopyFormat copyfmt = bpmem.triggerEFBCopy.tp_realFormat();
const u8* src =
EfbInterface::GetPixelPointer(bpmem.copyTexSrcXY.x, bpmem.copyTexSrcXY.y, bFromZBuffer);
EfbInterface::GetPixelPointer(src_rect.left, src_rect.top, params.depth);
if (bpmem.triggerEFBCopy.half_scale)
if (scale_by_half)
{
if (pixelformat == PEControl::RGBA6_Z24)
EncodeRGBA6halfscale(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::RGB8_Z24)
EncodeRGB8halfscale(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::RGB565_Z16) // not supported
EncodeRGB8halfscale(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::Z24)
EncodeZ24halfscale(dest_ptr, src, copyfmt);
switch (params.efb_format)
{
case PEControl::RGBA6_Z24:
EncodeRGBA6halfscale(dst, src, params.copy_format, params.yuv);
break;
case PEControl::RGB8_Z24:
EncodeRGB8halfscale(dst, src, params.copy_format, params.yuv);
break;
case PEControl::RGB565_Z16:
EncodeRGB8halfscale(dst, src, params.copy_format, params.yuv);
break;
case PEControl::Z24:
EncodeZ24halfscale(dst, src, params.copy_format);
break;
}
}
else
{
if (pixelformat == PEControl::RGBA6_Z24)
EncodeRGBA6(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::RGB8_Z24)
EncodeRGB8(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::RGB565_Z16) // not supported
EncodeRGB8(dest_ptr, src, copyfmt, bIsIntensityFmt);
else if (pixelformat == PEControl::Z24)
EncodeZ24(dest_ptr, src, copyfmt);
switch (params.efb_format)
{
case PEControl::RGBA6_Z24:
EncodeRGBA6(dst, src, params.copy_format, params.yuv);
break;
case PEControl::RGB8_Z24:
EncodeRGB8(dst, src, params.copy_format, params.yuv);
break;
case PEControl::RGB565_Z16:
EncodeRGB8(dst, src, params.copy_format, params.yuv);
break;
case PEControl::Z24:
EncodeZ24(dst, src, params.copy_format);
break;
}
}
}
}
// Encodes the src_rect region of the EFB into dst.
//
// XFB copies (params.copy_format == EFBCopyFormat::XFB) are written as packed YUV
// 4:2:2 through EfbInterface::EncodeXFB, using native_width as the row stride and
// params.y_scale as the vertical scale. Every other format is forwarded unchanged to
// EncodeEfbCopy.
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
            u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
            bool scale_by_half)
{
  if (params.copy_format != EFBCopyFormat::XFB)
  {
    EncodeEfbCopy(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
                  scale_by_half);
    return;
  }

  auto* const xfb_destination = reinterpret_cast<EfbInterface::yuv422_packed*>(dst);
  EfbInterface::EncodeXFB(xfb_destination, native_width, src_rect, params.y_scale);
}
}

View File

@ -5,8 +5,13 @@
#pragma once
#include "Common/CommonTypes.h"
#include "VideoCommon/VideoCommon.h"
struct EFBCopyParams;
namespace TextureEncoder
{
void Encode(u8* dest_ptr);
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half);
}