diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index 9117e4d0f4..b93fcf980a 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -591,9 +591,9 @@ void FifoPlayer::ClearEfb() UPE_Copy copy = bpmem.triggerEFBCopy; copy.clamp_top = false; copy.clamp_bottom = false; - copy.yuv = false; + copy.unknown_bit = false; copy.target_pixel_format = static_cast(EFBCopyFormat::RGBA8) << 1; - copy.gamma = 0; + copy.gamma = GammaCorrection::Gamma1_0; copy.half_scale = false; copy.scale_invert = false; copy.clear = true; diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index 5b2b73c365..2b95586f44 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -14,7 +14,7 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { } @@ -22,7 +22,7 @@ protected: const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { } }; diff --git a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp index 1e5498e28a..399e0bb57f 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.cpp +++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp @@ -535,9 +535,14 @@ static yuv444 ConvertColorToYUV(u32 color) // GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion - return {static_cast(0.257f * red + 0.504f * green + 0.098f * blue), - static_cast(-0.148f * red + -0.291f * green + 0.439f * blue), - static_cast(0.439f * red + -0.368f * green + -0.071f * blue)}; + // These numbers were determined by hardware testing + const u16 y = +66 * red + 129 * green + +25 * blue; + const s16 u = -38 * red + -74 * green + 112 * blue; + const s16 v = 112 * red + -94 * green + -18 * blue; + const u8 y_round = static_cast((y >> 8) + ((y >> 7) & 1)); + const s8 u_round = static_cast((u >> 8) + ((u >> 7) & 1)); + const s8 v_round = static_cast((v >> 8) + ((v >> 7) & 1)); + return {y_round, u_round, v_round}; } u32 GetDepth(u16 x, u16 y) diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 9ffa8fa4f4..a7d241197f 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -14,7 +14,7 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half, y_scale, gamma); @@ -23,7 +23,7 @@ protected: const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { // TODO: If we ever want to "fake" vram textures, we would need to implement this } diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 28155a2cd6..8d57b76a38 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -2035,17 +2035,30 @@ struct fmt::formatter : EnumFormatter constexpr formatter() : EnumFormatter(names) {} }; +enum class GammaCorrection : u32 +{ + Gamma1_0 = 0, + Gamma1_7 = 1, + Gamma2_2 = 2, + // Hardware testing indicates this behaves the same as Gamma2_2 + Invalid2_2 = 3, +}; +template <> +struct fmt::formatter : EnumFormatter +{ + constexpr formatter() : EnumFormatter({"1.0", "1.7", "2.2", "Invalid 2.2"}) {} +}; + union UPE_Copy { u32 Hex; - BitField<0, 1, bool, u32> clamp_top; // if set clamp top - BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom - BitField<2, 1, bool, u32> yuv; // if set, color conversion from RGB to YUV + BitField<0, 1, bool, u32> clamp_top; // if set clamp top + BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom + BitField<2, 1, u32> unknown_bit; BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason // the msb is the lsb (pattern: cycling right shift) - // gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved - BitField<7, 2, u32> gamma; + BitField<7, 2, GammaCorrection> gamma; // "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1) BitField<9, 1, bool, u32> half_scale; BitField<10, 1, bool, u32> scale_invert; // if set vertical scaling is on @@ -2084,23 +2097,10 @@ struct fmt::formatter else clamp = "None"; } - std::string_view gamma = "Invalid"; - switch (copy.gamma) - { - case 0: - gamma = "1.0"; - break; - case 1: - gamma = "1.7"; - break; - case 2: - gamma = "2.2"; - break; - } return fmt::format_to(ctx.out(), "Clamping: {}\n" - "Converting from RGB to YUV: {}\n" + "Unknown bit: {}\n" "Target pixel format: {}\n" "Gamma correction: {}\n" "Half scale: {}\n" @@ -2110,7 +2110,7 @@ struct fmt::formatter "Copy to XFB: {}\n" "Intensity format: {}\n" "Automatic color conversion: {}", - clamp, no_yes[copy.yuv], copy.tp_realFormat(), gamma, + clamp, copy.unknown_bit, copy.tp_realFormat(), copy.gamma, no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear], copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt], no_yes[copy.auto_conv]); @@ -2123,10 +2123,12 @@ union CopyFilterCoefficients u64 Hex; + BitField<0, 32, u32, u64> Low; BitField<0, 6, u64> w0; BitField<6, 6, u64> w1; BitField<12, 6, u64> w2; BitField<18, 6, u64> w3; + BitField<32, 32, u32, u64> High; BitField<32, 6, u64> w4; BitField<38, 6, u64> w5; BitField<44, 6, u64> w6; diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index c6746d444a..010dbf6abe 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -11,6 +11,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "Core/ConfigManager.h" @@ -42,7 +43,8 @@ using namespace BPFunctions; -static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f}; +static constexpr Common::EnumMap s_gammaLUT = {1.0f, 1.7f, 2.2f, + 2.2f}; void BPInit() { @@ -276,9 +278,9 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24; g_texture_cache->CopyRenderTargetToTexture( destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy, - srcRect, PE_copy.intensity_fmt, PE_copy.half_scale, 1.0f, 1.0f, - bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom, - bpmem.copyfilter.GetCoefficients()); + srcRect, PE_copy.intensity_fmt && PE_copy.auto_conv, PE_copy.half_scale, 1.0f, + s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top, + bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients()); } else { diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 40949f34aa..345578d071 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -550,7 +550,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF break; default: - WARN_LOG_FMT(VIDEO, "From format {} is not supported", static_cast(from_format)); + WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format); return "{}\n"; } @@ -602,7 +602,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF } break; default: - WARN_LOG_FMT(VIDEO, "To format {} is not supported", static_cast(to_format)); + WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format); return "{}\n"; } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 9eb6e09792..2f896027ad 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -276,8 +276,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, const u8* palette, TLU const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt); if (!pipeline) { - ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {:#04X}", - static_cast(tlutfmt)); + ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {}", tlutfmt); return nullptr; } @@ -345,9 +344,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEn g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format); if (!pipeline) { - ERROR_LOG_FMT(VIDEO, - "Failed to obtain texture reinterpreting pipeline from format {:#04X} to {:#04X}", - static_cast(existing_entry->format.texfmt), static_cast(new_format)); + ERROR_LOG_FMT(VIDEO, "Failed to obtain texture reinterpreting pipeline from format {} to {}", + existing_entry->format.texfmt, new_format); return nullptr; } @@ -1980,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry) } } -EFBCopyFilterCoefficients +std::array TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. - return EFBCopyFilterCoefficients{ - static_cast(static_cast(coefficients[0]) + static_cast(coefficients[1])) / - 64.0f, - static_cast(static_cast(coefficients[2]) + static_cast(coefficients[3]) + - static_cast(coefficients[4])) / - 64.0f, - static_cast(static_cast(coefficients[5]) + static_cast(coefficients[6])) / - 64.0f, + return { + static_cast(coefficients[0]) + static_cast(coefficients[1]), + static_cast(coefficients[2]) + static_cast(coefficients[3]) + + static_cast(coefficients[4]), + static_cast(coefficients[5]) + static_cast(coefficients[6]), }; } -EFBCopyFilterCoefficients +std::array TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // If the user disables the copy filter, only apply it to the VRAM copy. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. - EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients); + std::array res = GetRAMCopyFilterCoefficients(coefficients); if (!g_ActiveConfig.bDisableCopyFilter) return res; // Disabling the copy filter in options should not ignore the values the game sets completely, // as some games use the filter coefficients to control the brightness of the screen. Instead, // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. - res.middle = res.upper + res.middle + res.lower; - res.upper = 0.0f; - res.lower = 0.0f; + res[1] = res[0] + res[1] + res[2]; + res[0] = 0; + res[2] = 0; return res; } -bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients) +bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array& coefficients) { // If the top/bottom coefficients are zero, no point sampling/blending from these rows. - return coefficients.upper != 0 || coefficients.lower != 0; + return coefficients[0] != 0 || coefficients[2] != 0; +} + +bool TextureCacheBase::CopyFilterCanOverflow(const std::array& coefficients) +{ + // Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that, + // colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping + // breaks (as colors end up >= 512, which wraps back to 0). + return coefficients[0] + coefficients[1] + coefficients[2] >= 128; } void TextureCacheBase::CopyRenderTargetToTexture( @@ -2257,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_ram) { - EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); + const std::array coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); PixelFormat srcFormat = bpmem.zcontrol.pixel_format; EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, - NeedsCopyFilterInShader(coefficients)); + AllCopyFilterCoefsNeeded(coefficients), + CopyFilterCanOverflow(coefficients), gamma != 1.0); std::unique_ptr staging_texture = GetEFBCopyStagingTexture(); if (staging_texture) @@ -2718,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) + const std::array& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. g_framebuffer_manager->FlushEFBPokes(); // Get the pipeline which we will be using. If the compilation failed, this will be null. - const AbstractPipeline* copy_pipeline = - g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( - dst_format, is_depth_copy, is_intensity, scale_by_half, - NeedsCopyFilterInShader(filter_coefficients))); + const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline( + TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, + scale_by_half, 1.0f / gamma, filter_coefficients)); if (!copy_pipeline) { WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); @@ -2748,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop struct Uniforms { float src_left, src_top, src_width, src_height; - float filter_coefficients[3]; + std::array filter_coefficients; float gamma_rcp; float clamp_top; float clamp_bottom; @@ -2763,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop uniforms.src_top = framebuffer_rect.top * rcp_efb_height; uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; - uniforms.filter_coefficients[0] = filter_coefficients.upper; - uniforms.filter_coefficients[1] = filter_coefficients.middle; - uniforms.filter_coefficients[2] = filter_coefficients.lower; + uniforms.filter_coefficients = filter_coefficients; uniforms.gamma_rcp = 1.0f / gamma; // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB, // which returns random garbage from the empty bus (confirmed by hardware tests). @@ -2797,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) + const std::array& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. g_framebuffer_manager->FlushEFBPokes(); @@ -2828,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& float gamma_rcp; float clamp_top; float clamp_bottom; - float filter_coefficients[3]; + std::array filter_coefficients; u32 padding; }; Uniforms encoder_params; @@ -2849,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& encoder_params.clamp_top = (static_cast(top_coord) + .5f) * rcp_efb_height; const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1; encoder_params.clamp_bottom = (static_cast(bottom_coord) + .5f) * rcp_efb_height; - encoder_params.filter_coefficients[0] = filter_coefficients.upper; - encoder_params.filter_coefficients[1] = filter_coefficients.middle; - encoder_params.filter_coefficients[2] = filter_coefficients.lower; + encoder_params.filter_coefficients = filter_coefficients; g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 287f3b840f..4fe11a64f4 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -57,23 +57,30 @@ struct TextureAndTLUTFormat struct EFBCopyParams { EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_, - bool copy_filter_) + bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_, + bool apply_gamma_) : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), - copy_filter(copy_filter_) + all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_), + copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_) { } bool operator<(const EFBCopyParams& rhs) const { - return std::tie(efb_format, copy_format, depth, yuv, copy_filter) < - std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter); + return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed, + copy_filter_can_overflow, + apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, + rhs.all_copy_filter_coefs_needed, + rhs.copy_filter_can_overflow, rhs.apply_gamma); } PixelFormat efb_format; EFBCopyFormat copy_format; bool depth; bool yuv; - bool copy_filter; + bool all_copy_filter_coefs_needed; + bool copy_filter_can_overflow; + bool apply_gamma; }; template <> @@ -89,19 +96,13 @@ struct fmt::formatter else copy_format = fmt::to_string(uid.copy_format); return fmt::format_to(ctx.out(), - "format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}", - uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter); + "format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, " + "all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}", + uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma, + uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow); } }; -// Reduced version of the full coefficient array, with a single value for each row. -struct EFBCopyFilterCoefficients -{ - float upper; - float middle; - float lower; -}; - class TextureCacheBase { private: @@ -267,8 +268,8 @@ public: // Save States void DoState(PointerWrap& p); - // Returns false if the top/bottom row coefficients are zero. - static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); + static bool AllCopyFilterCoefsNeeded(const std::array& coefficients); + static bool CopyFilterCanOverflow(const std::array& coefficients); protected: // Decodes the specified data to the GPU texture specified by entry. @@ -285,12 +286,12 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients); + bool clamp_bottom, const std::array& filter_coefficients); virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients); + const std::array& filter_coefficients); alignas(16) u8* temp = nullptr; size_t temp_size = 0; @@ -338,9 +339,9 @@ private: void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); // Precomputing the coefficients for the previous, current, and next lines for the copy filter. - static EFBCopyFilterCoefficients + static std::array GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); - static EFBCopyFilterCoefficients + static std::array GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); // Flushes a pending EFB copy to RAM from the host to the guest RAM. diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index ed97c48144..6cabb4ce88 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -18,8 +18,6 @@ namespace TextureConversionShaderTiled { -static bool IntensityConstantAdded = false; - u16 GetEncodedSampleCount(EFBCopyFormat format) { switch (format) @@ -48,8 +46,7 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) case EFBCopyFormat::XFB: return 2; default: - PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEncodedSampleCount)", - static_cast(format)); + PanicAlertFmt("Invalid EFB Copy Format {}! (GetEncodedSampleCount)", format); return 1; } } @@ -63,7 +60,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type) " float y_scale;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" + " uint3 filter_coefficients;\n" "}};\n"); if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { @@ -86,115 +83,124 @@ static void WriteHeader(ShaderCode& code, APIType api_type) "float4 RGBA8ToRGBA6(float4 src)\n" "{{\n" - " int4 val = int4(roundEven(src * 255.0)) >> 2;\n" - " return float4(val) / 63.0;\n" + " int4 val = int4(roundEven(src * 255.0));\n" + " val = (val & 0xfc) | (val >> 6);\n" + " return float4(val) / 255.0;\n" "}}\n" "float4 RGBA8ToRGB565(float4 src)\n" "{{\n" " int4 val = int4(roundEven(src * 255.0));\n" - " val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n" - " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n" + " val.r = (val.r & 0xf8) | (val.r >> 5);\n" + " val.g = (val.g & 0xfc) | (val.g >> 6);\n" + " val.b = (val.b & 0xf8) | (val.b >> 5);\n" + " val.a = 255;\n" + " return float4(val) / 255.0;\n" "}}\n"); } static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, APIType api_type) { - const auto WriteSampleOp = [api_type, &code, ¶ms](int yoffset) { - if (!params.depth) - { - switch (params.efb_format) - { - case PixelFormat::RGB8_Z24: - code.Write("RGBA8ToRGB8("); - break; - case PixelFormat::RGBA6_Z24: - code.Write("RGBA8ToRGBA6("); - break; - case PixelFormat::RGB565_Z16: - code.Write("RGBA8ToRGB565("); - break; - default: - code.Write("("); - break; - } - } - else - { - // Handle D3D depth inversion. - if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - code.Write("1.0 - ("); - else - code.Write("("); - } + code.Write("uint4 SampleEFB0(float2 uv, float2 pixel_size, float x_offset, float y_offset) {{\n" + " float4 tex_sample = texture(samp0, float3(uv.x + x_offset * pixel_size.x, "); - code.Write("texture(samp0, float3("); + // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row. + // TODO: This isn't done on TextureConverterShaderGen - maybe it handles that via pixel_size? + if (api_type == APIType::OpenGL) + code.Write("clamp(uv.y - y_offset * pixel_size.y, clamp_tb.x, clamp_tb.y)"); + else + code.Write("clamp(uv.y + y_offset * pixel_size.y, clamp_tb.x, clamp_tb.y)"); - code.Write("uv.x + float(xoffset) * pixel_size.x, "); + code.Write(", 0.0));\n"); - // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row. - if (yoffset != 0) - { - if (api_type == APIType::OpenGL) - code.Write("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); - else - code.Write("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); - } - else - { - code.Write("uv.y"); - } + // TODO: Is this really needed? Doesn't the EFB only store appropriate values? Or is this for + // EFB2Ram having consistent output with force 32-bit color? + if (params.efb_format == PixelFormat::RGB8_Z24) + code.Write(" tex_sample = RGBA8ToRGB8(tex_sample);\n"); + else if (params.efb_format == PixelFormat::RGBA6_Z24) + code.Write(" tex_sample = RGBA8ToRGBA6(tex_sample);\n"); + else if (params.efb_format == PixelFormat::RGB565_Z16) + code.Write(" tex_sample = RGBA8ToRGB565(tex_sample);\n"); - code.Write(", 0.0)))"); - }; - - // The copy filter applies to both color and depth copies. This has been verified on hardware. - // The filter is only applied to the RGB channels, the alpha channel is left intact. - code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n" - "{{\n"); - if (params.copy_filter) + if (params.depth) { - code.Write(" float4 prev_row = "); - WriteSampleOp(-1); - code.Write(";\n" - " float4 current_row = "); - WriteSampleOp(0); - code.Write(";\n" - " float4 next_row = "); - WriteSampleOp(1); - code.Write(";\n" - " return float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" - " float3(1, 1, 1)), current_row.a);\n"); + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + code.Write(" tex_sample.x = 1.0 - tex_sample.x;\n"); + + code.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n" + " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" + "}}\n"); } else { - code.Write(" float4 current_row = "); - WriteSampleOp(0); - code.Write(";\n" - "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" - " current_row.a);\n"); + code.Write(" return uint4(tex_sample * 255.0);\n" + "}}\n"); } + + // The copy filter applies to both color and depth copies. This has been verified on hardware. + // The filter is only applied to the RGB channels, the alpha channel is left intact. + code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int x_offset)\n" + "{{\n"); + if (params.all_copy_filter_coefs_needed) + { + code.Write(" uint4 prev_row = SampleEFB0(uv, pixel_size, float(x_offset), -1.0f);\n" + " uint4 current_row = SampleEFB0(uv, pixel_size, float(x_offset), 0.0f);\n" + " uint4 next_row = SampleEFB0(uv, pixel_size, float(x_offset), 1.0f);\n" + " uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2];\n"); + } + else + { + code.Write(" uint4 current_row = SampleEFB0(uv, pixel_size, float(x_offset), 0.0f);\n" + " uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n"); + } + code.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n" + " // that sum to 64 result in no change in brightness\n" + " uint4 texcol_raw = uint4(combined_rows.rgb >> 6, current_row.a);\n"); + + if (params.copy_filter_can_overflow) + code.Write(" texcol_raw &= 0x1ffu;\n"); + // Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit + // on >= 64, so we always include it. + code.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n"); + + if (params.apply_gamma) + { + code.Write(" texcol_raw = uint4(round(pow(float4(texcol_raw) / 255.0,\n" + " float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n"); + } + + if (params.yuv) + { + code.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n" + " const float4 y_const = float4( 66, 129, 25, 16);\n" + " const float4 u_const = float4(-38, -74, 112, 128);\n" + " const float4 v_const = float4(112, -94, -18, 128);\n" + " // Intensity/YUV format conversion\n" + " texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n" + " dot(u_const, float4(texcol_raw.rgb, 256)),\n" + " dot(v_const, float4(texcol_raw.rgb, 256)));\n" + " // Divide by 256 and round .5 and higher up\n" + " texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1);\n"); + } + + code.Write(" return float4(texcol_raw) / 255.0;\n"); code.Write("}}\n"); } // Block dimensions : widthStride, heightStride // Texture dimensions : width, height, x offset, y offset -static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopyFormat format, - APIType api_type) +static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, APIType api_type) { - WriteHeader(code, api_type); - WriteSampleFunction(code, params, api_type); - code.Write("void main()\n" "{{\n" " int2 sampleUv;\n" " int2 uv1 = int2(gl_FragCoord.xy);\n"); - const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); - const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); - int samples = GetEncodedSampleCount(format); + const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(params.copy_format); + const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(params.copy_format); + int samples = GetEncodedSampleCount(params.copy_format); code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples), IntLog2(blkW)); @@ -244,146 +250,13 @@ static void WriteSampleColor(ShaderCode& code, std::string_view color_comp, std: code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp); } -static void WriteColorToIntensity(ShaderCode& code, std::string_view src, std::string_view dest) -{ - if (!IntensityConstantAdded) - { - code.Write(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n"); - IntensityConstantAdded = true; - } - code.Write(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src); - // don't add IntensityConst.a yet, because doing it later is faster and uses less instructions, - // due to vectorization -} - static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest) { code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth); } -static void WriteEncoderEnd(ShaderCode& code) -{ - code.Write("}}\n"); - IntensityConstantAdded = false; -} - -static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::R8, api_type); - code.Write(" float3 texSample;\n"); - - WriteSampleColor(code, "rgb", "texSample", 0, api_type, params); - WriteColorToIntensity(code, "texSample", "ocol0.b"); - - WriteSampleColor(code, "rgb", "texSample", 1, api_type, params); - WriteColorToIntensity(code, "texSample", "ocol0.g"); - - WriteSampleColor(code, "rgb", "texSample", 2, api_type, params); - WriteColorToIntensity(code, "texSample", "ocol0.r"); - - WriteSampleColor(code, "rgb", "texSample", 3, api_type, params); - WriteColorToIntensity(code, "texSample", "ocol0.a"); - - // See WriteColorToIntensity - code.Write(" ocol0.rgba += IntensityConst.aaaa;\n"); - - WriteEncoderEnd(code); -} - -static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::R4, api_type); - code.Write(" float3 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); - - WriteSampleColor(code, "rgb", "texSample", 0, api_type, params); - WriteColorToIntensity(code, "texSample", "color0.b"); - - WriteSampleColor(code, "rgb", "texSample", 1, api_type, params); - WriteColorToIntensity(code, "texSample", "color1.b"); - - WriteSampleColor(code, "rgb", "texSample", 2, api_type, params); - WriteColorToIntensity(code, "texSample", "color0.g"); - - WriteSampleColor(code, "rgb", "texSample", 3, api_type, params); - WriteColorToIntensity(code, "texSample", "color1.g"); - - WriteSampleColor(code, "rgb", "texSample", 4, api_type, params); - WriteColorToIntensity(code, "texSample", "color0.r"); - - WriteSampleColor(code, "rgb", "texSample", 5, api_type, params); - WriteColorToIntensity(code, "texSample", "color1.r"); - - WriteSampleColor(code, "rgb", "texSample", 6, api_type, params); - WriteColorToIntensity(code, "texSample", "color0.a"); - - WriteSampleColor(code, "rgb", "texSample", 7, api_type, params); - WriteColorToIntensity(code, "texSample", "color1.a"); - - code.Write(" color0.rgba += IntensityConst.aaaa;\n" - " color1.rgba += IntensityConst.aaaa;\n"); - - WriteToBitDepth(code, 4, "color0", "color0"); - WriteToBitDepth(code, 4, "color1", "color1"); - - code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); - WriteEncoderEnd(code); -} - -static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type); - code.Write(" float4 texSample;\n"); - - WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); - code.Write(" ocol0.b = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "ocol0.g"); - - WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.Write(" ocol0.r = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "ocol0.a"); - - code.Write(" ocol0.ga += IntensityConst.aa;\n"); - - WriteEncoderEnd(code); -} - -static void WriteIA4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type); - code.Write(" float4 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); - - WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); - code.Write(" color0.b = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "color1.b"); - - WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.Write(" color0.g = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "color1.g"); - - WriteSampleColor(code, "rgba", "texSample", 2, api_type, params); - code.Write(" color0.r = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "color1.r"); - - WriteSampleColor(code, "rgba", "texSample", 3, api_type, params); - code.Write(" color0.a = texSample.a;\n"); - WriteColorToIntensity(code, "texSample", "color1.a"); - - code.Write(" color1.rgba += IntensityConst.aaaa;\n"); - - WriteToBitDepth(code, 4, "color0", "color0"); - WriteToBitDepth(code, 4, "color1", "color1"); - - code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); - WriteEncoderEnd(code); -} - static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::RGB565, api_type); code.Write(" float3 texSample0;\n" " float3 texSample1;\n"); @@ -403,13 +276,10 @@ static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopy code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n"); code.Write(" ocol0 = ocol0 / 255.0;\n"); - WriteEncoderEnd(code); } static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::RGB5A3, api_type); - code.Write(" float4 texSample;\n" " float color0;\n" " float gUpper;\n" @@ -467,13 +337,10 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy code.Write("}}\n"); code.Write(" ocol0 = ocol0 / 255.0;\n"); - WriteEncoderEnd(code); } static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type); - code.Write(" float4 texSample;\n" " float4 color0;\n" " float4 color1;\n"); @@ -491,14 +358,11 @@ static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyP " color1.a = texSample.b;\n"); code.Write(" ocol0 = first ? color0 : color1;\n"); - - WriteEncoderEnd(code); } static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::R4, api_type); code.Write(" float4 color0;\n" " float4 color1;\n"); @@ -515,26 +379,20 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_ WriteToBitDepth(code, 4, "color1", "color1"); code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); - WriteEncoderEnd(code); } static void WriteC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::R8, api_type); - WriteSampleColor(code, comp, "ocol0.b", 0, api_type, params); WriteSampleColor(code, comp, "ocol0.g", 1, api_type, params); WriteSampleColor(code, comp, "ocol0.r", 2, api_type, params); WriteSampleColor(code, comp, "ocol0.a", 3, api_type, params); - - WriteEncoderEnd(code); } static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type); code.Write(" float2 texSample;\n" " float4 color0;\n" " float4 color1;\n"); @@ -559,198 +417,52 @@ static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api WriteToBitDepth(code, 4, "color1", "color1"); code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); - WriteEncoderEnd(code); } static void WriteCC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type); - WriteSampleColor(code, comp, "ocol0.bg", 0, api_type, params); WriteSampleColor(code, comp, "ocol0.ra", 1, api_type, params); - - WriteEncoderEnd(code); -} - -static void WriteZ8Encoder(ShaderCode& code, std::string_view multiplier, APIType api_type, - const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::G8, api_type); - - code.Write(" float depth;\n"); - - WriteSampleColor(code, "r", "depth", 0, api_type, params); - code.Write("ocol0.b = frac(depth * {});\n", multiplier); - - WriteSampleColor(code, "r", "depth", 1, api_type, params); - code.Write("ocol0.g = frac(depth * {});\n", multiplier); - - WriteSampleColor(code, "r", "depth", 2, api_type, params); - code.Write("ocol0.r = frac(depth * {});\n", multiplier); - - WriteSampleColor(code, "r", "depth", 3, api_type, params); - code.Write("ocol0.a = frac(depth * {});\n", multiplier); - - WriteEncoderEnd(code); -} - -static void WriteZ16Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type); - - code.Write(" float depth;\n" - " float3 expanded;\n"); - - // Byte order is reversed - - WriteSampleColor(code, "r", "depth", 0, api_type, params); - - code.Write(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n"); - - code.Write(" ocol0.b = expanded.g / 255.0;\n" - " ocol0.g = expanded.r / 255.0;\n"); - - WriteSampleColor(code, "r", "depth", 1, api_type, params); - - code.Write(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n"); - - code.Write(" ocol0.r = expanded.g / 255.0;\n" - " ocol0.a = expanded.r / 255.0;\n"); - - WriteEncoderEnd(code); -} - -static void WriteZ16LEncoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::GB8, api_type); - - code.Write(" float depth;\n" - " float3 expanded;\n"); - - // Byte order is reversed - - WriteSampleColor(code, "r", "depth", 0, api_type, params); - - code.Write(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n" - " depth -= expanded.g * 256.0;\n" - " expanded.b = depth;\n"); - - code.Write(" ocol0.b = expanded.b / 255.0;\n" - " ocol0.g = expanded.g / 255.0;\n"); - - WriteSampleColor(code, "r", "depth", 1, api_type, params); - - code.Write(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n" - " depth -= expanded.g * 256.0;\n" - " expanded.b = depth;\n"); - - code.Write(" ocol0.r = expanded.b / 255.0;\n" - " ocol0.a = expanded.g / 255.0;\n"); - - WriteEncoderEnd(code); -} - -static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) -{ - WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type); - - code.Write(" float depth0;\n" - " float depth1;\n" - " float3 expanded0;\n" - " float3 expanded1;\n"); - - WriteSampleColor(code, "r", "depth0", 0, api_type, params); - WriteSampleColor(code, "r", "depth1", 1, api_type, params); - - for (int i = 0; i < 2; i++) - { - code.Write(" depth{} *= 16777216.0;\n", i); - - code.Write(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i); - code.Write(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i); - code.Write(" expanded{}.g = floor(depth{} / 256.0);\n", i, i); - code.Write(" depth{} -= expanded{}.g * 256.0;\n", i, i); - code.Write(" expanded{}.b = depth{};\n", i, i); - } - - code.Write(" if (!first) {{\n"); - // Upper 16 - code.Write(" ocol0.b = expanded0.g / 255.0;\n" - " ocol0.g = expanded0.b / 255.0;\n" - " ocol0.r = expanded1.g / 255.0;\n" - " ocol0.a = expanded1.b / 255.0;\n" - " }} else {{\n"); - // Lower 8 - code.Write(" ocol0.b = 1.0;\n" - " ocol0.g = expanded0.r / 255.0;\n" - " ocol0.r = 1.0;\n" - " ocol0.a = expanded1.r / 255.0;\n" - " }}\n"); - - WriteEncoderEnd(code); } static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { - WriteSwizzler(code, params, EFBCopyFormat::XFB, api_type); - - code.Write("float3 color0, color1;\n"); - WriteSampleColor(code, "rgb", "color0", 0, api_type, params); - WriteSampleColor(code, "rgb", "color1", 1, api_type, params); - - // Gamma is only applied to XFB copies. - code.Write(" color0 = pow(abs(color0), float3(gamma_rcp, gamma_rcp, gamma_rcp));\n" - " color1 = pow(abs(color1), float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"); + code.Write("float4 color0 = float4(0, 0, 0, 1), color1 = float4(0, 0, 0, 1);\n"); + WriteSampleColor(code, "rgb", "color0.rgb", 0, api_type, params); + WriteSampleColor(code, "rgb", "color1.rgb", 1, api_type, params); // Convert to YUV. - code.Write(" const float3 y_const = float3(0.257, 0.504, 0.098);\n" - " const float3 u_const = float3(-0.148, -0.291, 0.439);\n" - " const float3 v_const = float3(0.439, -0.368, -0.071);\n" - " float3 average = (color0 + color1) * 0.5;\n" - " ocol0.b = dot(color0, y_const) + 0.0625;\n" - " ocol0.g = dot(average, u_const) + 0.5;\n" - " ocol0.r = dot(color1, y_const) + 0.0625;\n" - " ocol0.a = dot(average, v_const) + 0.5;\n"); - - WriteEncoderEnd(code); + code.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n" + " const float4 y_const = float4( 66, 129, 25, 16);\n" + " const float4 u_const = float4(-38, -74, 112, 128);\n" + " const float4 v_const = float4(112, -94, -18, 128);\n" + " float4 average = (color0 + color1) * 0.5;\n" + " // TODO: check rounding\n" + " ocol0.b = round(dot(color0, y_const)) / 256.0;\n" + " ocol0.g = round(dot(average, u_const)) / 256.0;\n" + " ocol0.r = round(dot(color1, y_const)) / 256.0;\n" + " ocol0.a = round(dot(average, v_const)) / 256.0;\n"); } std::string GenerateEncodingShader(const EFBCopyParams& params, APIType api_type) { ShaderCode code; + WriteHeader(code, api_type); + WriteSampleFunction(code, params, api_type); + WriteSwizzler(code, params, api_type); + switch (params.copy_format) { case EFBCopyFormat::R4: - if (params.yuv) - WriteI4Encoder(code, api_type, params); - else - WriteC4Encoder(code, "r", api_type, params); + WriteC4Encoder(code, "r", api_type, params); break; case EFBCopyFormat::RA4: - if (params.yuv) - WriteIA4Encoder(code, api_type, params); - else - WriteCC4Encoder(code, "ar", api_type, params); + WriteCC4Encoder(code, "ar", api_type, params); break; case EFBCopyFormat::RA8: - if (params.yuv) - WriteIA8Encoder(code, api_type, params); - else - WriteCC8Encoder(code, "ar", api_type, params); + WriteCC8Encoder(code, "ar", api_type, params); break; case EFBCopyFormat::RGB565: WriteRGB565Encoder(code, api_type, params); @@ -759,54 +471,37 @@ std::string GenerateEncodingShader(const EFBCopyParams& params, APIType api_type WriteRGB5A3Encoder(code, api_type, params); break; case EFBCopyFormat::RGBA8: - if (params.depth) - WriteZ24Encoder(code, api_type, params); - else - WriteRGBA8Encoder(code, api_type, params); + WriteRGBA8Encoder(code, api_type, params); break; case EFBCopyFormat::A8: WriteC8Encoder(code, "a", api_type, params); break; case EFBCopyFormat::R8_0x1: case EFBCopyFormat::R8: - if (params.yuv) - WriteI8Encoder(code, api_type, params); - else - WriteC8Encoder(code, "r", api_type, params); + WriteC8Encoder(code, "r", api_type, params); break; case EFBCopyFormat::G8: - if (params.depth) - WriteZ8Encoder(code, "256.0", api_type, params); // Z8M - else - WriteC8Encoder(code, "g", api_type, params); + WriteC8Encoder(code, "g", api_type, params); break; case EFBCopyFormat::B8: - if (params.depth) - WriteZ8Encoder(code, "65536.0", api_type, params); // Z8L - else - WriteC8Encoder(code, "b", api_type, params); + WriteC8Encoder(code, "b", api_type, params); break; case EFBCopyFormat::RG8: - if (params.depth) - WriteZ16Encoder(code, api_type, params); // Z16H - else - WriteCC8Encoder(code, "gr", api_type, params); + WriteCC8Encoder(code, "gr", api_type, params); break; case EFBCopyFormat::GB8: - if (params.depth) - WriteZ16LEncoder(code, api_type, params); // Z16L - else - WriteCC8Encoder(code, "bg", api_type, params); + WriteCC8Encoder(code, "bg", api_type, params); break; case EFBCopyFormat::XFB: WriteXFBEncoder(code, api_type, params); break; default: - PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GenerateEncodingShader)", - static_cast(params.copy_format)); + PanicAlertFmt("Invalid EFB Copy Format {}! (GenerateEncodingShader)", params.copy_format); break; } + code.Write("}}\n"); + return code.GetBuffer(); } @@ -1273,6 +968,8 @@ static const std::map s_decoding_shader_info{ // We do the inverse BT.601 conversion for YCbCr to RGB // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion + // TODO: Use more precise numbers for this conversion (although on real hardware, the XFB isn't + // in a real texture format, so does this conversion actually ever happen?) {TextureFormat::XFB, {TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false, R"( diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index ae09871b98..3bee37060a 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -6,13 +6,15 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" namespace TextureConversionShaderGen { TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, - bool scale_by_half, bool copy_filter) + bool scale_by_half, float gamma_rcp, + const std::array& filter_coefficients) { TCShaderUid out; @@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i uid_data->is_depth_copy = is_depth_copy; uid_data->is_intensity = is_intensity; uid_data->scale_by_half = scale_by_half; - uid_data->copy_filter = copy_filter; + uid_data->all_copy_filter_coefs_needed = + TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients); + uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients); + // If the gamma is needed, then include that too. + uid_data->apply_gamma = gamma_rcp != 1.0f; return out; } @@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out) { out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" + " uint3 filter_coefficients;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" " float pixel_height;\n" @@ -78,11 +84,25 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) WriteHeader(api_type, out); out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n", + out.Write("uint4 SampleEFB(float3 uv, float y_offset) {{\n" + " float4 tex_sample = texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * " + "pixel_height), clamp_tb.x, clamp_tb.y), {}));\n", mono_depth ? "0.0" : "uv.z"); + if (uid_data->is_depth_copy) + { + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + out.Write(" tex_sample.x = 1.0 - tex_sample.x;\n"); + + out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n" + " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" + "}}\n"); + } + else + { + out.Write(" return uint4(tex_sample * 255.0);\n" + "}}\n"); + } + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("VARYING_LOCATION(0) in VertexData {{\n" @@ -93,201 +113,125 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) { out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"); } + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" "void main()\n{{\n"); // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. - if (uid_data->copy_filter) + if (uid_data->all_copy_filter_coefs_needed) { - out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" - " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" - " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" - " float3(1, 1, 1)), current_row.a);\n"); + out.Write(" uint4 prev_row = SampleEFB(v_tex0, -1.0f);\n" + " uint4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " uint4 next_row = SampleEFB(v_tex0, 1.0f);\n" + " uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2];\n"); } else { - out.Write( - " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" - " current_row.a);\n"); + out.Write(" uint4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n"); + } + out.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n" + " // that sum to 64 result in no change in brightness\n" + " uint4 texcol_raw = uint4(combined_rows.rgb >> 6, {});\n", + uid_data->efb_has_alpha ? "current_row.a" : "255"); + + if (uid_data->copy_filter_can_overflow) + out.Write(" texcol_raw &= 0x1ffu;\n"); + // Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit + // on >= 64, so we always include it. + out.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n"); + + if (uid_data->apply_gamma) + { + out.Write(" texcol_raw = uint4(round(pow(abs(float4(texcol_raw) / 255.0),\n" + " float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n"); } - if (uid_data->is_depth_copy) + if (uid_data->is_intensity) { - if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - out.Write("texcol.x = 1.0 - texcol.x;\n"); - - out.Write(" int depth = int(texcol.x * 16777216.0);\n" - - // Convert to Z24 format - " int4 workspace;\n" - " workspace.r = (depth >> 16) & 255;\n" - " workspace.g = (depth >> 8) & 255;\n" - " workspace.b = depth & 255;\n" - - // Convert to Z4 format - " workspace.a = (depth >> 16) & 0xF0;\n" - - // Normalize components to [0.0..1.0] - " texcol = float4(workspace) / 255.0;\n"); - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // Z4 - out.Write(" ocol0 = texcol.aaaa;\n"); - break; - - case EFBCopyFormat::R8_0x1: // Z8 - case EFBCopyFormat::R8: // Z8H - out.Write(" ocol0 = texcol.rrrr;\n"); - break; - - case EFBCopyFormat::RA8: // Z16 - out.Write(" ocol0 = texcol.gggr;\n"); - break; - - case EFBCopyFormat::RG8: // Z16 (reverse order) - out.Write(" ocol0 = texcol.rrrg;\n"); - break; - - case EFBCopyFormat::RGBA8: // Z24X8 - out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); - break; - - case EFBCopyFormat::G8: // Z8M - out.Write(" ocol0 = texcol.gggg;\n"); - break; - - case EFBCopyFormat::B8: // Z8L - out.Write(" ocol0 = texcol.bbbb;\n"); - break; - - case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits - // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits - // stored as alpha) - // Used e.g. in Zelda: Skyward Sword - out.Write(" ocol0 = texcol.gggb;\n"); - break; - - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy zbuf format: {:#X}", - static_cast(uid_data->dst_format)); - out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n"); - break; - } + out.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n" + " const float4 y_const = float4( 66, 129, 25, 16);\n" + " const float4 u_const = float4(-38, -74, 112, 128);\n" + " const float4 v_const = float4(112, -94, -18, 128);\n" + " // Intensity/YUV format conversion\n" + " texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n" + " dot(u_const, float4(texcol_raw.rgb, 256)),\n" + " dot(v_const, float4(texcol_raw.rgb, 256)));\n" + " // Divide by 256 and round .5 and higher up\n" + " texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1);\n"); } - else if (uid_data->is_intensity) + + switch (uid_data->dst_format) { - if (!uid_data->efb_has_alpha) - out.Write(" texcol.a = 1.0;\n"); + case EFBCopyFormat::R4: // R4 + out.Write(" float red = float(texcol_raw.r & 0xF0u) / 240.0;\n" + " ocol0 = float4(red, red, red, red);\n"); + break; - bool has_four_bits = - (uid_data->dst_format == EFBCopyFormat::R4 || uid_data->dst_format == EFBCopyFormat::RA4); - bool has_alpha = - (uid_data->dst_format == EFBCopyFormat::RA4 || uid_data->dst_format == EFBCopyFormat::RA8); + case EFBCopyFormat::R8_0x1: // R8 + case EFBCopyFormat::R8: // R8 + out.Write(" ocol0 = float4(texcol_raw).rrrr / 255.0;\n"); + break; - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // I4 - case EFBCopyFormat::R8_0x1: // I8 - case EFBCopyFormat::R8: // I8 - case EFBCopyFormat::RA4: // IA4 - case EFBCopyFormat::RA8: // IA8 - if (has_four_bits) - out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n"); + case EFBCopyFormat::RA4: // RA4 + out.Write(" float2 red_alpha = float2(texcol_raw.ra & 0xF0u) / 240.0;\n" + " ocol0 = red_alpha.rrrg;\n"); + break; - // TODO - verify these coefficients - out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n" - " float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n" - " ocol0 = float4(intensity, intensity, intensity, {});\n", - has_alpha ? "texcol.a" : "intensity"); - break; + case EFBCopyFormat::RA8: // RA8 + out.Write(" ocol0 = float4(texcol_raw).rrra / 255.0;\n"); + break; - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy intensity format: {:#X}", - static_cast(uid_data->dst_format)); - out.Write(" ocol0 = texcol;\n"); - break; - } - } - else - { - if (!uid_data->efb_has_alpha) - out.Write(" texcol.a = 1.0;\n"); + case EFBCopyFormat::A8: // A8 + out.Write(" ocol0 = float4(texcol_raw).aaaa / 255.0;\n"); + break; - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // R4 - out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = float4(red, red, red, red);\n"); - break; + case EFBCopyFormat::G8: // G8 + out.Write(" ocol0 = float4(texcol_raw).gggg / 255.0;\n"); + break; - case EFBCopyFormat::R8_0x1: // R8 - case EFBCopyFormat::R8: // R8 - out.Write(" ocol0 = texcol.rrrr;\n"); - break; + case EFBCopyFormat::B8: // B8 + out.Write(" ocol0 = float4(texcol_raw).bbbb / 255.0;\n"); + break; - case EFBCopyFormat::RA4: // RA4 - out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = red_alpha.rrrg;\n"); - break; + case EFBCopyFormat::RG8: // RG8 + out.Write(" ocol0 = float4(texcol_raw).rrrg / 255.0;\n"); + break; - case EFBCopyFormat::RA8: // RA8 - out.Write(" ocol0 = texcol.rrra;\n"); - break; + case EFBCopyFormat::GB8: // GB8 + out.Write(" ocol0 = float4(texcol_raw).gggb / 255.0;\n"); + break; - case EFBCopyFormat::A8: // A8 - out.Write(" ocol0 = texcol.aaaa;\n"); - break; + case EFBCopyFormat::RGB565: // RGB565 + out.Write(" float2 red_blue = float2(texcol_raw.rb & 0xF8u) / 248.0;\n" + " float green = float(texcol_raw.g & 0xFCu) / 252.0;\n" + " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); + break; - case EFBCopyFormat::G8: // G8 - out.Write(" ocol0 = texcol.gggg;\n"); - break; + case EFBCopyFormat::RGB5A3: // RGB5A3 + // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection + // will need to be implemented once we move away from floats. + out.Write(" float3 color = float3(texcol_raw.rgb & 0xF8u) / 248.0;\n" + " float alpha = float(texcol_raw.a & 0xE0u) / 224.0;\n" + " ocol0 = float4(color, alpha);\n"); + break; - case EFBCopyFormat::B8: // B8 - out.Write(" ocol0 = texcol.bbbb;\n"); - break; + case EFBCopyFormat::RGBA8: // RGBA8 + out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n"); + break; - case EFBCopyFormat::RG8: // RG8 - out.Write(" ocol0 = texcol.rrrg;\n"); - break; + case EFBCopyFormat::XFB: + out.Write(" ocol0 = float4(float3(texcol_raw.rgb) / 255.0, 1.0);\n"); + break; - case EFBCopyFormat::GB8: // GB8 - out.Write(" ocol0 = texcol.gggb;\n"); - break; - - case EFBCopyFormat::RGB565: // RGB565 - out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" - " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); - break; - - case EFBCopyFormat::RGB5A3: // RGB5A3 - // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection - // will need to be implemented once we move away from floats. - out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" - " ocol0 = float4(color, alpha);\n"); - break; - - case EFBCopyFormat::RGBA8: // RGBA8 - out.Write(" ocol0 = texcol;\n"); - break; - - case EFBCopyFormat::XFB: - out.Write(" ocol0 = float4(pow(abs(texcol.rgb), float3(gamma_rcp, gamma_rcp, gamma_rcp)), " - "1.0f);\n"); - break; - - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy color format: {:#X}", - static_cast(uid_data->dst_format)); - out.Write(" ocol0 = texcol;\n"); - break; - } + default: + ERROR_LOG_FMT(VIDEO, "Unknown copy/intensity color format: {} {}", uid_data->dst_format, + uid_data->is_intensity); + out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n"); + break; } out.Write("}}\n"); diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.h b/Source/Core/VideoCommon/TextureConverterShaderGen.h index 54665104f6..10745cb3dc 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.h +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h @@ -25,7 +25,9 @@ struct UidData u32 is_depth_copy : 1; u32 is_intensity : 1; u32 scale_by_half : 1; - u32 copy_filter : 1; + u32 all_copy_filter_coefs_needed : 1; + u32 copy_filter_can_overflow : 1; + u32 apply_gamma : 1; }; #pragma pack() @@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type); ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data); TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, - bool scale_by_half, bool copy_filter); + bool scale_by_half, float gamma_rcp, + const std::array& filter_coefficients); } // namespace TextureConversionShaderGen @@ -53,8 +56,10 @@ struct fmt::formatter dst_format = fmt::to_string(uid.dst_format); return fmt::format_to(ctx.out(), "dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, " - "scale_by_half: {}, copy_filter: {}", + "scale_by_half: {}, all_copy_filter_coefs_needed: {}, " + "copy_filter_can_overflow: {}, apply_gamma: {}", dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity, - uid.scale_by_half, uid.copy_filter); + uid.scale_by_half, uid.all_copy_filter_coefs_needed, + uid.copy_filter_can_overflow, uid.apply_gamma); } }; diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index 798496ac6d..30121aeb7b 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -50,8 +50,7 @@ int TexDecoder_GetTexelSizeInNibbles(TextureFormat format) case TextureFormat::XFB: return 4; default: - PanicAlertFmt("Invalid Texture Format ({:#X})! (GetTexelSizeInNibbles)", - static_cast(format)); + PanicAlertFmt("Invalid Texture Format {}! (GetTexelSizeInNibbles)", format); return 1; } } @@ -90,8 +89,7 @@ int TexDecoder_GetBlockWidthInTexels(TextureFormat format) case TextureFormat::XFB: return 16; default: - PanicAlertFmt("Invalid Texture Format ({:#X})! (GetBlockWidthInTexels)", - static_cast(format)); + PanicAlertFmt("Invalid Texture Format {}! (GetBlockWidthInTexels)", format); return 8; } } @@ -125,8 +123,7 @@ int TexDecoder_GetBlockHeightInTexels(TextureFormat format) case TextureFormat::XFB: return 1; default: - PanicAlertFmt("Invalid Texture Format ({:#X})! (GetBlockHeightInTexels)", - static_cast(format)); + PanicAlertFmt("Invalid Texture Format {}! (GetBlockHeightInTexels)", format); return 4; } } @@ -160,8 +157,7 @@ int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format) case EFBCopyFormat::XFB: return 16; default: - PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBlockWidthInTexels)", - static_cast(format)); + PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBlockWidthInTexels)", format); return 8; } } @@ -195,8 +191,7 @@ int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format) case EFBCopyFormat::XFB: return 1; default: - PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBlockHeightInTexels)", - static_cast(format)); + PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBlockHeightInTexels)", format); return 4; } } @@ -247,8 +242,7 @@ TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format) case EFBCopyFormat::XFB: return TextureFormat::XFB; default: - PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBaseFormat)", - static_cast(format)); + PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBaseFormat)", format); return static_cast(format); } } @@ -259,77 +253,6 @@ void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center) TexFmt_Overlay_Center = center; } -static const char* texfmt[] = { - // pixel - "I4", - "I8", - "IA4", - "IA8", - "RGB565", - "RGB5A3", - "RGBA8", - "0x07", - "C4", - "C8", - "C14X2", - "0x0B", - "0x0C", - "0x0D", - "CMPR", - "0x0F", - // Z-buffer - "0x10", - "Z8", - "0x12", - "Z16", - "0x14", - "0x15", - "Z24X8", - "0x17", - "0x18", - "0x19", - "0x1A", - "0x1B", - "0x1C", - "0x1D", - "0x1E", - "0x1F", - // pixel + copy - "CR4", - "0x21", - "CRA4", - "CRA8", - "0x24", - "0x25", - "CYUVA8", - "CA8", - "CR8", - "CG8", - "CB8", - "CRG8", - "CGB8", - "0x2D", - "0x2E", - "XFB", - // Z + copy - "CZ4", - "0x31", - "0x32", - "0x33", - "0x34", - "0x35", - "0x36", - "0x37", - "0x38", - "CZ8M", - "CZ8L", - "0x3B", - "CZ16L", - "0x3D", - "0x3E", - "0x3F", -}; - static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat texformat) { int w = std::min(width, 40); @@ -344,11 +267,11 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat yoff = 0; } - const char* fmt = texfmt[static_cast(texformat) & 15]; - while (*fmt) + const auto fmt_str = fmt::to_string(texformat); + for (char ch : fmt_str) { int xcnt = 0; - int nchar = sfont_map[(int)*fmt]; + int nchar = sfont_map[ch]; const unsigned char* ptr = sfont_raw[nchar]; // each char is up to 9x10 @@ -369,7 +292,6 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat ptr += 9; } xoff += xcnt; - fmt++; } } @@ -707,6 +629,8 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth // We do the inverse BT.601 conversion for YCbCr to RGB // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion + // TODO: Use more precise numbers for this conversion (although on real hardware, the XFB isn't + // in a real texture format, so does this conversion actually ever happen?) u8 R = std::clamp(int(1.164f * Y + 1.596f * V), 0, 255); u8 G = std::clamp(int(1.164f * Y - 0.392f * U - 0.813f * V), 0, 255); u8 B = std::clamp(int(1.164f * Y + 2.017f * U), 0, 255); @@ -772,6 +696,8 @@ void TexDecoder_DecodeXFB(u8* dst, const u8* src, u32 width, u32 height, u32 str // We do the inverse BT.601 conversion for YCbCr to RGB // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion + // TODO: Use more precise numbers for this conversion (although on real hardware, the XFB + // isn't in a real texture format, so does this conversion actually ever happen?) u8 R1 = static_cast(std::clamp(int(1.164f * Y1 + 1.596f * V), 0, 255)); u8 G1 = static_cast(std::clamp(int(1.164f * Y1 - 0.392f * U - 0.813f * V), 0, 255)); u8 B1 = static_cast(std::clamp(int(1.164f * Y1 + 2.017f * U), 0, 255)); diff --git a/Source/Core/VideoCommon/TextureDecoder_x64.cpp b/Source/Core/VideoCommon/TextureDecoder_x64.cpp index 761fc0cd64..85a6e5e731 100644 --- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp @@ -1495,8 +1495,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, Text break; default: - PanicAlertFmt("Invalid Texture Format ({:#X})! (_TexDecoder_DecodeImpl)", - static_cast(texformat)); + PanicAlertFmt("Invalid Texture Format {}! (_TexDecoder_DecodeImpl)", texformat); break; } } diff --git a/Source/Core/VideoCommon/sfont.inc b/Source/Core/VideoCommon/sfont.inc index d5ddaa7d40..b9883ca058 100644 --- a/Source/Core/VideoCommon/sfont.inc +++ b/Source/Core/VideoCommon/sfont.inc @@ -4,7 +4,7 @@ static const unsigned char sfont_map[] = { 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, - 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,63,64,10,10,10,10,10,10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,10,10,10,10,10, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 26,27,28,29,30,31,32,33,34,35,36,10,10,10,10,10, @@ -713,5 +713,27 @@ static const unsigned char sfont_raw[][9*10] = { 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, }, };