Merge pull request #9921 from Pokechu22/non-power-of-2-wrap

Software: Handle texture wrapping more accurately
This commit is contained in:
Tilka
2021-07-25 05:08:11 +01:00
committed by GitHub
3 changed files with 49 additions and 38 deletions

View File

@@ -18,29 +18,36 @@
namespace TextureSampler namespace TextureSampler
{ {
static inline void WrapCoord(int* coordp, WrapMode wrapMode, int imageSize) static inline void WrapCoord(int* coordp, WrapMode wrap_mode, int image_size)
{ {
int coord = *coordp; int coord = *coordp;
switch (wrapMode) switch (wrap_mode)
{ {
case WrapMode::Clamp: case WrapMode::Clamp:
coord = (coord > imageSize) ? imageSize : (coord < 0) ? 0 : coord; coord = std::clamp(coord, 0, image_size - 1);
break; break;
case WrapMode::Repeat: case WrapMode::Repeat:
coord = coord % (imageSize + 1); // Per YAGCD's info on TX_SETMODE1_I0 (et al.), mirror "requires the texture size to be a power
coord = (coord < 0) ? imageSize + coord : coord; // of two. (wrapping is implemented by a logical AND (SIZE-1))". So though this doesn't wrap
// nicely for non-power-of-2 sizes, that's how hardware does it.
coord = coord & (image_size - 1);
break; break;
case WrapMode::Mirror: case WrapMode::Mirror:
{ {
const int sizePlus1 = imageSize + 1; // YAGCD doesn't mention this, but this seems to be the check used to implement mirroring.
const int div = coord / sizePlus1; // With power-of-2 sizes, this correctly checks if it's an even-numbered repeat or an
coord = coord - (div * sizePlus1); // odd-numbered one, and thus can decide whether to reflect. It fails in unusual ways
coord = (coord < 0) ? -coord : coord; // with non-power-of-2 sizes, but seems to match what happens on actual hardware.
coord = (div & 1) ? imageSize - coord : coord; if ((coord & image_size) != 0)
coord = ~coord;
coord = coord & (image_size - 1);
break; break;
} }
default: default:
PanicAlertFmt("Invalid wrap mode: {}", wrapMode); // Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp.
PanicAlertFmt("Invalid wrap mode: {}", wrap_mode);
coord = std::clamp(coord, 0, image_size - 1);
break;
} }
*coordp = coord; *coordp = coord;
} }
@@ -131,8 +138,8 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
imageSrc = Memory::GetPointer(imageBase); imageSrc = Memory::GetPointer(imageBase);
} }
int imageWidth = ti0.width; int image_width_minus_1 = ti0.width;
int imageHeight = ti0.height; int image_height_minus_1 = ti0.height;
const int tlutAddress = texTlut.tmem_offset << 9; const int tlutAddress = texTlut.tmem_offset << 9;
const u8* tlut = &texMem[tlutAddress]; const u8* tlut = &texMem[tlutAddress];
@@ -141,15 +148,15 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
// move texture pointer to mip location // move texture pointer to mip location
if (mip) if (mip)
{ {
int mipWidth = imageWidth + 1; int mipWidth = image_width_minus_1 + 1;
int mipHeight = imageHeight + 1; int mipHeight = image_height_minus_1 + 1;
const int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt); const int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt);
const int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt); const int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt);
const int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt); const int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt);
imageWidth >>= mip; image_width_minus_1 >>= mip;
imageHeight >>= mip; image_height_minus_1 >>= mip;
s >>= mip; s >>= mip;
t >>= mip; t >>= mip;
@@ -186,45 +193,45 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
u8 sampledTex[4]; u8 sampledTex[4];
u32 texel[4]; u32 texel[4];
WrapCoord(&imageS, tm0.wrap_s, imageWidth); WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, imageHeight); WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);
WrapCoord(&imageSPlus1, tm0.wrap_s, imageWidth); WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageTPlus1, tm0.wrap_t, imageHeight); WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed)) if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
{ {
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, texfmt, tlut, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt,
tlutfmt); tlut, tlutfmt);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, texfmt, tlut, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, image_width_minus_1, texfmt,
tlutfmt); tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, texfmt, tlut, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, image_width_minus_1, texfmt,
tlutfmt); tlut, tlutfmt);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, texfmt, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, image_width_minus_1,
tlut, tlutfmt); texfmt, tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (fractT)); AddTexel(sampledTex, texel, (fractS) * (fractT));
} }
else else
{ {
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageT, TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageT,
imageWidth); image_width_minus_1);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageT, TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageT,
imageWidth); image_width_minus_1);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageTPlus1, TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageTPlus1,
imageWidth); image_width_minus_1);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1,
imageTPlus1, imageWidth); imageTPlus1, image_width_minus_1);
AddTexel(sampledTex, texel, (fractS) * (fractT)); AddTexel(sampledTex, texel, (fractS) * (fractT));
} }
@@ -240,14 +247,15 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
int imageT = t >> 7; int imageT = t >> 7;
// nearest neighbor sampling // nearest neighbor sampling
WrapCoord(&imageS, tm0.wrap_s, imageWidth); WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, imageHeight); WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed)) if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, texfmt, tlut, tlutfmt); TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut,
tlutfmt);
else else
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT,
imageWidth); image_width_minus_1);
} }
} }
} // namespace TextureSampler } // namespace TextureSampler

View File

@@ -713,6 +713,8 @@ enum class WrapMode : u32
Clamp = 0, Clamp = 0,
Repeat = 1, Repeat = 1,
Mirror = 2, Mirror = 2,
// Hardware testing indicates that WrapMode set to 3 behaves the same as clamp, though this is an
// invalid value
}; };
template <> template <>
struct fmt::formatter<WrapMode> : EnumFormatter<WrapMode::Mirror> struct fmt::formatter<WrapMode> : EnumFormatter<WrapMode::Mirror>

View File

@@ -240,8 +240,9 @@ void SamplerState::Generate(const BPMemory& bp, u32 index)
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0; lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0;
// Address modes // Address modes
// Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp.
static constexpr std::array<AddressMode, 4> address_modes = { static constexpr std::array<AddressMode, 4> address_modes = {
{AddressMode::Clamp, AddressMode::Repeat, AddressMode::MirroredRepeat, AddressMode::Repeat}}; {AddressMode::Clamp, AddressMode::Repeat, AddressMode::MirroredRepeat, AddressMode::Clamp}};
wrap_u = address_modes[u32(tm0.wrap_s.Value())]; wrap_u = address_modes[u32(tm0.wrap_s.Value())];
wrap_v = address_modes[u32(tm0.wrap_t.Value())]; wrap_v = address_modes[u32(tm0.wrap_t.Value())];
anisotropic_filtering = 0; anisotropic_filtering = 0;