handle address wrap around in texture cache

Fixes an out-of-bounds access in Mario 64. Also slightly optimises paletted texture conversion.
2025-09-12 22:43:07 -06:00 · 2024-10-27 23:32:05 +01:00
parent b60f42b281
commit 58ab33210a
5 changed files with 136 additions and 114 deletions
--- a/src/GPU.h
+++ b/src/GPU.h
@ -499,6 +499,17 @@ public:
        OAMDirty |= 1 << (addr / 1024);
    }

+    /// Reads one value of type T from the VRAMFlat_Texture buffer.
+    /// The byte offset is wrapped with a 0x7FFFF mask before the access, so any
+    /// 32-bit address can be passed in.
+    /// NOTE(review): only the starting offset is wrapped; a multi-byte T that
+    /// starts within the last sizeof(T)-1 masked bytes still reads onwards from
+    /// there - confirm callers only issue naturally aligned reads.
+    template <typename T>
+    inline T ReadVRAMFlat_Texture(u32 addr) const
+    {
+        const u32 offset = addr & 0x7FFFF;
+        return *reinterpret_cast<const T*>(&VRAMFlat_Texture[offset]);
+    }
+    /// Reads one value of type T from the VRAMFlat_TexPal buffer.
+    /// The byte offset is wrapped with a 0x1FFFF mask before the access, so any
+    /// 32-bit address can be passed in.
+    /// NOTE(review): only the starting offset is wrapped; a multi-byte T that
+    /// starts within the last sizeof(T)-1 masked bytes still reads onwards from
+    /// there - confirm callers only issue naturally aligned reads.
+    template <typename T>
+    inline T ReadVRAMFlat_TexPal(u32 addr) const
+    {
+        const u32 offset = addr & 0x1FFFF;
+        return *reinterpret_cast<const T*>(&VRAMFlat_TexPal[offset]);
+    }
+
    void SetPowerCnt(u32 val) noexcept;

    void StartFrame() noexcept;
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@ -193,10 +193,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 1: // A3I5
        {
            vramaddr += ((t * width) + s);
-            u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
+            u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

            texpal <<= 4;
-            *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1), gpu);
+            *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
            *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
        }
        break;
@ -204,12 +204,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 2: // 4-color
        {
            vramaddr += (((t * width) + s) >> 2);
-            u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
+            u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
            pixel >>= ((s & 0x3) << 1);
            pixel &= 0x3;

            texpal <<= 3;
-            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
+            *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;
@ -217,12 +217,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 3: // 16-color
        {
            vramaddr += (((t * width) + s) >> 1);
-            u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
+            u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
            if (s & 0x1) pixel >>= 4;
            else         pixel &= 0xF;

            texpal <<= 4;
-            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
+            *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;
@ -230,10 +230,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 4: // 256-color
        {
            vramaddr += ((t * width) + s);
-            u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
+            u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

            texpal <<= 4;
-            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
+            *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;
@ -253,31 +253,31 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
                val = 0;
            else
            {
-                val = ReadVRAM_Texture<u8>(vramaddr, gpu);
+                val = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
                val >>= (2 * (s & 0x3));
            }

-            u16 palinfo = ReadVRAM_Texture<u16>(slot1addr, gpu);
+            u16 palinfo = gpu.ReadVRAMFlat_Texture<u16>(slot1addr);
            u32 paloffset = (palinfo & 0x3FFF) << 2;
            texpal <<= 4;

            switch (val & 0x3)
            {
            case 0:
-                *color = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
+                *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
                *alpha = 31;
                break;

            case 1:
-                *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
+                *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
                *alpha = 31;
                break;

            case 2:
                if ((palinfo >> 14) == 1)
                {
-                    u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
-                    u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
+                    u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
+                    u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

                    u32 r0 = color0 & 0x001F;
                    u32 g0 = color0 & 0x03E0;
@ -294,8 +294,8 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
                }
                else if ((palinfo >> 14) == 3)
                {
-                    u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
-                    u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
+                    u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
+                    u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

                    u32 r0 = color0 & 0x001F;
                    u32 g0 = color0 & 0x03E0;
@ -311,20 +311,20 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
                    *color = r | g | b;
                }
                else
-                    *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4, gpu);
+                    *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 4);
                *alpha = 31;
                break;

            case 3:
                if ((palinfo >> 14) == 2)
                {
-                    *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6, gpu);
+                    *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 6);
                    *alpha = 31;
                }
                else if ((palinfo >> 14) == 3)
                {
-                    u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
-                    u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
+                    u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
+                    u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

                    u32 r0 = color0 & 0x001F;
                    u32 g0 = color0 & 0x03E0;
@ -353,10 +353,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 6: // A5I3
        {
            vramaddr += ((t * width) + s);
-            u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
+            u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

            texpal <<= 4;
-            *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1), gpu);
+            *color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x7)<<1));
            *alpha = (pixel >> 3);
        }
        break;
@ -364,7 +364,7 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
    case 7: // direct color
        {
            vramaddr += (((t * width) + s) << 1);
-            *color = ReadVRAM_Texture<u16>(vramaddr, gpu);
+            *color = gpu.ReadVRAMFlat_Texture<u16>(vramaddr);
            *alpha = (*color & 0x8000) ? 31 : 0;
        }
        break;
@ -1659,8 +1659,8 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
        {
            for (int x = 0; x < 256; x++)
            {
-                u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1), gpu);
-                u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1), gpu);
+                u16 val2 = gpu.ReadVRAMFlat_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
+                u16 val3 = gpu.ReadVRAMFlat_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));

                // TODO: confirm color conversion
                u32 r = (val2 << 1) & 0x3E; if (r) r++;
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@ -430,16 +430,6 @@ private:
        s32 ycoverage, ycov_incr;
    };

-    template <typename T>
-    inline T ReadVRAM_Texture(u32 addr, const GPU& gpu) const
-    {
-        return *(T*)&gpu.VRAMFlat_Texture[addr & 0x7FFFF];
-    }
-    template <typename T>
-    inline T ReadVRAM_TexPal(u32 addr, const GPU& gpu) const
-    {
-        return *(T*)&gpu.VRAMFlat_TexPal[addr & 0x1FFFF];
-    }
    u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept;

    struct RendererPolygon
--- a/src/GPU3D_Texcache.cpp
+++ b/src/GPU3D_Texcache.cpp
@ -75,11 +75,11 @@ inline u32 ConvertRGB5ToRGB6(u16 val)
 }

 template <int outputFmt>
-void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
+void ConvertBitmapTexture(u32 width, u32 height, u32* output, u32 addr, GPU& gpu)
 {
    for (u32 i = 0; i < width*height; i++)
    {
-        u16 value = *(u16*)&texData[i * 2];
+        u16 value = gpu.ReadVRAMFlat_Texture<u16>(addr + i * 2);

        switch (outputFmt)
        {
@ -96,28 +96,28 @@ void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
    }
 }

-template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u8* texData);
+template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u32 addr, GPU& gpu);

 template <int outputFmt>
-void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
+void ConvertCompressedTexture(u32 width, u32 height, u32* output, u32 addr, u32 addrAux, u32 palAddr, GPU& gpu)
 {
    // we process a whole block at the time
    for (int y = 0; y < height / 4; y++)
    {
        for (int x = 0; x < width / 4; x++)
        {
-            u32 data = ((u32*)texData)[x + y * (width / 4)];
-            u16 auxData = ((u16*)texAuxData)[x + y * (width / 4)];
+            u32 data = gpu.ReadVRAMFlat_Texture<u32>(addr + (x + y * (width / 4))*4);
+            u16 auxData = gpu.ReadVRAMFlat_Texture<u16>(addrAux + (x + y * (width / 4))*2);

-            u32 paletteOffset = auxData & 0x3FFF;
-            u16 color0 = palData[paletteOffset*2] | 0x8000;
-            u16 color1 = palData[paletteOffset*2+1] | 0x8000;
-            u16 color2, color3;
+            u32 paletteOffset = palAddr + (auxData & 0x3FFF) * 4;
+            u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset) | 0x8000;
+            u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+2) | 0x8000;
+            u16 color2 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+4) | 0x8000;
+            u16 color3 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+6) | 0x8000;

            switch ((auxData >> 14) & 0x3)
            {
            case 0:
-                color2 = palData[paletteOffset*2+2] | 0x8000;
                color3 = 0;
                break;
            case 1:
@ -137,8 +137,6 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
                color3 = 0;
                break;
            case 2:
-                color2 = palData[paletteOffset*2+2] | 0x8000;
-                color3 = palData[paletteOffset*2+3] | 0x8000;
                break;
            case 3:
                {
@ -179,7 +177,8 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
            {
                for (int i = 0; i < 4; i++)
                {
-                    u16 color = (packed >> 16 * (data >> 2 * (i + j * 4))) & 0xFFFF;
+                    u32 colorIdx = 16 * ((data >> 2 * (i + j * 4)) & 0x3);
+                    u16 color = (packed >> colorIdx) & 0xFFFF;
                    u32 res;
                    switch (outputFmt)
                    {
@ -197,20 +196,20 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
    }
 }

-template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u8*, u8*, u16*);
+template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u32, u32, u32, GPU&);

 template <int outputFmt, int X, int Y>
-void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
+void ConvertAXIYTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, GPU& gpu)
 {
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
-            u8 val = texData[x + y * width];
+            u8 val = gpu.ReadVRAMFlat_Texture<u8>(addr + x + y * width);

            u32 idx = val & ((1 << Y) - 1);

-            u16 color = palData[idx];
+            u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + idx * 2);
            u32 alpha = (val >> Y) & ((1 << X) - 1);
            if (X != 5)
                alpha = alpha * 4 + alpha / 2;
@ -228,22 +227,24 @@ void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* pa
    }
 }

-template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u8*, u16*);
-template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u8*, u16*);
+template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u32, u32, GPU&);
+template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u32, u32, GPU&);

 template <int outputFmt, int colorBits>
-void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
+void ConvertNColorsTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, bool color0Transparent, GPU& gpu)
 {
    for (int y = 0; y < height; y++)
    {
-        for (int x = 0; x < width / (8 / colorBits); x++)
+        for (int x = 0; x < width / (16 / colorBits); x++)
        {
-            u8 val = texData[x + y * (width / (8 / colorBits))];
+            // smallest possible row is 8 pixels with 2bpp => fits in u16
+            u16 val = gpu.ReadVRAMFlat_Texture<u16>(addr + 2 * (x + y * (width / (16 / colorBits))));

-            for (int i = 0; i < 8 / colorBits; i++)
+            for (int i = 0; i < 16 / colorBits; i++)
            {
-                u32 index = (val >> (i * colorBits)) & ((1 << colorBits) - 1);
-                u16 color = palData[index];
+                u32 index = val & ((1 << colorBits) - 1);
+                val >>= colorBits;
+                u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + index * 2);

                bool transparent = color0Transparent && index == 0;
                u32 res;
@ -256,14 +257,14 @@ void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16*
                case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
                    | (transparent ? 0 : 0xFF000000); break;
                }
-                output[x * (8 / colorBits) + y * width + i] = res;
+                output[x * (16 / colorBits) + y * width + i] = res;
            }
        }
    }
 }

-template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u8*, u16*, bool);
-template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u8*, u16*, bool);
-template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u8*, u16*, bool);
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u32, u32, bool, GPU&);
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u32, u32, bool, GPU&);
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u32, u32, bool, GPU&);

 }
--- a/src/GPU3D_Texcache.h
+++ b/src/GPU3D_Texcache.h
@ -32,13 +32,13 @@ enum
 };

 template <int outputFmt>
-void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData);
+void ConvertBitmapTexture(u32 width, u32 height, u32* output, u32 addr, GPU& gpu);
 template <int outputFmt>
-void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData);
+void ConvertCompressedTexture(u32 width, u32 height, u32* output, u32 addr, u32 addrAux, u32 palAddr, GPU& gpu);
 template <int outputFmt, int X, int Y>
-void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData);
+void ConvertAXIYTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, GPU& gpu);
 template <int outputFmt, int colorBits>
-void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent);
+void ConvertNColorsTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, bool color0Transparent, GPU& gpu);

 template <typename TexLoaderT, typename TexHandleT>
 class Texcache
@ -48,6 +48,50 @@ public:
        : TexLoader(texloader) // probably better if this would be a move constructor???
    {}

+    /// Hashes `size` bytes of `vram` starting at `addr`, continuing from offset 0
+    /// whenever the range runs past the end of the buffer.
+    ///
+    /// The range is fed to XXH64 piece by piece; the result of each piece is used
+    /// as the seed of the next one, so a wrapped range still yields one value.
+    /// Wrapping uses `vramSize - 1` as a mask, so `vramSize` has to be a power of two.
+    ///
+    /// @param vram      buffer to hash
+    /// @param vramSize  size of `vram` in bytes (power of two)
+    /// @param addr      start offset; wrapped into the buffer before use
+    /// @param size      number of bytes to hash
+    /// @return the chained 64-bit hash, or 0 when `size` is 0
+    u64 MaskedHash(u8* vram, u32 vramSize, u32 addr, u32 size)
+    {
+        const u32 wrapMask = vramSize - 1;
+        u64 hash = 0;
+
+        // Normalise the start offset: without this an address at or beyond
+        // vramSize would make `vramSize - addr` underflow and hash far outside
+        // the buffer.
+        addr &= wrapMask;
+
+        while (size > 0)
+        {
+            // Hash at most up to the end of the buffer; whatever is left is
+            // picked up from offset 0 on the next iteration. Comparing against
+            // the remaining bytes (instead of addr + size) also avoids a u32
+            // overflow in the sum.
+            const u32 bytesUntilEnd = vramSize - addr;
+            const u32 pieceSize = (size > bytesUntilEnd) ? bytesUntilEnd : size;
+
+            hash = XXH64(&vram[addr], pieceSize, hash);
+
+            addr = (addr + pieceSize) & wrapMask;
+            // pieceSize <= size by construction, so this cannot underflow
+            size -= pieceSize;
+        }
+
+        return hash;
+    }
+
+    /// Decides whether a cached VRAM range has to be treated as changed.
+    ///
+    /// The dirty bitmap holds one bit per VRAMDirtyGranularity bytes, packed
+    /// 64 bits per entry. The range is only rehashed (via MaskedHash) when at
+    /// least one dirty bit overlaps it; the new hash is then compared with
+    /// `oldHash`.
+    ///
+    /// @param start     first byte of the range
+    /// @param size      length of the range in bytes
+    /// @param oldHash   hash recorded for the range when it was cached
+    /// @param dirty     dirty bitmap entries covering `vram`
+    /// @param vram      buffer the range lives in
+    /// @param vramSize  size of `vram` in bytes
+    /// @return true if a dirty bit overlaps the range and its hash differs from `oldHash`
+    bool CheckInvalid(u32 start, u32 size, u64 oldHash, u64* dirty, u8* vram, u32 vramSize)
+    {
+        u32 startBit = start / VRAMDirtyGranularity;
+        u32 bitsCount = ((start + size + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
+
+        u32 startEntry = startBit >> 6;
+        u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
+
+        // `j` below counts 64-bit entries, so the wrap-around mask has to be
+        // derived from the number of entries, not from the number of bits;
+        // masking with the bit count would leave `j` pointing past the bitmap
+        // for ranges that wrap around the end of VRAM.
+        // NOTE(review): assumes the bitmap has a power-of-two number of entries
+        // (vramSize / VRAMDirtyGranularity / 64) - confirm against the dirty
+        // tracking type.
+        const u32 entryMask = ((vramSize / VRAMDirtyGranularity) >> 6) - 1;
+
+        for (u32 j = startEntry; j < startEntry + entriesCount; j++)
+        {
+            if (GetRangedBitMask(j, startBit, bitsCount) & dirty[j & entryMask])
+            {
+                // The hash covers the whole range and does not depend on `j`,
+                // so a single comparison settles the question; rehashing for
+                // every further dirty entry would give the same answer.
+                return MaskedHash(vram, vramSize, start, size) != oldHash;
+            }
+        }
+
+        return false;
+    }
+
    bool Update(GPU& gpu)
    {
        auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
@ -66,40 +110,21 @@ public:
                {
                    for (u32 i = 0; i < 2; i++)
                    {
-                        u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity;
-                        u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
-
-                        u32 startEntry = startBit >> 6;
-                        u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
-                        for (u32 j = startEntry; j < startEntry + entriesCount; j++)
-                        {
-                            if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j])
-                            {
-                                u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
-
-                                if (newTexHash != entry.TextureHash[i])
-                                    goto invalidate;
-                            }
-                        }
+                        if (CheckInvalid(entry.TextureRAMStart[i], entry.TextureRAMSize[i],
+                                entry.TextureHash[i],
+                                textureDirty.Data,
+                                gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture)))
+                            goto invalidate;
                    }
                }

                if (texPalChanged && entry.TexPalSize > 0)
                {
-                    u32 startBit = entry.TexPalStart / VRAMDirtyGranularity;
-                    u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
-
-                    u32 startEntry = startBit >> 6;
-                    u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
-                    for (u32 j = startEntry; j < startEntry + entriesCount; j++)
-                    {
-                        if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j])
-                        {
-                            u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
-                            if (newPalHash != entry.TexPalHash)
-                                goto invalidate;
-                        }
-                    }
+                    if (CheckInvalid(entry.TexPalStart, entry.TexPalSize,
+                            entry.TexPalHash,
+                            texPalDirty.Data,
+                            gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal)))
+                        goto invalidate;
                }

                it++;
@ -163,17 +188,13 @@ public:
        {
            entry.TextureRAMSize[0] = width*height*2;

-            ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]);
+            ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, addr, gpu);
        }
        else if (fmt == 5)
        {
-            u8* texData = &gpu.VRAMFlat_Texture[addr];
            u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
            if (addr >= 0x40000)
                slot1addr += 0x10000;
-            u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr];
-
-            u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16);

            entry.TextureRAMSize[0] = width*height/16*4;
            entry.TextureRAMStart[1] = slot1addr;
@ -181,7 +202,7 @@ public:
            entry.TexPalStart = palBase*16;
            entry.TexPalSize = 0x10000;

-            ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, texData, texAuxData, palData);
+            ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, addr, slot1addr, entry.TexPalStart, gpu);
        }
        else
        {
@ -204,30 +225,29 @@ public:
            entry.TexPalStart = palAddr;
            entry.TexPalSize = numPalEntries*2;

-            u8* texData = &gpu.VRAMFlat_Texture[addr];
-            u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr);
-
            //assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);

            bool color0Transparent = texParam & (1 << 29);

            switch (fmt)
            {
-            case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, texData, palData); break;
-            case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, texData, palData); break;
-            case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
-            case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
-            case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
+            case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, addr, palAddr, gpu); break;
+            case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, addr, palAddr, gpu); break;
+            case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
+            case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
+            case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
            }
        }

        for (int i = 0; i < 2; i++)
        {
            if (entry.TextureRAMSize[i])
-                entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
+                entry.TextureHash[i] = MaskedHash(gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture),
+                    entry.TextureRAMStart[i], entry.TextureRAMSize[i]);
        }
        if (entry.TexPalSize)
-            entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
+            entry.TexPalHash = MaskedHash(gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal),
+                entry.TexPalStart, entry.TexPalSize);

        auto& texArrays = TexArrays[widthLog2][heightLog2];
        auto& freeTextures = FreeTextures[widthLog2][heightLog2];