Merge remote-tracking branch 'origin/master' into doublemelon

# Conflicts: # src/frontend/qt_sdl/Config.cpp # src/frontend/qt_sdl/EmuThread.cpp # src/frontend/qt_sdl/EmuThread.h # src/frontend/qt_sdl/Screen.cpp # src/frontend/qt_sdl/main.cpp
2024-11-14 13:27:41 -07:00 · 2024-05-20 00:50:25 +02:00 · 2024-05-20 00:50:25 +02:00 · d75c46c9f3
commit d75c46c9f3
parent 8bd50bb8b3 a72b79a55a
198 changed files with 26938 additions and 376 deletions
--- a/.github/workflows/build-ubuntu.yml
+++ b/.github/workflows/build-ubuntu.yml
@ -21,7 +21,7 @@ jobs:
        sudo rm -f /etc/apt/sources.list.d/dotnetdev.list /etc/apt/sources.list.d/microsoft-prod.list
        sudo apt update
        sudo apt install --allow-downgrades cmake ninja-build extra-cmake-modules libpcap0.8-dev libsdl2-dev \
-          qt6-{base,base-private,multimedia}-dev libslirp0 libslirp-dev libarchive-dev libzstd-dev libfuse2
+          qt6-{base,base-private,multimedia}-dev libarchive-dev libzstd-dev libfuse2
    - name: Configure
      run: cmake -B build -G Ninja -DUSE_QT6=ON -DCMAKE_INSTALL_PREFIX=/usr
    - name: Build
@ -63,7 +63,7 @@ jobs:
          apt update
          apt -y full-upgrade
          apt -y install git {gcc-12,g++-12}-aarch64-linux-gnu cmake ninja-build extra-cmake-modules \
-            {libsdl2,qt6-{base,base-private,multimedia},libslirp,libarchive,libzstd}-dev:arm64 \
+            {libsdl2,qt6-{base,base-private,multimedia},libarchive,libzstd}-dev:arm64 \
            pkg-config dpkg-dev
      - name: Check out source
        uses: actions/checkout@v4
--- a/.github/workflows/build-windows.yml
+++ b/.github/workflows/build-windows.yml
@ -27,7 +27,7 @@ jobs:
          update: true

    - name: Install dependencies
-      run: pacman -Sq --noconfirm git pkgconf mingw-w64-x86_64-{cmake,SDL2,qt5-static,libslirp,libarchive,toolchain}
+      run: pacman -Sq --noconfirm git pkgconf mingw-w64-x86_64-{cmake,SDL2,qt5-static,libarchive,toolchain}

    - name: Configure
      working-directory: ${{runner.workspace}}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -8,6 +8,7 @@ endif()
 set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)

 set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_SOURCE_DIR}/cmake/DefaultBuildFlags.cmake")

 option(USE_VCPKG "Use vcpkg for dependency packages" OFF)
 if (USE_VCPKG)
@ -25,6 +26,8 @@ include(CheckLibraryExists)
 include(CMakeDependentOption)
 include(CheckIPOSupported)

+include(SetupCCache)
+
 set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15" CACHE STRING "Minimum OS X deployment version")

 set(CMAKE_C_STANDARD 11)
@ -33,8 +36,6 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

-add_compile_definitions(MELONDS_VERSION="${melonDS_VERSION}")
-
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
@ -78,14 +79,6 @@ if (ENABLE_LTO)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 endif()

-if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
-    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
-endif()
-
-string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
-string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
-
 if (NOT APPLE)
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s")
 endif()
@ -100,13 +93,6 @@ endif()

 set(CMAKE_POSITION_INDEPENDENT_CODE ON)

-find_program(CCACHE "ccache")
-if (CCACHE)
-    message(STATUS "Using CCache to speed up compilation")
-    set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE})
-    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE})
-endif()
-
 option(ENABLE_GDBSTUB "Enable GDB stub" ON)
 if (ENABLE_GDBSTUB)
    add_definitions(-DGDBSTUB_ENABLED)
--- a/README.md
+++ b/README.md
@ -6,10 +6,9 @@
 <a href="https://www.gnu.org/licenses/gpl-3.0" alt="License: GPLv3"><img src="https://img.shields.io/badge/License-GPL%20v3-%23ff554d.svg"></a>
 <a href="https://kiwiirc.com/client/irc.badnik.net/?nick=IRC-Source_?#melonds" alt="IRC channel: #melonds"><img src="https://img.shields.io/badge/IRC%20chat-%23melonds-%23dd2e44.svg"></a>
 <br>
-<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Windows+x86-64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-windows.yml?label=Windows%20x86-64&logo=GitHub&branch=master"></img></a>
-<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Ubuntu+x86-64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-ubuntu.yml?label=Linux%20x86-64&logo=GitHub"></img></a>
-<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Ubuntu+aarch64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-ubuntu-aarch64.yml?label=Linux%20ARM64&logo=GitHub"></img></a>
-<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos-universal.yml?query=event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-macos.yml?label=macOS%20Universal&logo=GitHub"></img></a>
+<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml/badge.svg" /></a>
+<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml/badge.svg" /></a>
+<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml/badge.svg" /></a>
 </p>
 DS emulator, sorta

@ -35,9 +34,9 @@ As for the rest, the interface should be pretty straightforward. If you have a q

 ### Linux
 1. Install dependencies:
-   * Ubuntu 22.04: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libslirp-dev libarchive-dev libzstd-dev`
-   * Older Ubuntu: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default qtbase5-private-dev qtmultimedia5-dev libslirp-dev libarchive-dev libzstd-dev`
-   * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt5-base qt5-multimedia libslirp libarchive zstd`
+   * Ubuntu 22.04: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libarchive-dev libzstd-dev`
+   * Older Ubuntu: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default qtbase5-private-dev qtmultimedia5-dev libarchive-dev libzstd-dev`
+   * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt5-base qt5-multimedia libarchive zstd`
 3. Download the melonDS repository and prepare:
   ```bash
   git clone https://github.com/melonDS-emu/melonDS
@ -64,7 +63,7 @@ As for the rest, the interface should be pretty straightforward. If you have a q
   cd melonDS
   ```
 #### Dynamic builds (with DLLs)
-5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-base,qt5-svg,qt5-multimedia,qt5-tools,libslirp,libarchive,zstd}`
+5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-base,qt5-svg,qt5-multimedia,qt5-tools,libarchive,zstd}`
 6. Compile:
   ```bash
   cmake -B build
@ -75,7 +74,7 @@ As for the rest, the interface should be pretty straightforward. If you have a q
 If everything went well, melonDS and the libraries it needs should now be in the `dist` folder.

 #### Static builds (without DLLs, standalone executable)
-5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-static,libslirp,libarchive,zstd}`
+5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-static,libarchive,zstd}`
 6. Compile:
   ```bash
   cmake -B build -DBUILD_STATIC=ON -DCMAKE_PREFIX_PATH=/mingw64/qt5-static
@ -85,7 +84,7 @@ If everything went well, melonDS should now be in the `build` folder.

 ### macOS
 1. Install the [Homebrew Package Manager](https://brew.sh)
-2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libslirp libarchive zstd`
+2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive zstd`
 3. Download the melonDS repository and prepare:
   ```zsh
   git clone https://github.com/melonDS-emu/melonDS
--- a/cmake/DefaultBuildFlags.cmake
+++ b/cmake/DefaultBuildFlags.cmake
@ -0,0 +1,16 @@
+# Default compiler flags, loaded through CMAKE_USER_MAKE_RULES_OVERRIDE (set in the top-level
+# CMakeLists.txt). CMake uses the *_INIT variables to initialise the
+# CMAKE_<LANG>_FLAGS_<CONFIG> cache entries the first time a build tree is configured.
+
+# GCC: use "-g -Og" as the initial Debug flags.
+# "GNU" is quoted so it is compared as a literal string rather than looked up as a variable name.
+if (CMAKE_C_COMPILER_ID STREQUAL "GNU")
+	set(CMAKE_C_FLAGS_DEBUG_INIT "-g -Og")
+endif()
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+	set(CMAKE_CXX_FLAGS_DEBUG_INIT "-g -Og")
+endif()
+
+# Release: replace -O2 with -O3 in the initial Release flags.
+string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS_RELEASE_INIT "${CMAKE_C_FLAGS_RELEASE_INIT}")
+string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT}")
--- a/cmake/FixInterfaceIncludes.cmake
+++ b/cmake/FixInterfaceIncludes.cmake
@ -19,6 +19,13 @@ function(fix_interface_includes)
            if (PARENT_DIR MATCHES "include$")
                list(APPEND NEW_DIRS "${PARENT_DIR}")
            endif()
+
+            # HACK
+            # The libarchive pkg-config file in MSYS2 seems to include a UNIX-style path for its
+            # include directory and CMake doesn't like that.
+            if (WIN32 AND MINGW AND target STREQUAL PkgConfig::LibArchive)
+                list(FILTER DIRS EXCLUDE REGEX "^/[^.]+64/.*")
+            endif()
        endforeach()

        list(APPEND DIRS ${NEW_DIRS})
--- a/cmake/SetupCCache.cmake
+++ b/cmake/SetupCCache.cmake
@ -0,0 +1,27 @@
+# Optional CCache support.
+#
+# Offers the USE_CCACHE option when a ccache executable is found and, if it is enabled,
+# installs ccache as the global compile launcher (RULE_LAUNCH_COMPILE).
+
+# Included here so this module does not rely on its includer having loaded CMakeDependentOption first.
+include(CMakeDependentOption)
+include(FindPackageMessage)
+
+find_program(CCACHE "ccache")
+
+# USE_CCACHE defaults to ON while ccache is available and is forced OFF otherwise.
+cmake_dependent_option(USE_CCACHE "Use CCache to speed up repeated builds." ON CCACHE OFF)
+
+if (NOT CCACHE OR NOT USE_CCACHE)
+    return()
+endif()
+
+# Fedora, and probably also Red Hat-based distros in general, use CCache by default if it's installed on the system.
+# We'll try to detect this here, and exit if that's the case.
+# Trying to launch ccache with ccache as we'd otherwise do seems to cause build issues.
+if (CMAKE_C_COMPILER MATCHES "ccache" OR CMAKE_CXX_COMPILER MATCHES "ccache")
+    return()
+endif()
+
+find_package_message(CCache "Using CCache to speed up compilation" "${USE_CCACHE}")
+set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE}")
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -35,6 +35,8 @@ add_library(core STATIC
    GPU2D_Soft.cpp
    GPU3D.cpp
    GPU3D_Soft.cpp
+    GPU3D_Texcache.cpp
+    GPU3D_Texcache.h
    melonDLDI.h
    NDS.cpp
    NDSCart.cpp
@ -52,7 +54,6 @@ add_library(core STATIC
    types.h
    Utils.cpp
    Utils.h
-    version.h
    Wifi.cpp
    WifiAP.cpp

@ -79,6 +80,9 @@ if (ENABLE_OGLRENDERER)
        GPU_OpenGL.cpp
        GPU_OpenGL_shaders.h
        GPU3D_OpenGL.cpp
+        GPU3D_Compute.cpp
+        GPU3D_TexcacheOpenGL.cpp
+        GPU3D_TexcacheOpenGL.h
        GPU3D_OpenGL_shaders.h
        OpenGLSupport.cpp)

@ -123,6 +127,12 @@ if (ENABLE_JIT)
    endif()
 endif()

+set(MELONDS_VERSION_SUFFIX "$ENV{MELONDS_VERSION_SUFFIX}" CACHE STRING "Suffix to add to displayed melonDS version")
+
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/version.h")
+target_sources(core PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/version.h")
+target_include_directories(core PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
+
 add_subdirectory(teakra EXCLUDE_FROM_ALL)
 # Workaround for building teakra with -O0 on Windows either failing or hanging forever
 target_compile_options(teakra PRIVATE "$<$<CONFIG:DEBUG>:-Og>")
--- a/src/CP15.cpp
+++ b/src/CP15.cpp
@ -186,10 +186,14 @@ void ARMv5::UpdatePURegion(u32 n)
        return;
    }

-    u32 start = rgn >> 12;
-    u32 sz = 2 << ((rgn >> 1) & 0x1F);
-    u32 end = start + (sz >> 12);
-    // TODO: check alignment of start
+    // notes:
+    // * min size of a pu region is 4KiB (12 bits)
+    // * size is calculated as size + 1, but the 12 lsb of address space are ignored, therefore we need it as size + 1 - 12, or size - 11
+    // * pu regions are aligned based on their size
+    u32 size = std::max((int)((rgn>>1) & 0x1F) - 11, 0); // obtain the size, subtract 11 and clamp to a min of 0.
+    u32 start = ((rgn >> 12) >> size) << size; // determine the start offset, and use shifts to force alignment with a multiple of the size.
+    u32 end = start + (1<<size); // add 1 left shifted by size to start to determine end point
+    // dont need to bounds check the end point because the force alignment inherently prevents it from breaking

    u8 usermask = 0;
    u8 privmask = 0;
@ -239,7 +243,7 @@ void ARMv5::UpdatePURegion(u32 n)
        "PU region %d: %08X-%08X, user=%02X priv=%02X, %08X/%08X\n",
        n,
        start << 12,
-        end << 12,
+        (end << 12) - 1,
        usermask,
        privmask,
        PU_DataRW,
@ -579,12 +583,12 @@ void ARMv5::CP15Write(u32 id, u32 val)

        std::snprintf(log_output,
                 sizeof(log_output),
-                 "PU: region %d = %08X : %s, %08X-%08X\n",
+                 "PU: region %d = %08X : %s, start: %08X size: %02X\n",
                 (id >> 4) & 0xF,
                 val,
                 val & 1 ? "enabled" : "disabled",
                 val & 0xFFFFF000,
-                 (val & 0xFFFFF000) + (2 << ((val & 0x3E) >> 1))
+                 (val & 0x3E) >> 1
        );
        Log(LogLevel::Debug, "%s", log_output);
        // Some implementations of Log imply a newline, so we build up the line before printing it
--- a/src/DMA.cpp
+++ b/src/DMA.cpp
@ -21,6 +21,7 @@
 #include "DSi.h"
 #include "DMA.h"
 #include "GPU.h"
+#include "GPU3D.h"
 #include "DMA_Timings.h"
 #include "Platform.h"

--- a/src/DSi_NDMA.cpp
+++ b/src/DSi_NDMA.cpp
@ -22,6 +22,7 @@
 #include "DSi_NDMA.h"
 #include "GPU.h"
 #include "DSi_AES.h"
+#include "GPU3D.h"

 namespace melonDS
 {
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@ -23,7 +23,7 @@
 #include "ARMJIT.h"

 #include "GPU2D_Soft.h"
-#include "GPU3D_Soft.h"
+#include "GPU3D.h"

 namespace melonDS
 {
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@ -20,6 +20,7 @@
 #include <string.h>
 #include "NDS.h"
 #include "GPU.h"
+#include "GPU3D.h"

 namespace melonDS
 {
--- a/src/GPU2D_Soft.cpp
+++ b/src/GPU2D_Soft.cpp
@ -18,7 +18,7 @@

 #include "GPU2D_Soft.h"
 #include "GPU.h"
-#include "GPU3D_OpenGL.h"
+#include "GPU3D.h"

 namespace melonDS
 {
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@ -24,6 +24,7 @@
 #include "FIFO.h"
 #include "GPU3D_Soft.h"
 #include "Platform.h"
+#include "GPU3D.h"

 namespace melonDS
 {
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@ -349,7 +349,14 @@ public:
    virtual void RestartFrame(GPU& gpu) {};
    virtual u32* GetLine(int line) = 0;
    virtual void Blit(const GPU& gpu) {};
+
+    virtual void SetupAccelFrame() {}
    virtual void PrepareCaptureFrame() {}
+    virtual void BindOutputTexture(int buffer) {}
+
+    virtual bool NeedsShaderCompile() { return false; }
+    virtual void ShaderCompileStep(int& current, int& count) {}
+
 protected:
    Renderer3D(bool Accelerated);
 };
--- a/src/GPU3D_Compute.cpp
+++ b/src/GPU3D_Compute.cpp
--- a/src/GPU3D_Compute.h
+++ b/src/GPU3D_Compute.h
@ -0,0 +1,259 @@
+/*
+    Copyright 2016-2022 melonDS team
+
+    This file is part of melonDS.
+
+    melonDS is free software: you can redistribute it and/or modify it under
+    the terms of the GNU General Public License as published by the Free
+    Software Foundation, either version 3 of the License, or (at your option)
+    any later version.
+
+    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef GPU3D_COMPUTE
+#define GPU3D_COMPUTE
+
+#include <initializer_list>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "types.h"
+
+#include "GPU3D.h"
+
+#include "OpenGLSupport.h"
+#include "GPU_OpenGL.h"
+
+#include "GPU3D_TexcacheOpenGL.h"
+
+#include "NonStupidBitfield.h"
+
+namespace melonDS
+{
+
+// Renderer3D implementation that is part of the OpenGL build (its source file is listed under
+// ENABLE_OGLRENDERER in src/CMakeLists.txt). The constructor is private; create instances with New().
+class ComputeRenderer : public Renderer3D
+{
+public:
+    static std::unique_ptr<ComputeRenderer> New();
+    ~ComputeRenderer() override;
+
+    void Reset(GPU& gpu) override;
+
+    void SetRenderSettings(int scale, bool highResolutionCoordinates);
+
+    void VCount144(GPU& gpu) override;
+
+    void RenderFrame(GPU& gpu) override;
+    void RestartFrame(GPU& gpu) override;
+    u32* GetLine(int line) override;
+
+    void SetupAccelFrame() override;
+    void PrepareCaptureFrame() override;
+
+    void BindOutputTexture(int buffer) override;
+
+    void Blit(const GPU& gpu) override;
+    void Stop(const GPU& gpu) override;
+
+    // Returns true until ShaderStepIdx reaches 33.
+    // NOTE(review): 33 is presumably the number of steps ShaderCompileStep() performs - confirm in GPU3D_Compute.cpp.
+    bool NeedsShaderCompile() override { return ShaderStepIdx != 33; }
+    void ShaderCompileStep(int& current, int& count) override;
+private:
+    ComputeRenderer(GLCompositor&& compositor);
+
+    // GL handles for the renderer's shaders.
+    GLuint ShaderInterpXSpans[2];
+    GLuint ShaderBinCombined;
+    GLuint ShaderDepthBlend[2];
+    GLuint ShaderRasteriseNoTexture[2];
+    GLuint ShaderRasteriseNoTextureToon[2];
+    GLuint ShaderRasteriseNoTextureHighlight[2];
+    GLuint ShaderRasteriseUseTextureDecal[2];
+    GLuint ShaderRasteriseUseTextureModulate[2];
+    GLuint ShaderRasteriseUseTextureToon[2];
+    GLuint ShaderRasteriseUseTextureHighlight[2];
+    GLuint ShaderRasteriseShadowMask[2];
+    GLuint ShaderClearCoarseBinMask;
+    GLuint ShaderClearIndirectWorkCount;
+    GLuint ShaderCalculateWorkListOffset;
+    GLuint ShaderSortWork;
+    GLuint ShaderFinalPass[8];
+
+    // GL handles for the renderer's working memory (presumably buffer/texture objects - TODO confirm).
+    GLuint YSpanIndicesTextureMemory;
+    GLuint YSpanIndicesTexture;
+    GLuint YSpanSetupMemory;
+    GLuint XSpanSetupMemory;
+    GLuint BinResultMemory;
+    GLuint RenderPolygonMemory;
+    GLuint WorkDescMemory;
+
+    // Indices into TileMemory; tilememoryLayer_Num is the number of layers.
+    enum
+    {
+        tilememoryLayer_Color,
+        tilememoryLayer_Depth,
+        tilememoryLayer_Attr,
+        tilememoryLayer_Num,
+    };
+
+    GLuint TileMemory[tilememoryLayer_Num];
+    GLuint FinalTileMemory;
+
+    u32 DummyLine[256] = {};
+
+    // Plain data records used by the renderer.
+    // NOTE(review): these are presumably uploaded to the GPU as-is, in which case field order and
+    // sizes must match the shader-side declarations - confirm before changing any of them.
+    struct SpanSetupY
+    {
+        // Attributes
+        s32 Z0, Z1, W0, W1;
+        s32 ColorR0, ColorG0, ColorB0;
+        s32 ColorR1, ColorG1, ColorB1;
+        s32 TexcoordU0, TexcoordV0;
+        s32 TexcoordU1, TexcoordV1;
+
+        // Interpolator
+        s32 I0, I1;
+        s32 Linear;
+        s32 IRecip;
+        s32 W0n, W0d, W1d;
+
+        // Slope
+        s32 Increment;
+
+        s32 X0, X1, Y0, Y1;
+        s32 XMin, XMax;
+        s32 DxInitial;
+
+        s32 XCovIncr;
+        u32 IsDummy;
+    };
+    struct SpanSetupX
+    {
+        s32 X0, X1;
+
+        s32 EdgeLenL, EdgeLenR, EdgeCovL, EdgeCovR;
+
+        s32 XRecip;
+
+        u32 Flags;
+
+        s32 Z0, Z1, W0, W1;
+        s32 ColorR0, ColorG0, ColorB0;
+        s32 ColorR1, ColorG1, ColorB1;
+        s32 TexcoordU0, TexcoordV0;
+        s32 TexcoordU1, TexcoordV1;
+
+        s32 CovLInitial, CovRInitial;
+    };
+    struct SetupIndices
+    {
+        u16 PolyIdx, SpanIdxL, SpanIdxR, Y;
+    };
+    struct RenderPolygon
+    {
+        u32 FirstXSpan;
+        s32 YTop, YBot;
+
+        s32 XMin, XMax;
+        s32 XMinY, XMaxY;
+
+        u32 Variant;
+        u32 Attr;
+
+        float TextureLayer;
+    };
+
+    // Tiling parameters; a coarse tile covers CoarseTileCountX x CoarseTileCountY tiles.
+    static constexpr int TileSize = 8;
+    static constexpr int CoarseTileCountX = 8;
+    static constexpr int CoarseTileCountY = 4;
+    static constexpr int CoarseTileW = CoarseTileCountX * TileSize;
+    static constexpr int CoarseTileH = CoarseTileCountY * TileSize;
+
+    static constexpr int BinStride = 2048/32;
+    static constexpr int CoarseBinStride = BinStride/32;
+
+    static constexpr int MaxVariants = 256;
+
+    static constexpr int UniformIdxCurVariant = 0;
+    static constexpr int UniformIdxTextureSize = 1;
+
+    static constexpr int MaxFullscreenLayers = 16;
+
+    struct BinResultHeader
+    {
+        u32 VariantWorkCount[MaxVariants*4];
+        u32 SortedWorkOffset[MaxVariants];
+
+        u32 SortWorkWorkCount[4];
+    };
+
+    // constexpr (like the constants above) so the constant is implicitly inline and never needs an
+    // out-of-class definition, even if it gets odr-used.
+    static constexpr int MaxYSpanSetups = 6144*2;
+    std::vector<SetupIndices> YSpanIndices;
+    SpanSetupY YSpanSetups[MaxYSpanSetups];
+    RenderPolygon RenderPolygons[2048];
+
+    TexcacheOpenGL Texcache;
+
+    struct MetaUniform
+    {
+        u32 NumPolygons;
+        u32 NumVariants;
+
+        u32 AlphaRef;
+        u32 DispCnt;
+
+        u32 ToonTable[4*34];
+
+        u32 ClearColor, ClearDepth, ClearAttr;
+
+        u32 FogOffset, FogShift, FogColor;
+    };
+    GLuint MetaUniformMemory;
+
+    GLuint Samplers[9];
+
+    GLuint Framebuffer = 0;
+    GLuint LowResFramebuffer;
+    GLuint PixelBuffer;
+
+    u32 FramebufferCPU[256*192];
+
+    int ScreenWidth, ScreenHeight;
+    int TilesPerLine, TileLines;
+    int ScaleFactor = -1;
+    int MaxWorkTiles;
+    bool HiresCoordinates;
+
+    GLCompositor CurGLCompositor;
+
+    // Progress counter that NeedsShaderCompile() compares against 33.
+    int ShaderStepIdx = 0;
+
+    void DeleteShaders();
+
+    void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);
+    void SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2]);
+    void SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int side, s32 positions[10][2]);
+
+    bool CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines);
+};
+
+}
+
+#endif
--- a/src/GPU3D_Compute_shaders.h
+++ b/src/GPU3D_Compute_shaders.h
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@ -28,46 +28,32 @@
 namespace melonDS
 {

-bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
+bool GLRenderer::BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs)
 {
    char shadername[32];
    snprintf(shadername, sizeof(shadername), "RenderShader%02X", flags);

    int headerlen = strlen(kShaderHeader);

-    int vslen = strlen(vs);
-    int vsclen = strlen(kRenderVSCommon);
-    char* vsbuf = new char[headerlen + vsclen + vslen + 1];
-    strcpy(&vsbuf[0], kShaderHeader);
-    strcpy(&vsbuf[headerlen], kRenderVSCommon);
-    strcpy(&vsbuf[headerlen + vsclen], vs);
+    std::string vsbuf;
+    vsbuf += kShaderHeader;
+    vsbuf += kRenderVSCommon;
+    vsbuf += vs;

-    int fslen = strlen(fs);
-    int fsclen = strlen(kRenderFSCommon);
-    char* fsbuf = new char[headerlen + fsclen + fslen + 1];
-    strcpy(&fsbuf[0], kShaderHeader);
-    strcpy(&fsbuf[headerlen], kRenderFSCommon);
-    strcpy(&fsbuf[headerlen + fsclen], fs);
+    std::string fsbuf;
+    fsbuf += kShaderHeader;
+    fsbuf += kRenderFSCommon;
+    fsbuf += fs;

-    bool ret = OpenGL::BuildShaderProgram(vsbuf, fsbuf, RenderShader[flags], shadername);
-
-    delete[] vsbuf;
-    delete[] fsbuf;
+    GLuint prog;
+    bool ret = OpenGL::CompileVertexFragmentProgram(prog,
+        vsbuf, fsbuf,
+        shadername,
+        {{"vPosition", 0}, {"vColor", 1}, {"vTexcoord", 2}, {"vPolygonAttr", 3}},
+        {{"oColor", 0}, {"oAttr", 1}});

    if (!ret) return false;

-    GLuint prog = RenderShader[flags][2];
-
-    glBindAttribLocation(prog, 0, "vPosition");
-    glBindAttribLocation(prog, 1, "vColor");
-    glBindAttribLocation(prog, 2, "vTexcoord");
-    glBindAttribLocation(prog, 3, "vPolygonAttr");
-    glBindFragDataLocation(prog, 0, "oColor");
-    glBindFragDataLocation(prog, 1, "oAttr");
-
-    if (!OpenGL::LinkShaderProgram(RenderShader[flags]))
-        return false;
-
    GLint uni_id = glGetUniformBlockIndex(prog, "uConfig");
    glUniformBlockBinding(prog, uni_id, 0);

@ -78,13 +64,15 @@ bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
    uni_id = glGetUniformLocation(prog, "TexPalMem");
    glUniform1i(uni_id, 1);

+    RenderShader[flags] = prog;
+
    return true;
 }

 void GLRenderer::UseRenderShader(u32 flags)
 {
    if (CurShaderID == flags) return;
-    glUseProgram(RenderShader[flags][2]);
+    glUseProgram(RenderShader[flags]);
    CurShaderID = flags;
 }

@ -125,21 +113,17 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
    glDepthRange(0, 1);
    glClearDepth(1.0);

-
-    if (!OpenGL::BuildShaderProgram(kClearVS, kClearFS, result->ClearShaderPlain, "ClearShader"))
+    if (!OpenGL::CompileVertexFragmentProgram(result->ClearShaderPlain,
+            kClearVS, kClearFS,
+            "ClearShader",
+            {{"vPosition", 0}},
+            {{"oColor", 0}, {"oAttr", 1}}))
        return nullptr;

-    glBindAttribLocation(result->ClearShaderPlain[2], 0, "vPosition");
-    glBindFragDataLocation(result->ClearShaderPlain[2], 0, "oColor");
-    glBindFragDataLocation(result->ClearShaderPlain[2], 1, "oAttr");
-
-    if (!OpenGL::LinkShaderProgram(result->ClearShaderPlain))
-        return nullptr;
-
-    result->ClearUniformLoc[0] = glGetUniformLocation(result->ClearShaderPlain[2], "uColor");
-    result->ClearUniformLoc[1] = glGetUniformLocation(result->ClearShaderPlain[2], "uDepth");
-    result->ClearUniformLoc[2] = glGetUniformLocation(result->ClearShaderPlain[2], "uOpaquePolyID");
-    result->ClearUniformLoc[3] = glGetUniformLocation(result->ClearShaderPlain[2], "uFogFlag");
+    result->ClearUniformLoc[0] = glGetUniformLocation(result->ClearShaderPlain, "uColor");
+    result->ClearUniformLoc[1] = glGetUniformLocation(result->ClearShaderPlain, "uDepth");
+    result->ClearUniformLoc[2] = glGetUniformLocation(result->ClearShaderPlain, "uOpaquePolyID");
+    result->ClearUniformLoc[3] = glGetUniformLocation(result->ClearShaderPlain, "uFogFlag");

    memset(result->RenderShader, 0, sizeof(RenderShader));

@ -167,42 +151,35 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
    if (!result->BuildRenderShader(RenderFlag_ShadowMask | RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WSM))
        return nullptr;

-    if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassEdgeFS, result->FinalPassEdgeShader, "FinalPassEdgeShader"))
+    if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassEdgeShader,
+            kFinalPassVS, kFinalPassEdgeFS,
+            "FinalPassEdgeShader",
+            {{"vPosition", 0}},
+            {{"oColor", 0}}))
+        return nullptr;
+    if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassFogShader,
+            kFinalPassVS, kFinalPassFogFS,
+            "FinalPassFogShader",
+            {{"vPosition", 0}},
+            {{"oColor", 0}}))
        return nullptr;

-    if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassFogFS, result->FinalPassFogShader, "FinalPassFogShader"))
-        return nullptr;
+    GLuint uni_id = glGetUniformBlockIndex(result->FinalPassEdgeShader, "uConfig");
+    glUniformBlockBinding(result->FinalPassEdgeShader, uni_id, 0);

-    glBindAttribLocation(result->FinalPassEdgeShader[2], 0, "vPosition");
-    glBindFragDataLocation(result->FinalPassEdgeShader[2], 0, "oColor");
-
-    if (!OpenGL::LinkShaderProgram(result->FinalPassEdgeShader))
-        return nullptr;
-
-    GLint uni_id = glGetUniformBlockIndex(result->FinalPassEdgeShader[2], "uConfig");
-    glUniformBlockBinding(result->FinalPassEdgeShader[2], uni_id, 0);
-
-    glUseProgram(result->FinalPassEdgeShader[2]);
-
-    uni_id = glGetUniformLocation(result->FinalPassEdgeShader[2], "DepthBuffer");
+    glUseProgram(result->FinalPassEdgeShader);
+    uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "DepthBuffer");
    glUniform1i(uni_id, 0);
-    uni_id = glGetUniformLocation(result->FinalPassEdgeShader[2], "AttrBuffer");
+    uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "AttrBuffer");
    glUniform1i(uni_id, 1);

-    glBindAttribLocation(result->FinalPassFogShader[2], 0, "vPosition");
-    glBindFragDataLocation(result->FinalPassFogShader[2], 0, "oColor");
+    uni_id = glGetUniformBlockIndex(result->FinalPassFogShader, "uConfig");
+    glUniformBlockBinding(result->FinalPassFogShader, uni_id, 0);

-    if (!OpenGL::LinkShaderProgram(result->FinalPassFogShader))
-        return nullptr;
-
-    uni_id = glGetUniformBlockIndex(result->FinalPassFogShader[2], "uConfig");
-    glUniformBlockBinding(result->FinalPassFogShader[2], uni_id, 0);
-
-    glUseProgram(result->FinalPassFogShader[2]);
-
-    uni_id = glGetUniformLocation(result->FinalPassFogShader[2], "DepthBuffer");
+    glUseProgram(result->FinalPassFogShader);
+    uni_id = glGetUniformLocation(result->FinalPassFogShader, "DepthBuffer");
    glUniform1i(uni_id, 0);
-    uni_id = glGetUniformLocation(result->FinalPassFogShader[2], "AttrBuffer");
+    uni_id = glGetUniformLocation(result->FinalPassFogShader, "AttrBuffer");
    glUniform1i(uni_id, 1);


@ -255,29 +232,26 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, result->IndexBufferID);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), nullptr, GL_DYNAMIC_DRAW);

-    glGenFramebuffers(4, &result->FramebufferID[0]);
-    glBindFramebuffer(GL_FRAMEBUFFER, result->FramebufferID[0]);
-
-    glGenTextures(8, &result->FramebufferTex[0]);
-    result->FrontBuffer = 0;
+    glGenFramebuffers(1, &result->MainFramebuffer);

    // color buffers
-    SetupDefaultTexParams(result->FramebufferTex[0]);
-    SetupDefaultTexParams(result->FramebufferTex[1]);
+    glGenTextures(1, &result->ColorBufferTex);
+    SetupDefaultTexParams(result->ColorBufferTex);

    // depth/stencil buffer
-    SetupDefaultTexParams(result->FramebufferTex[4]);
-    SetupDefaultTexParams(result->FramebufferTex[6]);
+    glGenTextures(1, &result->DepthBufferTex);
+    SetupDefaultTexParams(result->DepthBufferTex);

    // attribute buffer
    // R: opaque polyID (for edgemarking)
    // G: edge flag
    // B: fog flag
-    SetupDefaultTexParams(result->FramebufferTex[5]);
-    SetupDefaultTexParams(result->FramebufferTex[7]);
+    glGenTextures(1, &result->AttrBufferTex);
+    SetupDefaultTexParams(result->AttrBufferTex);

    // downscale framebuffer for display capture (always 256x192)
-    SetupDefaultTexParams(result->FramebufferTex[3]);
+    glGenTextures(1, &result->DownScaleBufferTex);
+    SetupDefaultTexParams(result->DownScaleBufferTex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 192, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

    glEnable(GL_BLEND);
@ -315,8 +289,12 @@ GLRenderer::~GLRenderer()
    glDeleteTextures(1, &TexMemID);
    glDeleteTextures(1, &TexPalMemID);

-    glDeleteFramebuffers(4, &FramebufferID[0]);
-    glDeleteTextures(8, &FramebufferTex[0]);
+    glDeleteFramebuffers(1, &MainFramebuffer);
+    glDeleteFramebuffers(1, &DownscaleFramebuffer);
+    glDeleteTextures(1, &ColorBufferTex);
+    glDeleteTextures(1, &DepthBufferTex);
+    glDeleteTextures(1, &AttrBufferTex);
+    glDeleteTextures(1, &DownScaleBufferTex);

    glDeleteVertexArrays(1, &VertexArrayID);
    glDeleteBuffers(1, &VertexBufferID);
@ -327,8 +305,8 @@ GLRenderer::~GLRenderer()

    for (int i = 0; i < 16; i++)
    {
-        if (!RenderShader[i][2]) continue;
-        OpenGL::DeleteShaderProgram(RenderShader[i]);
+        if (!RenderShader[i]) continue;
+        glDeleteProgram(RenderShader[i]);
    }
 }

@ -361,40 +339,25 @@ void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept
    ScreenW = 256 * scale;
    ScreenH = 192 * scale;

-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[0]);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[1]);
+    glBindTexture(GL_TEXTURE_2D, ColorBufferTex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[4]);
+    glBindTexture(GL_TEXTURE_2D, DepthBufferTex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[5]);
+    glBindTexture(GL_TEXTURE_2D, AttrBufferTex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, ScreenW, ScreenH, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);

-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[6]);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[7]);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, ScreenW, ScreenH, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
-
-    glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[3]);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[3], 0);
+    glBindFramebuffer(GL_FRAMEBUFFER, DownscaleFramebuffer);
+    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, DownScaleBufferTex, 0);

    GLenum fbassign[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1};

-    glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[0], 0);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[4], 0);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[5], 0);
+    glBindFramebuffer(GL_FRAMEBUFFER, MainFramebuffer);
+    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, ColorBufferTex, 0);
+    glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, DepthBufferTex, 0);
+    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, AttrBufferTex, 0);
    glDrawBuffers(2, fbassign);

-    glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[1]);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[1], 0);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[6], 0);
-    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[7], 0);
-    glDrawBuffers(2, fbassign);
-
-    glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
-
    glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
    glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ);

@ -1103,9 +1066,9 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
        glStencilMask(0);

        glActiveTexture(GL_TEXTURE0);
-        glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer ? 6 : 4]);
+        glBindTexture(GL_TEXTURE_2D, DepthBufferTex);
        glActiveTexture(GL_TEXTURE1);
-        glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer ? 7 : 5]);
+        glBindTexture(GL_TEXTURE_2D, AttrBufferTex);

        glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID);
        glBindVertexArray(ClearVertexArrayID);
@ -1115,7 +1078,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
            // edge marking
            // TODO: depth/polyid values at screen edges

-            glUseProgram(FinalPassEdgeShader[2]);
+            glUseProgram(FinalPassEdgeShader);

            glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE);

@ -1126,7 +1089,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
        {
            // fog

-            glUseProgram(FinalPassFogShader[2]);
+            glUseProgram(FinalPassFogShader);

            if (gpu3d.RenderDispCnt & (1<<6))
                glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA);
@ -1154,7 +1117,7 @@ void GLRenderer::RenderFrame(GPU& gpu)
    CurShaderID = -1;

    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[FrontBuffer]);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, MainFramebuffer);

    ShaderConfig.uScreenSize[0] = ScreenW;
    ShaderConfig.uScreenSize[1] = ScreenH;
@ -1260,7 +1223,7 @@ void GLRenderer::RenderFrame(GPU& gpu)
    // TODO: check whether 'clear polygon ID' affects translucent polyID
    // (for example when alpha is 1..30)
    {
-        glUseProgram(ClearShaderPlain[2]);
+        glUseProgram(ClearShaderPlain);
        glDepthFunc(GL_ALWAYS);

        u32 r = gpu.GPU3D.RenderClearAttr1 & 0x1F;
@ -1320,8 +1283,6 @@ void GLRenderer::RenderFrame(GPU& gpu)

        RenderSceneChunk(gpu.GPU3D, 0, 192);
    }
-
-    FrontBuffer = FrontBuffer ? 0 : 1;
 }

 void GLRenderer::Stop(const GPU& gpu)
@ -1331,16 +1292,14 @@ void GLRenderer::Stop(const GPU& gpu)

 void GLRenderer::PrepareCaptureFrame()
 {
-    // TODO: make sure this picks the right buffer when doing antialiasing
-    int original_fb = FrontBuffer^1;
-
-    glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[original_fb]);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, MainFramebuffer);
    glReadBuffer(GL_COLOR_ATTACHMENT0);
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[3]);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, DownscaleFramebuffer);
    glDrawBuffer(GL_COLOR_ATTACHMENT0);
    glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, 256, 192, GL_COLOR_BUFFER_BIT, GL_NEAREST);

-    glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[3]);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, DownscaleFramebuffer);
    glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
 }

@ -1349,12 +1308,18 @@ void GLRenderer::Blit(const GPU& gpu)
    CurGLCompositor.RenderFrame(gpu, *this);
 }

+// Forwards the request to the compositor owned by this renderer; `buffer` is passed
+// through unchanged.
+void GLRenderer::BindOutputTexture(int buffer)
+{
+    CurGLCompositor.BindOutputTexture(buffer);
+}
+
 u32* GLRenderer::GetLine(int line)
 {
    int stride = 256;

    if (line == 0)
    {
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
        u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
        if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*192);
        glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
@ -1374,7 +1339,7 @@ u32* GLRenderer::GetLine(int line)

 void GLRenderer::SetupAccelFrame()
 {
-    glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]);
+    glBindTexture(GL_TEXTURE_2D, ColorBufferTex);
 }

 }
--- a/src/GPU3D_OpenGL.h
+++ b/src/GPU3D_OpenGL.h
@ -44,12 +44,11 @@ public:
    void Stop(const GPU& gpu) override;
    u32* GetLine(int line) override;

-    void SetupAccelFrame();
+    void SetupAccelFrame() override;
    void PrepareCaptureFrame() override;
    void Blit(const GPU& gpu) override;

-    [[nodiscard]] const GLCompositor& GetCompositor() const noexcept { return CurGLCompositor; }
-    GLCompositor& GetCompositor() noexcept { return CurGLCompositor; }
+    void BindOutputTexture(int buffer) override;

    static std::unique_ptr<GLRenderer> New() noexcept;
 private:
@ -77,7 +76,7 @@ private:
    GLCompositor CurGLCompositor;
    RendererPolygon PolygonList[2048] {};

-    bool BuildRenderShader(u32 flags, const char* vs, const char* fs);
+    bool BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs);
    void UseRenderShader(u32 flags);
    void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
    u32* SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32* vptr) const;
@ -96,13 +95,13 @@ private:
    };


-    GLuint ClearShaderPlain[3] {};
+    GLuint ClearShaderPlain {};

-    GLuint RenderShader[16][3] {};
+    GLuint RenderShader[16] {};
    GLuint CurShaderID = -1;

-    GLuint FinalPassEdgeShader[3] {};
-    GLuint FinalPassFogShader[3] {};
+    GLuint FinalPassEdgeShader {};
+    GLuint FinalPassFogShader {};

    // std140 compliant structure
    struct
@ -155,12 +154,12 @@ private:
    bool BetterPolygons {};
    int ScreenW {}, ScreenH {};

-    GLuint FramebufferTex[8] {};
-    int FrontBuffer {};
-    GLuint FramebufferID[4] {}, PixelbufferID {};
+    GLuint ColorBufferTex {}, DepthBufferTex {}, AttrBufferTex {};
+    GLuint DownScaleBufferTex {};
+    GLuint PixelbufferID {};
+
+    GLuint MainFramebuffer {}, DownscaleFramebuffer {};
    u32 Framebuffer[256*192] {};
-
-
 };
 }
 #endif
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@ -95,8 +95,8 @@ void SoftRenderer::EnableRenderThread()
    }
 }

-SoftRenderer::SoftRenderer(bool threaded) noexcept
-    : Renderer3D(false), Threaded(threaded)
+SoftRenderer::SoftRenderer() noexcept
+    : Renderer3D(false)
 {
    Sema_RenderStart = Platform::Semaphore_Create();
    Sema_RenderDone = Platform::Semaphore_Create();
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@ -29,7 +29,7 @@ namespace melonDS
 class SoftRenderer : public Renderer3D
 {
 public:
-    SoftRenderer(bool threaded = false) noexcept;
+    SoftRenderer() noexcept;
    ~SoftRenderer() override;
    void Reset(GPU& gpu) override;

@ -504,7 +504,7 @@ private:

    // threading

-    bool Threaded;
+    bool Threaded = false;
    Platform::Thread* RenderThread;
    std::atomic_bool RenderThreadRunning;
    std::atomic_bool RenderThreadRendering;
--- a/src/GPU3D_Texcache.cpp
+++ b/src/GPU3D_Texcache.cpp
@ -0,0 +1,269 @@
+#include "GPU3D_Texcache.h"
+
+namespace melonDS
+{
+
+// Averages two 15-bit colors (5 bits per channel, red in the low bits) channel by
+// channel. Bit 15 of both inputs is ignored and is always clear in the result.
+inline u16 ColorAvg(u16 color0, u16 color1)
+{
+    // every channel is summed in place (not shifted down to bit 0); masking after the
+    // halving drops the bit that would otherwise spill into the channel below
+    const u32 red   = ((u32)(color0 & 0x001F) + (u32)(color1 & 0x001F)) >> 1;
+    const u32 green = (((u32)(color0 & 0x03E0) + (u32)(color1 & 0x03E0)) >> 1) & 0x03E0;
+    const u32 blue  = (((u32)(color0 & 0x7C00) + (u32)(color1 & 0x7C00)) >> 1) & 0x7C00;
+
+    return red | green | blue;
+}
+
+// Blends two 15-bit colors channel by channel with weights 5/8 (color0) and 3/8
+// (color1). Bit 15 of both inputs is ignored and is always clear in the result.
+inline u16 Color5of3(u16 color0, u16 color1)
+{
+    // channels are weighted in place; the mask after the shift removes the bits
+    // that fall out of the bottom of the green and blue fields
+    const u32 red   = ((u32)(color0 & 0x001F)*5 + (u32)(color1 & 0x001F)*3) >> 3;
+    const u32 green = (((u32)(color0 & 0x03E0)*5 + (u32)(color1 & 0x03E0)*3) >> 3) & 0x03E0;
+    const u32 blue  = (((u32)(color0 & 0x7C00)*5 + (u32)(color1 & 0x7C00)*3) >> 3) & 0x7C00;
+
+    return red | green | blue;
+}
+
+// Blends two 15-bit colors channel by channel with weights 3/8 (color0) and 5/8
+// (color1). Bit 15 of both inputs is ignored and is always clear in the result.
+inline u16 Color3of5(u16 color0, u16 color1)
+{
+    // channels are weighted in place; the mask after the shift removes the bits
+    // that fall out of the bottom of the green and blue fields
+    const u32 red   = ((u32)(color0 & 0x001F)*3 + (u32)(color1 & 0x001F)*5) >> 3;
+    const u32 green = (((u32)(color0 & 0x03E0)*3 + (u32)(color1 & 0x03E0)*5) >> 3) & 0x03E0;
+    const u32 blue  = (((u32)(color0 & 0x7C00)*3 + (u32)(color1 & 0x7C00)*5) >> 3) & 0x7C00;
+
+    return red | green | blue;
+}
+
+// Spreads a 15-bit color over three bytes by moving each 5-bit channel to the top of
+// its own byte: red -> bits 3-7, green -> bits 11-15, blue -> bits 19-23.
+// The low 3 bits of every byte and the whole top byte stay zero.
+inline u32 ConvertRGB5ToRGB8(u16 val)
+{
+    const u32 color = val;
+
+    const u32 red   = (color & 0x1F) << 3;
+    const u32 green = (color & 0x3E0) << 6;
+    const u32 blue  = (color & 0x7C00) << 9;
+
+    return red | green | blue;
+}
+// Spreads a 15-bit color over three bytes with red and blue swapped relative to
+// ConvertRGB5ToRGB8: blue -> bits 3-7, green -> bits 11-15, red -> bits 19-23.
+// The low 3 bits of every byte and the whole top byte stay zero.
+inline u32 ConvertRGB5ToBGR8(u16 val)
+{
+    // red lives in bits 0-4 and has to reach bits 19-23 (shift up by 19);
+    // blue lives in bits 10-14 and has to reach bits 3-7 (shift down by 7).
+    // Shifting by 9 and 3 instead would smear both channels across the wrong bytes.
+    return (((u32)val & 0x1F) << 19)
+        | (((u32)val & 0x3E0) << 6)
+        | (((u32)val & 0x7C00) >> 7);
+}
+// Expands a 15-bit color to 6 bits per channel, one channel per byte
+// (red in bits 0-7, green in bits 8-15, blue in bits 16-23).
+inline u32 ConvertRGB5ToRGB6(u16 val)
+{
+    // 5 -> 6 bit expansion: 0 stays 0, every other value n becomes 2n+1,
+    // so the maximum 31 maps to the maximum 63
+    auto expand = [](u32 channel5) -> u32 { return channel5 ? channel5*2 + 1 : 0; };
+
+    const u32 red   = expand(val & 0x1F);
+    const u32 green = expand((val >> 5) & 0x1F);
+    const u32 blue  = expand((val >> 10) & 0x1F);
+
+    return red | (green << 8) | (blue << 16);
+}
+
+// Converts a direct-color texture into `output`, one u32 per texel.
+// Every source texel is 16 bits: a 15-bit color in bits 0-14 and an "opaque" flag in
+// bit 15, which selects between full alpha and zero alpha in the output.
+template <int outputFmt>
+void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
+{
+    const u32 texelCount = width*height;
+
+    for (u32 texel = 0; texel < texelCount; texel++)
+    {
+        const u16 raw = *(u16*)&texData[texel * 2];
+        const bool opaque = (raw & 0x8000) != 0;
+
+        // outputFmt is a compile-time constant, so within one instantiation
+        // the same branch is taken for every texel
+        if (outputFmt == outputFmt_RGB6A5)
+            output[texel] = ConvertRGB5ToRGB6(raw) | (opaque ? 0x1F000000 : 0);
+        else if (outputFmt == outputFmt_RGBA8)
+            output[texel] = ConvertRGB5ToRGB8(raw) | (opaque ? 0xFF000000 : 0);
+        else if (outputFmt == outputFmt_BGRA8)
+            output[texel] = ConvertRGB5ToBGR8(raw) | (opaque ? 0xFF000000 : 0);
+    }
+}
+
+template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u8* texData);
+
+// Decodes a 4x4-block compressed texture into `output` (one u32 per texel, row-major,
+// `width` texels per row).
+//
+// texData:    one u32 per block holding sixteen 2-bit color selectors; texel (i, j) of
+//             a block uses the two bits starting at bit 2*(i + 4*j).
+// texAuxData: one u16 per block; bits 0-13 are the palette offset (in units of two
+//             colors), bits 14-15 select how colors 2 and 3 of the block are obtained.
+// palData:    palette the offsets index into.
+//
+// While decoding, bit 15 of a color marks it as opaque; a color of 0 is transparent.
+template <int outputFmt>
+void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
+{
+    const u32 blocksPerRow = width / 4;
+
+    // we process a whole block at a time
+    for (u32 y = 0; y < height / 4; y++)
+    {
+        for (u32 x = 0; x < blocksPerRow; x++)
+        {
+            u32 data = ((u32*)texData)[x + y * blocksPerRow];
+            u16 auxData = ((u16*)texAuxData)[x + y * blocksPerRow];
+
+            u32 paletteOffset = auxData & 0x3FFF;
+            u16 color0 = palData[paletteOffset*2] | 0x8000;
+            u16 color1 = palData[paletteOffset*2+1] | 0x8000;
+            u16 color2, color3;
+
+            switch ((auxData >> 14) & 0x3)
+            {
+            case 0:
+                // third color from the palette, fourth one transparent
+                color2 = palData[paletteOffset*2+2] | 0x8000;
+                color3 = 0;
+                break;
+            case 1:
+                // third color is the average of the first two, fourth one transparent
+                color2 = ColorAvg(color0, color1) | 0x8000;
+                color3 = 0;
+                break;
+            case 2:
+                // all four colors come from the palette
+                color2 = palData[paletteOffset*2+2] | 0x8000;
+                color3 = palData[paletteOffset*2+3] | 0x8000;
+                break;
+            case 3:
+                // third and fourth colors are 5:3 and 3:5 blends of the first two
+                color2 = Color5of3(color0, color1) | 0x8000;
+                color3 = Color3of5(color0, color1) | 0x8000;
+                break;
+            }
+
+            // in 2020 our default data types are big enough to be used as lookup tables...
+            u64 packed = color0 | ((u64)color1 << 16) | ((u64)color2 << 32) | ((u64)color3 << 48);
+
+            for (u32 j = 0; j < 4; j++)
+            {
+                for (u32 i = 0; i < 4; i++)
+                {
+                    // isolate this texel's 2-bit selector: without the mask the selectors
+                    // of all following texels would end up in the shift amount, picking
+                    // the wrong color and shifting the 64-bit table by 64 or more
+                    u32 colorIdx = (data >> (2 * (i + j * 4))) & 0x3;
+                    u16 color = (packed >> (16 * colorIdx)) & 0xFFFF;
+                    u32 res;
+                    switch (outputFmt)
+                    {
+                    case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
+                        | ((color & 0x8000) ? 0x1F000000 : 0); break;
+                    case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
+                        | ((color & 0x8000) ? 0xFF000000 : 0); break;
+                    case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
+                        | ((color & 0x8000) ? 0xFF000000 : 0); break;
+                    }
+                    output[x * 4 + i + (y * 4 + j) * width] = res;
+                }
+            }
+        }
+    }
+}
+
+template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u8*, u8*, u16*);
+
+// Decodes a texture whose texels are one byte each, split into a Y-bit palette index
+// (low bits) and an X-bit alpha value (the bits above it). Writes one u32 per texel.
+// Alpha is brought to a 5-bit range before being merged into the output.
+template <int outputFmt, int X, int Y>
+void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
+{
+    const u32 indexMask = (1 << Y) - 1;
+    const u32 alphaMask = (1 << X) - 1;
+
+    for (u32 row = 0; row < height; row++)
+    {
+        for (u32 col = 0; col < width; col++)
+        {
+            const u32 texel = col + row * width;
+            const u8 packed = texData[texel];
+
+            const u16 color = palData[packed & indexMask];
+
+            u32 alpha = (packed >> Y) & alphaMask;
+            if (X != 5)
+                alpha = alpha * 4 + alpha / 2; // stretch the narrower alpha to 0..31
+
+            u32 res;
+            switch (outputFmt)
+            {
+            case outputFmt_RGB6A5:
+                res = ConvertRGB5ToRGB6(color) | alpha << 24;
+                break;
+            // make sure full alpha == 255
+            case outputFmt_RGBA8:
+                res = ConvertRGB5ToRGB8(color) | (alpha << 27 | (alpha & 0x1C) << 22);
+                break;
+            case outputFmt_BGRA8:
+                res = ConvertRGB5ToBGR8(color) | (alpha << 27 | (alpha & 0x1C) << 22);
+                break;
+            }
+            output[texel] = res;
+        }
+    }
+}
+
+template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u8*, u16*);
+template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u8*, u16*);
+
+// Decodes a paletted texture that packs several `colorBits`-wide palette indices into
+// each byte, lowest bits first. Writes one u32 per texel. Every texel is fully opaque,
+// except that index 0 becomes fully transparent when color0Transparent is set.
+template <int outputFmt, int colorBits>
+void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
+{
+    const u32 texelsPerByte = 8 / colorBits;
+    const u32 bytesPerRow = width / texelsPerByte;
+    const u32 indexMask = (1 << colorBits) - 1;
+
+    for (u32 row = 0; row < height; row++)
+    {
+        for (u32 byteCol = 0; byteCol < bytesPerRow; byteCol++)
+        {
+            const u8 packed = texData[byteCol + row * bytesPerRow];
+
+            for (u32 sub = 0; sub < texelsPerByte; sub++)
+            {
+                const u32 index = (packed >> (sub * colorBits)) & indexMask;
+                const u16 color = palData[index];
+                const bool visible = !(color0Transparent && index == 0);
+
+                u32 res;
+                switch (outputFmt)
+                {
+                case outputFmt_RGB6A5:
+                    res = ConvertRGB5ToRGB6(color) | (visible ? 0x1F000000 : 0);
+                    break;
+                case outputFmt_RGBA8:
+                    res = ConvertRGB5ToRGB8(color) | (visible ? 0xFF000000 : 0);
+                    break;
+                case outputFmt_BGRA8:
+                    res = ConvertRGB5ToBGR8(color) | (visible ? 0xFF000000 : 0);
+                    break;
+                }
+                output[byteCol * texelsPerByte + row * width + sub] = res;
+            }
+        }
+    }
+}
+
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u8*, u16*, bool);
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u8*, u16*, bool);
+template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u8*, u16*, bool);
+
+}
--- a/src/GPU3D_Texcache.h
+++ b/src/GPU3D_Texcache.h
@ -0,0 +1,310 @@
+#ifndef GPU3D_TEXCACHE
+#define GPU3D_TEXCACHE
+
+#include "types.h"
+#include "GPU.h"
+
+#include <assert.h>
+#include <unordered_map>
+#include <vector>
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash/xxhash.h"
+
+namespace melonDS
+{
+
+// Texture width in texels: 8 shifted left by the 3-bit field at bits 20-22 of texparam.
+inline u32 TextureWidth(u32 texparam)
+{
+    const u32 widthLog2 = (texparam >> 20) & 0x7;
+    return 8 << widthLog2;
+}
+
+// Texture height in texels: 8 shifted left by the 3-bit field at bits 23-25 of texparam.
+inline u32 TextureHeight(u32 texparam)
+{
+    const u32 heightLog2 = (texparam >> 23) & 0x7;
+    return 8 << heightLog2;
+}
+
+// Output texel layouts of the Convert*Texture functions, passed as their
+// outputFmt template parameter. Every output texel is one u32.
+enum
+{
+    outputFmt_RGB6A5, // 6-bit color channels (one per byte), 5-bit alpha in the top byte
+    outputFmt_RGBA8, // 8-bit channels, red in the lowest byte, alpha in the top byte
+    outputFmt_BGRA8 // 8-bit channels with red and blue swapped, alpha in the top byte
+};
+
+template <int outputFmt>
+void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData);
+template <int outputFmt>
+void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData);
+template <int outputFmt, int X, int Y>
+void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData);
+template <int outputFmt, int colorBits>
+void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent);
+
+// Cache of decoded textures.
+//
+// Entries are keyed by the (filtered) texture parameter word, combined with the palette
+// base for every format except 7. Decoded texels are stored as layers of array textures
+// created through TexLoaderT, which has to provide GenerateTexture(width, height, layers),
+// UploadTexture(handle, width, height, layer, data) and DeleteTexture(handle).
+// TexHandleT is the handle type GenerateTexture returns.
+//
+// Array textures are grouped by size (indexed by the 3-bit width/height fields of the
+// texture parameters); layers of invalidated entries are recycled through FreeTextures.
+template <typename TexLoaderT, typename TexHandleT>
+class Texcache
+{
+public:
+    // Stores a copy of the given loader.
+    Texcache(const TexLoaderT& texloader)
+        : TexLoader(texloader) // probably better if this would be a move constructor???
+    {}
+
+    // Brings the flat texture/palette VRAM copies up to date and drops every cache entry
+    // whose source data changed. An entry is only dropped if a dirty bit overlaps one of
+    // its memory ranges AND the hash of that range differs from the stored one.
+    //
+    // Returns true when texture or palette memory was reported as changed (even if no
+    // entry ended up being invalidated), false otherwise.
+    bool Update(GPU& gpu)
+    {
+        auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
+        auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu);
+
+        bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty);
+        bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty);
+
+        if (textureChanged || texPalChanged)
+        {
+            //printf("check invalidation %d\n", TexCache.size());
+            for (auto it = Cache.begin(); it != Cache.end();)
+            {
+                TexCacheEntry& entry = it->second;
+                if (textureChanged)
+                {
+                    // an entry has up to two texture memory ranges (the second one is only
+                    // filled in for format 5); an unused range has size 0 and yields no iterations
+                    for (u32 i = 0; i < 2; i++)
+                    {
+                        // dirty state is tracked in units of VRAMDirtyGranularity bytes, one bit each
+                        u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity;
+                        u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
+
+                        // the bits are stored in 64-bit words
+                        u32 startEntry = startBit >> 6;
+                        u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
+                        for (u32 j = startEntry; j < startEntry + entriesCount; j++)
+                        {
+                            // NOTE(review): GetRangedBitMask is defined elsewhere; presumably it returns
+                            // the bits of word j that lie inside [startBit, startBit+bitsCount) - confirm
+                            if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j])
+                            {
+                                u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
+
+                                if (newTexHash != entry.TextureHash[i])
+                                    goto invalidate;
+                            }
+                        }
+                    }
+                }
+
+                // same check for the palette range (entries without palette have TexPalSize == 0)
+                if (texPalChanged && entry.TexPalSize > 0)
+                {
+                    u32 startBit = entry.TexPalStart / VRAMDirtyGranularity;
+                    u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
+
+                    u32 startEntry = startBit >> 6;
+                    u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
+                    for (u32 j = startEntry; j < startEntry + entriesCount; j++)
+                    {
+                        if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j])
+                        {
+                            u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
+                            if (newPalHash != entry.TexPalHash)
+                                goto invalidate;
+                        }
+                    }
+                }
+
+                // entry is still valid
+                it++;
+                continue;
+            invalidate:
+                // hand the layer back for reuse, then drop the entry
+                FreeTextures[entry.WidthLog2][entry.HeightLog2].push_back(entry.Texture);
+
+                //printf("invalidating texture %d\n", entry.ImageDescriptor);
+
+                it = Cache.erase(it);
+            }
+
+            return true;
+        }
+
+        return false;
+    }
+
+    // Looks up the texture described by texParam/palBase, decoding and uploading it first
+    // if it is not cached yet.
+    //
+    // Outputs: textureHandle and layer identify the array texture layer holding the decoded
+    // texels; helper points at the entry's LastVariant field inside the cache.
+    // texParam must not describe format 0 (asserted).
+    void GetTexture(GPU& gpu, u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper)
+    {
+        // remove sampling and texcoord gen params
+        texParam &= ~0xC00F0000;
+
+        u32 fmt = (texParam >> 26) & 0x7;
+        u64 key = texParam;
+        // format 7 does not use a palette, so palBase is left out of its key
+        if (fmt != 7)
+        {
+            key |= (u64)palBase << 32;
+            // bit 29 is not part of the key for format 5
+            if (fmt == 5)
+                key &= ~((u64)1 << 29);
+        }
+        //printf("%" PRIx64 " %" PRIx32 " %" PRIx32 "\n", key, texParam, palBase);
+
+        assert(fmt != 0 && "no texture is not a texture format!");
+
+        auto it = Cache.find(key);
+
+        if (it != Cache.end())
+        {
+            textureHandle = it->second.Texture.TextureID;
+            layer = it->second.Texture.Layer;
+            helper = &it->second.LastVariant;
+            return;
+        }
+
+        u32 widthLog2 = (texParam >> 20) & 0x7;
+        u32 heightLog2 = (texParam >> 23) & 0x7;
+        u32 width = 8 << widthLog2;
+        u32 height = 8 << heightLog2;
+
+        u32 addr = (texParam & 0xFFFF) * 8;
+
+        TexCacheEntry entry = {0};
+
+        entry.TextureRAMStart[0] = addr;
+        entry.WidthLog2 = widthLog2;
+        entry.HeightLog2 = heightLog2;
+
+        // apparently a new texture
+        if (fmt == 7)
+        {
+            // direct color, 2 bytes per texel, no palette
+            entry.TextureRAMSize[0] = width*height*2;
+
+            ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]);
+        }
+        else if (fmt == 5)
+        {
+            // compressed: the per-block auxiliary data lives in a second memory range
+            // whose address is derived from the texture address
+            u8* texData = &gpu.VRAMFlat_Texture[addr];
+            u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
+            if (addr >= 0x40000)
+                slot1addr += 0x10000;
+            u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr];
+
+            u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16);
+
+            // 4 bytes of selectors and 2 bytes of auxiliary data per 4x4 block
+            entry.TextureRAMSize[0] = width*height/16*4;
+            entry.TextureRAMStart[1] = slot1addr;
+            entry.TextureRAMSize[1] = width*height/16*2;
+            entry.TexPalStart = palBase*16;
+            entry.TexPalSize = 0x10000;
+
+            ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, texData, texAuxData, palData);
+        }
+        else
+        {
+            // paletted formats: texture size in bytes and palette size depend on the format
+            u32 texSize, palAddr = palBase*16, numPalEntries;
+            switch (fmt)
+            {
+            case 1: texSize = width*height; numPalEntries = 32; break;
+            case 6: texSize = width*height; numPalEntries = 8; break;
+            case 2: texSize = width*height/4; numPalEntries = 4; palAddr >>= 1; break;
+            case 3: texSize = width*height/2; numPalEntries = 16; break;
+            case 4: texSize = width*height; numPalEntries = 256; break;
+            }
+
+            palAddr &= 0x1FFFF;
+
+            /*printf("creating texture | fmt: %d | %dx%d | %08x | %08x\n", fmt, width, height, addr, palAddr);
+            svcSleepThread(1000*1000);*/
+
+            entry.TextureRAMSize[0] = texSize;
+            entry.TexPalStart = palAddr;
+            entry.TexPalSize = numPalEntries*2;
+
+            u8* texData = &gpu.VRAMFlat_Texture[addr];
+            u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr);
+
+            //assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);
+
+            bool color0Transparent = texParam & (1 << 29);
+
+            switch (fmt)
+            {
+            case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, texData, palData); break;
+            case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, texData, palData); break;
+            case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
+            case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
+            case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
+            }
+        }
+
+        // remember hashes of the source ranges so Update() can tell real changes apart
+        // from writes that left the data identical
+        for (int i = 0; i < 2; i++)
+        {
+            if (entry.TextureRAMSize[i])
+                entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
+        }
+        if (entry.TexPalSize)
+            entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
+
+        auto& texArrays = TexArrays[widthLog2][heightLog2];
+        auto& freeTextures = FreeTextures[widthLog2][heightLog2];
+
+        // no free layer of this size left: allocate a new array texture and
+        // register all of its layers as free
+        if (freeTextures.size() == 0)
+        {
+            texArrays.resize(texArrays.size()+1);
+            TexHandleT& array = texArrays[texArrays.size()-1];
+
+            // as many layers as fit into 8 MB at 4 bytes per texel, but at most 64
+            u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
+
+            // allocate new array texture
+            //printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor);
+            array = TexLoader.GenerateTexture(width, height, layers);
+
+            for (u32 i = 0; i < layers; i++)
+            {
+                freeTextures.push_back(TexArrayEntry{array, i});
+            }
+        }
+
+        // take the most recently freed layer
+        TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1];
+        freeTextures.pop_back();
+
+        entry.Texture = storagePlace;
+
+        TexLoader.UploadTexture(storagePlace.TextureID, width, height, storagePlace.Layer, DecodingBuffer);
+        //printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor);
+
+        textureHandle = storagePlace.TextureID;
+        layer = storagePlace.Layer;
+        helper = &Cache.emplace(std::make_pair(key, entry)).first->second.LastVariant;
+    }
+
+    // Deletes every array texture through the loader and empties the cache and the
+    // free lists.
+    void Reset()
+    {
+        for (u32 i = 0; i < 8; i++)
+        {
+            for (u32 j = 0; j < 8; j++)
+            {
+                for (u32 k = 0; k < TexArrays[i][j].size(); k++)
+                    TexLoader.DeleteTexture(TexArrays[i][j][k]);
+                TexArrays[i][j].clear();
+                FreeTextures[i][j].clear();
+            }
+        }
+        Cache.clear();
+    }
+private:
+    // One layer of an array texture.
+    struct TexArrayEntry
+    {
+        TexHandleT TextureID;
+        u32 Layer;
+    };
+
+    // Bookkeeping for one cached texture: where its source data lives, hashes of that
+    // data at decode time, and the layer holding the decoded texels.
+    struct TexCacheEntry
+    {
+        u32 LastVariant; // very cheap way to make variant lookup faster
+
+        // byte ranges in the flat texture / palette memory; a size of 0 means "unused"
+        u32 TextureRAMStart[2], TextureRAMSize[2];
+        u32 TexPalStart, TexPalSize;
+        u8 WidthLog2, HeightLog2;
+        TexArrayEntry Texture;
+
+        u64 TextureHash[2];
+        u64 TexPalHash;
+    };
+    std::unordered_map<u64, TexCacheEntry> Cache;
+
+    TexLoaderT TexLoader;
+
+    // both indexed by [widthLog2][heightLog2]
+    std::vector<TexArrayEntry> FreeTextures[8][8];
+    std::vector<TexHandleT> TexArrays[8][8];
+
+    // scratch space the Convert*Texture functions decode into before the upload
+    u32 DecodingBuffer[1024*1024];
+};
+
+}
+
+#endif
--- a/src/GPU3D_TexcacheOpenGL.cpp
+++ b/src/GPU3D_TexcacheOpenGL.cpp
@ -0,0 +1,29 @@
+#include "GPU3D_TexcacheOpenGL.h"
+
+namespace melonDS
+{
+
+// Creates a 2D array texture with one mip level and `layers` layers of
+// width x height texels in the GL_RGBA8UI format, and returns its name.
+// The new texture is left bound to GL_TEXTURE_2D_ARRAY.
+GLuint TexcacheOpenGLLoader::GenerateTexture(u32 width, u32 height, u32 layers)
+{
+    GLuint arrayTexture;
+    glGenTextures(1, &arrayTexture);
+
+    glBindTexture(GL_TEXTURE_2D_ARRAY, arrayTexture);
+    glTexStorage3D(GL_TEXTURE_2D_ARRAY,
+        1, GL_RGBA8UI,
+        width, height, layers);
+
+    return arrayTexture;
+}
+
+// Uploads width x height texels from `data` into layer `layer` of the array texture
+// `handle` (mip level 0, starting at the top-left texel). The data is passed as
+// GL_RGBA_INTEGER / GL_UNSIGNED_BYTE. The texture is left bound to GL_TEXTURE_2D_ARRAY.
+void TexcacheOpenGLLoader::UploadTexture(GLuint handle, u32 width, u32 height, u32 layer, void* data)
+{
+    glBindTexture(GL_TEXTURE_2D_ARRAY, handle);
+
+    glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0,
+        0, 0, layer, // x offset, y offset, z offset (= layer)
+        width, height, 1, // a single layer
+        GL_RGBA_INTEGER, GL_UNSIGNED_BYTE,
+        data);
+}
+
+// Deletes the texture object named by `handle`.
+void TexcacheOpenGLLoader::DeleteTexture(GLuint handle)
+{
+    glDeleteTextures(1, &handle);
+}
+
+}
--- a/src/GPU3D_TexcacheOpenGL.h
+++ b/src/GPU3D_TexcacheOpenGL.h
@ -0,0 +1,25 @@
+#ifndef GPU3D_TEXCACHEOPENGL
+#define GPU3D_TEXCACHEOPENGL
+
+#include "GPU3D_Texcache.h"
+#include "OpenGLSupport.h"
+
+namespace melonDS
+{
+
+template <typename, typename>
+class Texcache;
+
+// Texture loader backing Texcache with OpenGL 2D array textures; handles are GL texture names.
+class TexcacheOpenGLLoader
+{
+public:
+    // Creates an array texture with `layers` layers of width x height texels.
+    GLuint GenerateTexture(u32 width, u32 height, u32 layers);
+    // Uploads one layer's worth of texels from `data` into layer `layer` of `handle`.
+    void UploadTexture(GLuint handle, u32 width, u32 height, u32 layer, void* data);
+    // Deletes the texture `handle`.
+    void DeleteTexture(GLuint handle);
+};
+
+using TexcacheOpenGL = Texcache<TexcacheOpenGLLoader, GLuint>;
+
+}
+
+#endif
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@ -36,32 +36,27 @@ using namespace OpenGL;
 std::optional<GLCompositor> GLCompositor::New() noexcept
 {
    assert(glBindAttribLocation != nullptr);
+    GLuint CompShader {};

-    std::array<GLuint, 3> CompShader {};
-    if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, &CompShader[0], "CompositorShader"))
-        return std::nullopt;
-
-    glBindAttribLocation(CompShader[2], 0, "vPosition");
-    glBindAttribLocation(CompShader[2], 1, "vTexcoord");
-    glBindFragDataLocation(CompShader[2], 0, "oColor");
-
-    if (!OpenGL::LinkShaderProgram(CompShader.data()))
-        // OpenGL::LinkShaderProgram already deletes the shader program object
-        // if linking the shaders together failed.
+    if (!OpenGL::CompileVertexFragmentProgram(CompShader,
+            kCompositorVS, kCompositorFS_Nearest, 
+            "CompositorShader",
+            {{"vPosition", 0}, {"vTexcoord", 1}},
+            {{"oColor", 0}}))
        return std::nullopt;

    return { GLCompositor(CompShader) };
 }

-GLCompositor::GLCompositor(std::array<GLuint, 3> compShader) noexcept : CompShader(compShader)
+GLCompositor::GLCompositor(GLuint compShader) noexcept : CompShader(compShader)
 {
-    CompScaleLoc = glGetUniformLocation(CompShader[2], "u3DScale");
-    Comp3DXPosLoc = glGetUniformLocation(CompShader[2], "u3DXPos");
+    CompScaleLoc = glGetUniformLocation(CompShader, "u3DScale");
+    Comp3DXPosLoc = glGetUniformLocation(CompShader, "u3DXPos");

-    glUseProgram(CompShader[2]);
-    GLuint screenTextureUniform = glGetUniformLocation(CompShader[2], "ScreenTex");
+    glUseProgram(CompShader);
+    GLuint screenTextureUniform = glGetUniformLocation(CompShader, "ScreenTex");
    glUniform1i(screenTextureUniform, 0);
-    GLuint _3dTextureUniform = glGetUniformLocation(CompShader[2], "_3DTex");
+    GLuint _3dTextureUniform = glGetUniformLocation(CompShader, "_3DTex");
    glUniform1i(_3dTextureUniform, 1);

    // all this mess is to prevent bleeding
@ -136,7 +131,7 @@ GLCompositor::~GLCompositor()
    glDeleteVertexArrays(1, &CompVertexArrayID);
    glDeleteBuffers(1, &CompVertexBufferID);

-    OpenGL::DeleteShaderProgram(CompShader.data());
+    glDeleteProgram(CompShader);
 }


@ -174,7 +169,7 @@ GLCompositor& GLCompositor::operator=(GLCompositor&& other) noexcept
        CompVertices = other.CompVertices;

        // Clean up these resources before overwriting them
-        OpenGL::DeleteShaderProgram(CompShader.data());
+        glDeleteProgram(CompShader);
        CompShader = other.CompShader;

        glDeleteBuffers(1, &CompVertexBufferID);
@ -244,11 +239,11 @@ void GLCompositor::Stop(const GPU& gpu) noexcept
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
 }

-void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
+void GLCompositor::RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept
 {
-    int frontbuf = gpu.FrontBuffer;
+    int backbuf = gpu.FrontBuffer ^ 1;
    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[backbuf]);

    glDisable(GL_DEPTH_TEST);
    glDisable(GL_STENCIL_TEST);
@ -260,7 +255,7 @@ void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
    glClear(GL_COLOR_BUFFER_BIT);

    // TODO: select more shaders (filtering, etc)
-    OpenGL::UseShaderProgram(CompShader.data());
+    glUseProgram(CompShader);
    glUniform1ui(CompScaleLoc, Scale);

    // TODO: support setting this midframe, if ever needed
@ -269,12 +264,12 @@ void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, CompScreenInputTex);

-    if (gpu.Framebuffer[frontbuf][0] && gpu.Framebuffer[frontbuf][1])
+    if (gpu.Framebuffer[backbuf][0] && gpu.Framebuffer[backbuf][1])
    {
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER,
-                        GL_UNSIGNED_BYTE, gpu.Framebuffer[frontbuf][0].get());
+                        GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][0].get());
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER,
-                        GL_UNSIGNED_BYTE, gpu.Framebuffer[frontbuf][1].get());
+                        GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][1].get());
    }

    glActiveTexture(GL_TEXTURE1);
--- a/src/GPU_OpenGL.h
+++ b/src/GPU_OpenGL.h
@ -28,6 +28,7 @@ namespace melonDS
 class GPU;
 struct RenderSettings;
 class GLRenderer;
+class Renderer3D;
 class GLCompositor
 {
 public:
@ -42,14 +43,14 @@ public:
    [[nodiscard]] int GetScaleFactor() const noexcept { return Scale; }

    void Stop(const GPU& gpu) noexcept;
-    void RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept;
+    void RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept;
    void BindOutputTexture(int buf);
 private:
-    GLCompositor(std::array<GLuint, 3> CompShader) noexcept;
+    GLCompositor(GLuint CompShader) noexcept;
    int Scale = 0;
    int ScreenH = 0, ScreenW = 0;

-    std::array<GLuint, 3> CompShader {};
+    GLuint CompShader {};
    GLuint CompScaleLoc = 0;
    GLuint Comp3DXPosLoc = 0;

--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@ -35,6 +35,7 @@
 #include "Platform.h"
 #include "FreeBIOS.h"
 #include "Args.h"
+#include "version.h"

 #include "DSi.h"
 #include "DSi_SPI_TSC.h"
@ -2839,7 +2840,7 @@ u8 NDS::ARM9IORead8(u32 addr)
    if(addr >= 0x04FFFA00 && addr < 0x04FFFA10)
    {
        // FIX: GBATek says this should be padded with spaces
-        static char const emuID[16] = "melonDS " MELONDS_VERSION;
+        static char const emuID[16] = "melonDS " MELONDS_VERSION_BASE;
        auto idx = addr - 0x04FFFA00;
        return (u8)(emuID[idx]);
    }
--- a/src/NonStupidBitfield.h
+++ b/src/NonStupidBitfield.h
@ -26,11 +26,38 @@
 #include <initializer_list>
 #include <algorithm>

+namespace melonDS
+{
+
+// Returns the 64-bit mask for word `idx` of a bit range that starts at
+// `startBit` and is `bitsCount` bits long, with bits stored in consecutive
+// 64-bit words (bit N lives in word N >> 6 at position N & 0x3F).
+// Words that the range does not touch yield 0.
+inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount)
+{
+    constexpr u64 allBits = 0xFFFFFFFFFFFFFFFF;
+
+    u32 startEntry = startBit >> 6;
+    u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
+
+    // Words before or after the range contribute nothing. The upper bound is
+    // evaluated in 64 bits, so it cannot wrap around.
+    if (idx < startEntry || idx >= startEntry + entriesCount)
+        return 0;
+
+    if (entriesCount > 1)
+    {
+        // first word: everything from the start position upwards
+        if (idx == startEntry)
+            return allBits << (startBit & 0x3F);
+        // last word: only partial when the range end is not 64-bit aligned
+        if (((startBit + bitsCount) & 0x3F) && idx == startEntry + entriesCount - 1)
+            return ~(allBits << ((startBit + bitsCount) & 0x3F));
+
+        // any word in between is covered completely
+        return allBits;
+    }
+
+    // the whole range sits inside a single word; a 64-bit shift would be
+    // undefined, hence the special case
+    return bitsCount == 64
+        ? allBits
+        : ((1ULL << bitsCount) - 1) << (startBit & 0x3F);
+}
+
 // like std::bitset but less stupid and optimised for 
 // our use case (keeping track of memory invalidations)

-namespace melonDS
-{
 template <u32 Size>
 struct NonStupidBitField
 {
@ -166,6 +193,11 @@ struct NonStupidBitField
        return Ref{*this, idx};
    }

+    // Read-only lookup: true when bit `idx` is set.
+    bool operator[](u32 idx) const
+    {
+        const u64 mask = 1ULL << (idx & 0x3F);
+        return (Data[idx >> 6] & mask) != 0;
+    }
+
    void SetRange(u32 startBit, u32 bitsCount)
    {
        u32 startEntry = startBit >> 6;
@ -187,6 +219,26 @@ struct NonStupidBitField
        }
    }

+    // Index of the lowest set bit, or -1 when no word has a bit set.
+    int Min() const
+    {
+        int word = 0;
+        while (word < DataLength)
+        {
+            if (Data[word] != 0)
+                return word * 64 + __builtin_ctzll(Data[word]);
+            word++;
+        }
+        return -1;
+    }
+
+    // Index of the highest set bit, or -1 when no word has a bit set.
+    int Max() const
+    {
+        int word = DataLength - 1;
+        while (word >= 0)
+        {
+            if (Data[word] != 0)
+            {
+                // clz counts from the top of the word, so flip it into a bit position
+                const int topBit = 63 - __builtin_clzll(Data[word]);
+                return word * 64 + topBit;
+            }
+            word--;
+        }
+        return -1;
+    }
+
    NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
@ -195,6 +247,7 @@ struct NonStupidBitField
        }
        return *this;
    }
+
    NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
@ -203,6 +256,20 @@ struct NonStupidBitField
        }
        return *this;
    }
+
+    // True when at least one of the `Size` tracked bits is set. Bits of the
+    // final word that lie at or beyond `Size` are masked off and ignored.
+    operator bool() const
+    {
+        const u64 tailMask = (Size&0x3F)
+            ? ~(0xFFFFFFFFFFFFFFFF << (Size&0x3F))
+            : 0xFFFFFFFFFFFFFFFF;
+
+        // every word except the last is fully in use
+        for (int word = 0; word < DataLength - 1; word++)
+        {
+            if (Data[word])
+                return true;
+        }
+
+        return (Data[DataLength-1] & tailMask) != 0;
+    }
 };

 }
--- a/src/OpenGLSupport.cpp
+++ b/src/OpenGLSupport.cpp
@ -18,6 +18,14 @@

 #include "OpenGLSupport.h"

+#include <unordered_map>
+#include <vector>
+
+#include <assert.h>
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash/xxhash.h"
+
 namespace melonDS
 {

@ -27,9 +35,158 @@ using Platform::LogLevel;
 namespace OpenGL
 {

-bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name)
+// One binary blob held in the shader cache, together with its length and
+// binary format. Owns `Data`, which is released with delete[].
+// The type is move-only, so a buffer is only ever freed by one entry.
+struct ShaderCacheEntry
+{
+    u32 Length;
+    u8* Data;
+    u32 BinaryFormat;
+
+    // Takes ownership of `data`, which must not be null.
+    ShaderCacheEntry(u8* data, u32 length, u32 binaryFmt)
+        : Length(length), Data(data), BinaryFormat(binaryFmt)
+    {
+        assert(data != nullptr);
+    }
+
+    ShaderCacheEntry(const ShaderCacheEntry&) = delete;
+
+    // Steals the buffer from `other` and leaves it empty.
+    ShaderCacheEntry(ShaderCacheEntry&& other)
+        : Length(other.Length), Data(other.Data), BinaryFormat(other.BinaryFormat)
+    {
+        other.Length = 0;
+        other.Data = nullptr;
+        other.BinaryFormat = 0;
+    }
+
+    ~ShaderCacheEntry()
+    {
+        // a moved-from entry holds nullptr, for which delete[] does nothing
+        delete[] Data;
+    }
+};
+
+std::unordered_map<u64, ShaderCacheEntry> ShaderCache;
+std::vector<u64> NewShaders;
+
+constexpr u32 ShaderCacheMagic = 0x11CAC4E1;
+constexpr u32 ShaderCacheVersion = 1;
+
+// Reads the "shadercache" local file and fills ShaderCache with its entries.
+// Layout: magic, version and program count (u32 each), then per program a u64
+// source hash, a u32 length, a u32 binary format and `length` bytes of data.
+// Parsing stops at the first invalid field; entries read before that stay loaded.
+void LoadShaderCache()
+{
+    // for now the shader cache contains only compute shaders
+    // because they take the longest to compile
+    Platform::FileHandle* cacheFile = Platform::OpenLocalFile("shadercache", Platform::FileMode::Read);
+    if (!cacheFile)
+    {
+        Log(LogLevel::Error, "Could not find shader cache\n");
+        return;
+    }
+
+    // The parser lives in a lambda so that every failure can simply return
+    // and still reach the single CloseFile call below.
+    auto parseCache = [cacheFile]()
+    {
+        u32 magic, version, numPrograms;
+
+        if (Platform::FileRead(&magic, 4, 1, cacheFile) != 1 || magic != ShaderCacheMagic)
+        {
+            Log(LogLevel::Error, "Shader cache file has invalid magic\n");
+            return;
+        }
+
+        if (Platform::FileRead(&version, 4, 1, cacheFile) != 1 || version != ShaderCacheVersion)
+        {
+            Log(LogLevel::Error, "Shader cache file has bad version\n");
+            return;
+        }
+
+        if (Platform::FileRead(&numPrograms, 4, 1, cacheFile) != 1)
+        {
+            Log(LogLevel::Error, "Shader cache file invalid program count\n");
+            return;
+        }
+
+        // not the best approach, because once changes pile up
+        // we read and overwrite the old files
+        for (u32 i = 0; i < numPrograms; i++)
+        {
+            u64 sourceHash;
+            u32 length, binaryFormat;
+
+            // all three header fields are attempted; each successful read counts down
+            int missingFields = 3;
+            missingFields -= Platform::FileRead(&sourceHash, 8, 1, cacheFile);
+            missingFields -= Platform::FileRead(&length, 4, 1, cacheFile);
+            missingFields -= Platform::FileRead(&binaryFormat, 4, 1, cacheFile);
+
+            if (missingFields != 0)
+            {
+                Log(LogLevel::Error, "Invalid shader cache entry\n");
+                return;
+            }
+
+            u8* data = new u8[length];
+            if (Platform::FileRead(data, length, 1, cacheFile) != 1)
+            {
+                Log(LogLevel::Error, "Could not read shader cache entry data\n");
+                delete[] data;
+                return;
+            }
+
+            // a later entry with the same hash replaces an earlier one
+            ShaderCache.erase(sourceHash);
+            ShaderCache.emplace(sourceHash, ShaderCacheEntry(data, length, binaryFormat));
+        }
+    };
+
+    parseCache();
+    Platform::CloseFile(cacheFile);
+}
+
+// Rewrites the header of the "shadercache" local file (magic, version, total
+// program count) and appends every shader listed in NewShaders to its end.
+// NewShaders is emptied afterwards, whether or not writing succeeded.
+void SaveShaderCache()
+{
+    Platform::FileHandle* file = Platform::OpenLocalFile("shadercache", Platform::FileMode::ReadWrite);
+
+    if (file == nullptr)
+    {
+        Log(LogLevel::Error, "Could not open or create shader cache file\n");
+        return;
+    }
+
+    // each successful write counts down; anything left over means a short write
+    int written = 3;
+    u32 magic = ShaderCacheMagic, version = ShaderCacheVersion, numPrograms = ShaderCache.size();
+    written -= Platform::FileWrite(&magic, 4, 1, file);
+    written -= Platform::FileWrite(&version, 4, 1, file);
+    written -= Platform::FileWrite(&numPrograms, 4, 1, file);
+
+    if (written != 0)
+    {
+        Log(LogLevel::Error, "Could not write shader cache header\n");
+        goto writeError;
+    }
+
+    // existing entries are kept, new ones go behind them
+    Platform::FileSeek(file, 0, Platform::FileSeekOrigin::End);
+
+    // size() is a size_t; convert explicitly so the format specifier matches
+    Log(LogLevel::Debug, "new shaders %u\n", static_cast<u32>(NewShaders.size()));
+
+    for (u64 newShader : NewShaders)
+    {
+        auto it = ShaderCache.find(newShader);
+        if (it == ShaderCache.end())
+        {
+            // nothing to write for a hash that is no longer in the cache
+            Log(LogLevel::Error, "Could not insert new shader cache entry\n");
+            continue;
+        }
+
+        int error = 4;
+        error -= Platform::FileWrite(&it->first, 8, 1, file);
+        error -= Platform::FileWrite(&it->second.Length, 4, 1, file);
+        error -= Platform::FileWrite(&it->second.BinaryFormat, 4, 1, file);
+        error -= Platform::FileWrite(it->second.Data, it->second.Length, 1, file);
+
+        if (error != 0)
+        {
+            Log(LogLevel::Error, "Could not insert new shader cache entry\n");
+            goto writeError;
+        }
+    }
+
+writeError:
+    Platform::CloseFile(file);
+
+    NewShaders.clear();
+}
+
+bool CompilerShader(GLuint& id, const std::string& source, const std::string& name, const std::string& type)
 {
-    int len;
    int res;

    if (!glCreateShader)
@ -38,61 +195,32 @@ bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char*
        return false;
    }

-    ids[0] = glCreateShader(GL_VERTEX_SHADER);
-    len = strlen(vs);
-    glShaderSource(ids[0], 1, &vs, &len);
-    glCompileShader(ids[0]);
+    const char* sourceC = source.c_str();
+    int len = source.length();
+    glShaderSource(id, 1, &sourceC, &len);

-    glGetShaderiv(ids[0], GL_COMPILE_STATUS, &res);
+    glCompileShader(id);
+
+    glGetShaderiv(id, GL_COMPILE_STATUS, &res);
    if (res != GL_TRUE)
    {
-        glGetShaderiv(ids[0], GL_INFO_LOG_LENGTH, &res);
+        glGetShaderiv(id, GL_INFO_LOG_LENGTH, &res);
        if (res < 1) res = 1024;
        char* log = new char[res+1];
-        glGetShaderInfoLog(ids[0], res+1, NULL, log);
-        Log(LogLevel::Error, "OpenGL: failed to compile vertex shader %s: %s\n", name, log);
-        Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", vs);
+        glGetShaderInfoLog(id, res+1, NULL, log);
+        Log(LogLevel::Error, "OpenGL: failed to compile %s shader %s: %s\n", type.c_str(), name.c_str(), log);
+        Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source.c_str());
        delete[] log;

-        glDeleteShader(ids[0]);
+        glDeleteShader(id);

        return false;
    }

-    ids[1] = glCreateShader(GL_FRAGMENT_SHADER);
-    len = strlen(fs);
-    glShaderSource(ids[1], 1, &fs, &len);
-    glCompileShader(ids[1]);
-
-    glGetShaderiv(ids[1], GL_COMPILE_STATUS, &res);
-    if (res != GL_TRUE)
-    {
-        glGetShaderiv(ids[1], GL_INFO_LOG_LENGTH, &res);
-        if (res < 1) res = 1024;
-        char* log = new char[res+1];
-        glGetShaderInfoLog(ids[1], res+1, NULL, log);
-        Log(LogLevel::Error, "OpenGL: failed to compile fragment shader %s: %s\n", name, log);
-        //printf("shader source:\n--\n%s\n--\n", fs);
-        delete[] log;
-
-        Platform::FileHandle* logf = Platform::OpenFile("shaderfail.log", Platform::FileMode::WriteText);
-        Platform::FileWrite(fs, len+1, 1, logf);
-        Platform::CloseFile(logf);
-
-        glDeleteShader(ids[0]);
-        glDeleteShader(ids[1]);
-
-        return false;
-    }
-
-    ids[2] = glCreateProgram();
-    glAttachShader(ids[2], ids[0]);
-    glAttachShader(ids[2], ids[1]);
-
    return true;
 }

-bool LinkShaderProgram(GLuint* ids)
+bool LinkProgram(GLuint& result, GLuint* ids, int numIds)
 {
    int res;

@ -102,46 +230,132 @@ bool LinkShaderProgram(GLuint* ids)
        return false;
    }

-    glLinkProgram(ids[2]);
+    for (int i = 0; i < numIds; i++)
+    {
+        glAttachShader(result, ids[i]);
+    }

-    glDetachShader(ids[2], ids[0]);
-    glDetachShader(ids[2], ids[1]);
+    glLinkProgram(result);

-    glDeleteShader(ids[0]);
-    glDeleteShader(ids[1]);
+    for (int i = 0; i < numIds; i++)
+        glDetachShader(result, ids[i]);

-    glGetProgramiv(ids[2], GL_LINK_STATUS, &res);
+    glGetProgramiv(result, GL_LINK_STATUS, &res);
    if (res != GL_TRUE)
    {
-        glGetProgramiv(ids[2], GL_INFO_LOG_LENGTH, &res);
+        glGetProgramiv(result, GL_INFO_LOG_LENGTH, &res);
        if (res < 1) res = 1024;
        char* log = new char[res+1];
-        glGetProgramInfoLog(ids[2], res+1, NULL, log);
+        glGetProgramInfoLog(result, res+1, NULL, log);
        Log(LogLevel::Error, "OpenGL: failed to link shader program: %s\n", log);
        delete[] log;

-        glDeleteProgram(ids[2]);
-
        return false;
    }

    return true;
 }

-void DeleteShaderProgram(GLuint* ids)
+// Compiles `source` as a compute shader and links it into a new program object.
+// On success the program name is stored in `result` and true is returned.
+// On failure the program object is deleted again and false is returned.
+// `name` is passed on to CompilerShader, which uses it in its log messages.
+bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name)
 {
-    if (glDeleteProgram)
-    { // If OpenGL isn't loaded, then there's no shader program to delete
-        glDeleteProgram(ids[2]);
+    // bail out before touching any GL entry point if OpenGL has not been loaded
+    if (!glCreateProgram || !glCreateShader || !glDeleteShader)
+    {
+        Log(LogLevel::Error, "OpenGL: Cannot build shader program, OpenGL hasn't been loaded\n");
+        return false;
+    }
+
+    result = glCreateProgram();
+
+    /*u64 sourceHash = XXH64(source.data(), source.size(), 0);
+    auto it = ShaderCache.find(sourceHash);
+    if (it != ShaderCache.end())
+    {
+        glProgramBinary(result, it->second.BinaryFormat, it->second.Data, it->second.Length);
+
+        GLint linkStatus;
+        glGetProgramiv(result, GL_LINK_STATUS, &linkStatus);
+        if (linkStatus == GL_TRUE)
+        {
+            Log(LogLevel::Info, "Restored shader %s from cache\n", name.c_str());
+            return true;
         }
+        else
+        {
+            Log(LogLevel::Error, "Shader %s from cache was rejected\n", name.c_str());
+        }
+    }*/
+
+    GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
+    bool linkingSuccess = false;
+
+    if (CompilerShader(shader, source, name, "compute"))
+    {
+        linkingSuccess = LinkProgram(result, &shader, 1);
+    }
+    else
+    {
+        // CompilerShader already deleted the shader object when it failed;
+        // glDeleteShader silently ignores 0, so the cleanup below stays valid
+        shader = 0;
+    }
+
+    glDeleteShader(shader);
+
+    if (!linkingSuccess)
+    {
+        glDeleteProgram(result);
+    }
+    /*else
+    {
+        GLint length;
+        GLenum format;
+        glGetProgramiv(result, GL_PROGRAM_BINARY_LENGTH, &length);
+
+        u8* buffer = new u8[length];
+        glGetProgramBinary(result, length, nullptr, &format, buffer);
+
+        ShaderCache.emplace(sourceHash, ShaderCacheEntry(buffer, length, format));
+        NewShaders.push_back(sourceHash);
+    }*/
+
+    return linkingSuccess;
 }

-void UseShaderProgram(GLuint* ids)
+// Compiles a vertex and a fragment shader and links them into a new program.
+// Before linking, every entry of `vertexInAttrs` is bound with
+// glBindAttribLocation and every entry of `fragmentOutAttrs` with
+// glBindFragDataLocation. Returns true and leaves the program name in `result`
+// on success; on failure the program object is deleted and false is returned.
+bool CompileVertexFragmentProgram(GLuint& result,
+    const std::string& vs, const std::string& fs,
+    const std::string& name,
+    const std::initializer_list<AttributeTarget>& vertexInAttrs,
+    const std::initializer_list<AttributeTarget>& fragmentOutAttrs)
 {
-    if (glUseProgram)
-    { // If OpenGL isn't loaded, then there's no shader program to use
-        glUseProgram(ids[2]);
+    // bail out before touching any GL entry point if OpenGL has not been loaded
+    if (!glCreateProgram || !glCreateShader || !glDeleteShader)
+    {
+        Log(LogLevel::Error, "OpenGL: Cannot build shader program, OpenGL hasn't been loaded\n");
+        return false;
+    }
+
+    GLuint shaders[2] =
+    {
+        glCreateShader(GL_VERTEX_SHADER),
+        glCreateShader(GL_FRAGMENT_SHADER)
+    };
+    result = glCreateProgram();
+
+    bool linkingSuccess = false;
+
+    // CompilerShader deletes the shader object itself when compilation fails.
+    // The handle is zeroed in that case so the cleanup at `error` does not
+    // delete it a second time (glDeleteShader silently ignores 0).
+    if (!CompilerShader(shaders[0], vs, name, "vertex"))
+    {
+        shaders[0] = 0;
+        goto error;
+    }
+
+    if (!CompilerShader(shaders[1], fs, name, "fragment"))
+    {
+        shaders[1] = 0;
+        goto error;
+    }
+
+    // locations have to be assigned before the program is linked
+    for (const AttributeTarget& target : vertexInAttrs)
+    {
+        glBindAttribLocation(result, target.Location, target.Name);
     }
+    for (const AttributeTarget& target : fragmentOutAttrs)
+    {
+        glBindFragDataLocation(result, target.Location, target.Name);
+    }
+
+    linkingSuccess = LinkProgram(result, shaders, 2);
+
+error:
+    glDeleteShader(shaders[1]);
+    glDeleteShader(shaders[0]);
+
+    if (!linkingSuccess)
+        glDeleteProgram(result);
+
+    return linkingSuccess;
 }

 }
--- a/src/OpenGLSupport.h
+++ b/src/OpenGLSupport.h
@ -28,10 +28,23 @@
 namespace melonDS::OpenGL
 {

-bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name);
-bool LinkShaderProgram(GLuint* ids);
-void DeleteShaderProgram(GLuint* ids);
-void UseShaderProgram(GLuint* ids);
+void LoadShaderCache();
+void SaveShaderCache();
+
+// Pairs a shader variable name with the location index it should be bound to;
+// consumed by CompileVertexFragmentProgram for its input and output lists.
+struct AttributeTarget
+{
+    const char* Name;
+    u32 Location;
+};
+
+
+bool CompileVertexFragmentProgram(GLuint& result,
+    const std::string& vs, const std::string& fs,
+    const std::string& name,
+    const std::initializer_list<AttributeTarget>& vertexInAttrs,
+    const std::initializer_list<AttributeTarget>& fragmentOutAttrs);
+
+bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name);

 }

--- a/src/debug/GdbProto.cpp
+++ b/src/debug/GdbProto.cpp
@ -1,6 +1,6 @@

 #ifdef _WIN32
-#include <WS2tcpip.h>
+#include <ws2tcpip.h>
 #include <winsock.h>
 #include <winsock2.h>
 #endif
--- a/src/debug/GdbStub.cpp
+++ b/src/debug/GdbStub.cpp
@ -1,6 +1,6 @@

 #ifdef _WIN32
-#include <WS2tcpip.h>
+#include <ws2tcpip.h>
 #include <winsock.h>
 #include <winsock2.h>
 #endif
--- a/src/debug/GdbStub.h
+++ b/src/debug/GdbStub.h
@ -3,6 +3,7 @@
 #define GDBSTUB_H_

 #include <stddef.h>
+#include <sys/types.h>
 #include <map>
 #include <vector>

--- a/src/frontend/libslirp/.clang-format
+++ b/src/frontend/libslirp/.clang-format
@ -0,0 +1,58 @@
+# https://clang.llvm.org/docs/ClangFormat.html
+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+---
+Language:        Cpp
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false # although we like it, it creates churn
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   true
+AlignTrailingComments: false # churn
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account
+AlwaysBreakBeforeMultilineStrings: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   true
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeTernaryOperators: false
+BreakStringLiterals: true
+ColumnLimit:     80
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat:   false
+IndentCaseLabels: false
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
+MacroBlockEnd:   '.*_END$'
+MaxEmptyLinesToKeep: 2
+PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInContainerLiterals: true
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Auto
+UseTab:          Never
+...
--- a/src/frontend/libslirp/.gitignore
+++ b/src/frontend/libslirp/.gitignore
@ -0,0 +1,11 @@
+*.[aod]
+*.gcda
+*.gcno
+*.gcov
+*.lib
+*.obj
+/build/
+/TAGS
+/cscope*
+/src/libslirp-version.h
+/tags
--- a/src/frontend/libslirp/.gitlab-ci.yml
+++ b/src/frontend/libslirp/.gitlab-ci.yml
@ -0,0 +1,110 @@
+image: fedora:latest
+
+variables:
+  DEPS: meson ninja-build
+        gcc libasan liblsan libubsan pkg-config glib2-devel
+        mingw64-gcc mingw64-pkg-config mingw64-glib2
+        clang-analyzer git-core
+
+before_script:
+  - dnf install -y $DEPS
+  - git fetch --tags https://gitlab.freedesktop.org/slirp/libslirp.git
+  - git describe
+
+build:
+  script:
+    - meson --werror build || (cat build/meson-logs/meson-log.txt && exit 1)
+    - ninja -C build
+    - (cd build && meson test) || (cat build/meson-logs/testlog.txt && exit 1)
+    - ninja -C build scan-build
+
+build-asan:
+  script:
+    - CFLAGS=-fsanitize=address meson --werror build || (cat build/meson-logs/meson-log.txt && exit 1)
+    - ninja -C build
+    - (cd build && ASAN_OPTIONS=detect_leaks=0 meson test) || (cat build/meson-logs/testlog.txt && exit 1)
+
+build-lsan:
+  script:
+    - CFLAGS=-fsanitize=leak meson --werror build || (cat build/meson-logs/meson-log.txt && exit 1)
+    - ninja -C build
+    - (cd build && meson test) || (cat build/meson-logs/testlog.txt && exit 1)
+
+build-usan:
+  script:
+    - CFLAGS=-fsanitize=undefined meson --werror build || (cat build/meson-logs/meson-log.txt && exit 1)
+    - ninja -C build
+    - (cd build && meson test) || (cat build/meson-logs/testlog.txt && exit 1)
+
+fuzz:
+  parallel:
+    matrix:
+      - TARGET: [arp, ip-header, udp, udp-h, tftp, dhcp, icmp, tcp, tcp-h, ndp, ip6-header, udp6, udp6-h, tftp6, icmp6, tcp6, tcp6-h]
+  script:
+    - CC=clang CXX=clang++ meson build -Dllvm-fuzz=true || (cat build/meson-logs/meson-log.txt && exit 1)
+    - ninja -C build
+    - build/fuzzing/fuzz-$TARGET -seed=1234 -runs=1000000 fuzzing/IN_$TARGET
+  artifacts:
+    when: on_failure
+    paths:
+      - crash-*
+      - leak-*
+      - oom-*
+      - timeout-*
+
+build-mingw64:
+  script:
+    - (mkdir buildw && cd buildw && mingw64-meson --werror) || (cat buildw/meson-logs/meson-log.txt && exit 1)
+    - ninja -C buildw
+
+Coverity:
+  only:
+    refs:
+      - master
+      - coverity
+  script:
+  - dnf update -y
+  - dnf install -y curl clang
+  - curl -o /tmp/cov-analysis-linux64.tgz https://scan.coverity.com/download/linux64
+    --form project=$COVERITY_SCAN_PROJECT_NAME --form token=$COVERITY_SCAN_TOKEN
+  - tar xfz /tmp/cov-analysis-linux64.tgz
+  - CC=clang meson build
+  - cov-analysis-linux64-*/bin/cov-build --dir cov-int ninja -C build
+  - tar cfz cov-int.tar.gz cov-int
+  - curl https://scan.coverity.com/builds?project=$COVERITY_SCAN_PROJECT_NAME
+    --form token=$COVERITY_SCAN_TOKEN --form email=$GITLAB_USER_EMAIL
+    --form file=@cov-int.tar.gz --form version="`git describe --tags`"
+    --form description="`git describe --tags` / $CI_COMMIT_TITLE / $CI_COMMIT_REF_NAME:$CI_PIPELINE_ID "
+
+integration-slirp4netns:
+  variables:
+    SLIRP4NETNS_VERSION: "v1.1.12"
+    # Consumed by `make benchmark`
+    BENCHMARK_IPERF3_DURATION: "10"
+  script:
+  # Install libslirp
+  - meson build
+  - ninja -C build install
+  # Register the path of libslirp.so.0
+  - echo /usr/local/lib64 >/etc/ld.so.conf.d/libslirp.conf
+  - ldconfig
+  # Install the dependencies of slirp4netns and its test suite
+  # TODO: install udhcpc for `slirp4netns/tests/test-slirp4netns-dhcp.sh` (currently skipped, due to lack of udhcpc)
+  - dnf install -y autoconf automake findutils iperf3 iproute iputils jq libcap-devel libseccomp-devel nmap-ncat util-linux
+  # Check whether the runner environment is configured correctly
+  - unshare -rn true || (echo Make sure you have relaxed seccomp and appamor && exit 1)
+  - unshare -rn ip tap add tap0 mode tap || (echo Make sure you have /dev/net/tun && exit 1)
+  # Install slirp4netns
+  - git clone https://github.com/rootless-containers/slirp4netns -b "${SLIRP4NETNS_VERSION}"
+  - cd slirp4netns
+  - ./autogen.sh
+  - ./configure
+  - make
+  - make install
+  - slirp4netns --version
+  # Run slirp4netns integration test
+  - make distcheck || (cat $(find . -name 'test-suite.log' ) && exit 1)
+  # Run benchmark test to ensure that libslirp can actually handle packets, with several MTU configurations
+  - make benchmark MTU=1500
+  - make benchmark MTU=512
+  - make benchmark MTU=65520
--- a/src/frontend/libslirp/.gitpublish
+++ b/src/frontend/libslirp/.gitpublish
@ -0,0 +1,3 @@
+[gitpublishprofile "default"]
+base = master
+to = slirp@lists.freedesktop.org
--- a/src/frontend/libslirp/CHANGELOG.md
+++ b/src/frontend/libslirp/CHANGELOG.md
@ -0,0 +1,238 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [4.8.0] - TODO
+
+## Security
+
+  - tcp: Fix testing for last fragment
+  - tftp: Fix use-after-free
+
+### Added
+
+  - Add support for Haiku !123
+  - ncsi: Add manufacturer's ID !122
+  - ncsi: Add Get Version ID command !122
+  - ncsi: Add out-of-band ethernet address !125
+  - ncsi: Add Mellanox Get Mac Address handler !125
+  - icmp6: Add echo request forwarding support
+  - Add fuzzing infrastructure
+
+### Fixed
+
+  - Fix missing cleanups
+  - windows: Build fixes
+  - ipv6: Use target address from Neighbor Advertisement !129
+  - dns: Reject domain-search when any entry ends with ".."
+  - dns: Use localhost as dns when /etc/resolv.conf empty !130
+  - icmp: Handle ICMP packets as IPPROTO_IP on BSD !133
+  - eth: pad ethernet frames to 60 bytes #34
+
+### Removed
+
+  - windows: Bump the minimum Windows version to Windows 7
+
+## [4.7.0] - 2022-04-26
+
+### Added
+
+  - Allow disabling the internal DHCP server !22
+  - icmp: Support falling back on trying a SOCK_RAW socket !92
+  - Support Unix sockets in hostfwd !103
+  - IPv6 DNS proxying support !110
+  - bootp: add support for UEFI HTTP boot !111
+  - New callback that supports CFI better !117
+
+### Fixed
+
+  - dhcp: Always send DHCP_OPT_LEN bytes in options !97
+  - Fix Haiku build !98 !99
+  - Fix memory leak when using libresolv !100
+  - Ensure sin6_scope_id is zero for global addresses !102
+  - resolv: fix IPv6 resolution on Darwin !104
+  - socket: Initialize so_type in socreate !109
+  - Handle ECONNABORTED from recv !116
+
+## [4.6.1] - 2021-06-18
+
+### Fixed
+
+ - Fix DHCP regression introduced in 4.6.0. !95
+
+## [4.6.0] - 2021-06-14
+
+### Added
+
+ - mbuf: Add debugging helpers for allocation. !90
+
+### Changed
+
+ -  Revert "Set macOS deployment target to macOS 10.4". !93
+
+### Fixed
+
+ - mtod()-related buffer overflows (CVE-2021-3592 #44, CVE-2021-3593 #45,
+   CVE-2021-3594 #47, CVE-2021-3595 #46).
+ - poll_fd: add missing fd registration for UDP and ICMP
+ - ncsi: make ncsi_calculate_checksum work with unaligned data. !89
+ - Various typos and doc fixes. !88
+
+## [4.5.0] - 2021-05-18
+
+### Added
+
+ - IPv6 forwarding. !62 !75 !77
+ - slirp_neighbor_info() to dump the ARP/NDP tables. !71
+
+### Changed
+
+ - Lazy guest address resolution for IPv6. !81
+ - Improve signal handling when spawning a child. !61
+ - Set macOS deployment target to macOS 10.4. !72
+ - slirp_add_hostfwd: Ensure all error paths set errno. !80
+ - More API documentation.
+
+### Fixed
+
+ - Assertion failure on unspecified IPv6 address. !86
+ - Disable polling for PRI on MacOS, fixing some closing streams issues. !73 
+ - Various memory leak fixes on fastq/batchq. !68
+ - Memory leak on IPv6 fast-send. !67
+ - Slow socket response on Windows. !64
+ - Misc build and code cleanups. !60 !63 !76 !79 !84
+
+## [4.4.0] - 2020-12-02
+
+### Added
+
+ - udp, udp6, icmp: handle TTL value. !48
+ - Enable forwarding ICMP errors. !49
+ - Add DNS resolving for iOS. !54
+
+### Changed
+
+ - Improve meson subproject() support. !53
+ - Removed Makefile-based build system. !56
+
+### Fixed
+
+ - socket: consume empty packets. !55
+ - check pkt_len before reading protocol header (CVE-2020-29129). !57
+ - ip_stripoptions use memmove (fixes undefined behaviour). !47
+ - various Coverity-related changes/fixes.
+
+## [4.3.1] - 2020-07-08
+
+### Changed
+
+ - A silent truncation could occur in `slirp_fmt()`, which will now print a
+   critical message. See also #22.
+
+### Fixed
+
+ - CVE-2020-10756 - Drop bogus IPv6 messages that could lead to data leakage.
+   See !44 and !42.
+ - Fix win32 builds by using the SLIRP_PACKED definition.
+ - Various coverity scan errors fixed. !41
+ - Fix new GCC warnings. !43
+
+## [4.3.0] - 2020-04-22
+
+### Added
+
+ - `SLIRP_VERSION_STRING` macro, with the git sha suffix when building from git
+ - `SlirpConfig.disable_dns`, to disable DNS redirection #16
+
+### Changed
+
+ - `slirp_version_string()` now has the git sha suffix when building from git
+ - Limit DNS redirection to port 53 #16
+
+### Fixed
+
+ - Fix build regression with mingw & NetBSD
+ - Fix use-after-free in `ip_reass()` (CVE-2020-1983)
+
+## [4.2.0] - 2020-03-17
+
+### Added
+
+ - New API function `slirp_add_unix`: add a forward rule to a Unix socket.
+ - New API function `slirp_remove_guestfwd`: remove a forward rule previously
+   added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd`
+ - New `SlirpConfig.outbound_addr{,6}` fields to bind output socket to a
+   specific address
+
+### Changed
+
+ - socket: do not fallback on host loopback if `get_dns_addr()` failed
+   or the address is in slirp network
+
+### Fixed
+
+ - ncsi: fix checksum OOB memory access
+ - `tcp_emu()`: fix OOB accesses
+ - tftp: restrict relative path access
+ - state: fix loading of guestfwd state
+
+## [4.1.0] - 2019-12-02
+
+### Added
+
+ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`.
+ - Allow custom MTU configuration.
+ - Option to disable host loopback connections.
+ - CI now runs scan-build too.
+
+### Changed
+
+ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused
+   several CVEs, and not useful today in most cases. The feature can
+   be still enabled by setting `SlirpConfig.enable_emu` to true.
+ - meson build system is now `subproject()` friendly.
+ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM)
+ - Various code cleanups.
+
+### Deprecated
+
+ - The `slirp_init()` API.
+
+### Fixed
+
+ - `getpeername()` error after `shutdown(SHUT_WR)`.
+ - Exec forward: correctly parse command lines that contain spaces.
+ - Allow 0.0.0.0 destination address.
+ - Make host receive broadcast packets.
+ - Various memory related fixes (heap overflow, leaks, NULL
+   dereference).
+ - Compilation warnings, dead code.
+
+## [4.0.0] - 2019-05-24
+
+### Added
+
+ - Installable as a shared library.
+ - meson build system
+   (& make build system for in-tree QEMU integration)
+
+### Changed
+
+ - Standalone project, removing any QEMU dependency.
+ - License clarifications.
+
+[Unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.8.0...master
+[4.8.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.7.0...v4.8.0
+[4.7.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.1...v4.7.0
+[4.6.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.0...v4.6.1
+[4.6.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.5.0...v4.6.0
+[4.5.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.4.0...v4.5.0
+[4.4.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.1...v4.4.0
+[4.3.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.0...v4.3.1
+[4.3.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...v4.3.0
+[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0
+[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0
+[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0
--- a/src/frontend/libslirp/CMakeLists.txt
+++ b/src/frontend/libslirp/CMakeLists.txt
@ -0,0 +1,65 @@
+# Build description for the vendored copy of libslirp (C only).
+cmake_minimum_required(VERSION 3.16)
+
+# project() defines libslirp_VERSION and its _MAJOR/_MINOR/_PATCH components,
+# which are re-exported below under the SLIRP_* names.
+project(libslirp VERSION 4.8.0 LANGUAGES C)
+
+# Values consumed by configure_file() further down; presumably these are the
+# placeholders referenced by src/libslirp-version.h.in (template not shown here).
+set(SLIRP_MAJOR_VERSION "${libslirp_VERSION_MAJOR}")
+set(SLIRP_MINOR_VERSION "${libslirp_VERSION_MINOR}")
+set(SLIRP_MICRO_VERSION "${libslirp_VERSION_PATCH}")
+# The escaped quotes make the substituted value a C string literal.
+set(SLIRP_VERSION_STRING "\"${libslirp_VERSION}\"")
+
+# Explicit list of translation units compiled into the library.
+set(SOURCES
+	src/arp_table.c
+	src/bootp.c
+	src/cksum.c
+	src/dhcpv6.c
+	src/dnssearch.c
+	src/if.c
+	src/ip6_icmp.c
+	src/ip6_input.c
+	src/ip6_output.c
+	src/ip_icmp.c
+	src/ip_input.c
+	src/ip_output.c
+	src/mbuf.c
+	src/misc.c
+	src/ncsi.c
+	src/ndp_table.c
+	src/sbuf.c
+	src/slirp.c
+	src/socket.c
+	src/state.c
+	src/stream.c
+	src/tcp_input.c
+	src/tcp_output.c
+	src/tcp_subr.c
+	src/tcp_timer.c
+	src/tftp.c
+	src/udp6.c
+	src/udp.c
+	src/util.c
+	src/version.c
+	src/vmstate.c
+
+	# glib shim
+	glib/glib.c
+)
+
+# Generate libslirp-version.h in the build tree (never in the source tree).
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/libslirp-version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/libslirp-version.h")
+
+# Static library built from the vendored sources listed in SOURCES.
+add_library(slirp STATIC ${SOURCES})
+
+# LIBSLIRP_STATIC_BUILD is PUBLIC so that consumers see it as well; the other
+# two definitions are only needed while compiling the library itself.
+target_compile_definitions(slirp
+	PUBLIC
+		LIBSLIRP_STATIC_BUILD
+	PRIVATE
+		BUILDING_LIBSLIRP
+		"G_LOG_DOMAIN=\"Slirp\""
+)
+
+# The glib shim headers stay private; the library headers and the generated
+# libslirp-version.h (in the binary dir) are exposed to consumers.
+target_include_directories(slirp
+	PRIVATE
+		"${CMAKE_CURRENT_SOURCE_DIR}/glib"
+	PUBLIC
+		"${CMAKE_CURRENT_SOURCE_DIR}/src"
+		"${CMAKE_CURRENT_BINARY_DIR}"
+)
+
+# Platform-specific system libraries needed by the socket code.
+if (WIN32)
+	target_link_libraries(slirp PRIVATE ws2_32 iphlpapi)
+elseif(HAIKU)
+	# Command names are case-insensitive, but keep the casing consistent
+	# with the rest of the file so grep and linters find every call.
+	target_link_libraries(slirp PRIVATE network)
+elseif(APPLE)
+	target_link_libraries(slirp PRIVATE resolv)
+endif()
--- a/src/frontend/libslirp/COPYRIGHT
+++ b/src/frontend/libslirp/COPYRIGHT
@ -0,0 +1,62 @@
+Slirp was written by Danny Gasparovski.
+Copyright (c), 1995,1996 All Rights Reserved.
+
+Slirp is free software; "free" as in you don't have to pay for it, and you
+are free to do whatever you want with it.  I do not accept any donations,
+monetary or otherwise, for Slirp.  Instead, I would ask you to pass this
+potential donation to your favorite charity.  In fact, I encourage
+*everyone* who finds Slirp useful to make a small donation to their
+favorite charity (for example, GreenPeace).  This is not a requirement, but
+a suggestion from someone who highly values the service they provide.
+
+The copyright terms and conditions:
+
+---BEGIN---
+
+ Copyright (c) 1995,1996 Danny Gasparovski.  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---END---
+
+This basically means you can do anything you want with the software, except
+1) call it your own, and 2) claim warranty on it.  There is no warranty for
+this software.  None.  Nada.  If you lose a million dollars while using
+Slirp, that's your loss not mine.  So, ***USE AT YOUR OWN RISK!***.
+
+If these conditions cannot be met due to legal restrictions (E.g. where it
+is against the law to give out Software without warranty), you must cease
+using the software and delete all copies you have.
+
+Slirp uses code that is copyrighted by the following people/organizations:
+
+Juha Pirkola.
+Gregory M. Christy.
+The Regents of the University of California.
+Carnegie Mellon University.
+The Australian National University.
+RSA Data Security, Inc.
+
+Please read the top of each source file for the details on the various
+copyrights.
--- a/src/frontend/libslirp/README.md
+++ b/src/frontend/libslirp/README.md
@ -0,0 +1,60 @@
+# libslirp
+
+libslirp is a user-mode networking library used by virtual machines,
+containers or various tools.
+
+## Getting Started
+
+### Prerequisites
+
+A C compiler, meson and glib2 development libraries.
+
+(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list
+of dependencies on Fedora)
+
+### Building
+
+You may build and install the shared library with meson:
+
+``` sh
+meson build
+ninja -C build install
+```
+And configure QEMU with --enable-slirp=system to link against it.
+
+(QEMU may build with the submodule static library using --enable-slirp=git)
+
+### Testing
+
+Unfortunately, there are no automated tests available.
+
+You may run QEMU ``-net user`` linked with your development version.
+
+## Contributing
+
+Feel free to open issues on the [project
+issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page.
+
+You may clone the [gitlab
+project](https://gitlab.freedesktop.org/slirp/libslirp) and create a
+merge request.
+
+Contributing with gitlab allows gitlab workflow, tracking issues,
+running CI etc.
+
+Alternatively, you may send patches to slirp@lists.freedesktop.org
+mailing list.
+
+## Versioning
+
+We intend to use [libtool's
+versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html)
+for the shared libraries and use [SemVer](http://semver.org/) for
+project versions.
+
+For the versions available, see the [tags on this
+repository](https://gitlab.freedesktop.org/slirp/libslirp/releases).
+
+## License
+
+See the [COPYRIGHT](COPYRIGHT) file for details.
--- a/src/frontend/libslirp/fuzzing/IN_arp/arp.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_arp/arp.pcap
--- a/src/frontend/libslirp/fuzzing/IN_dhcp/dhcp.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_dhcp/dhcp.pkt
--- a/src/frontend/libslirp/fuzzing/IN_dhcp/dhcp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_dhcp/dhcp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_icmp/icmp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_icmp/icmp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_icmp/ping_10-0-2-2.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_icmp/ping_10-0-2-2.pcap
--- a/src/frontend/libslirp/fuzzing/IN_icmp6/icmp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_icmp6/icmp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_icmp6/ndp.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_icmp6/ndp.pcap
@ -0,0 +1 @@
+../IN_ndp/ndp.pcap
--- a/src/frontend/libslirp/fuzzing/IN_icmp6/ping_10-0-2-2.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_icmp6/ping_10-0-2-2.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/DNS_freedesktop_1-1-1-1.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/DNS_freedesktop_1-1-1-1.pcap
@ -0,0 +1 @@
+../IN_udp/DNS_freedesktop_1-1-1-1.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/dhcp.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/dhcp.pkt
@ -0,0 +1 @@
+../IN_dhcp/dhcp.pkt
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/dhcp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/dhcp_capture.pcap
@ -0,0 +1 @@
+../IN_dhcp/dhcp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/icmp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/icmp_capture.pcap
@ -0,0 +1 @@
+../IN_icmp/icmp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/nc-10.0.2.2-8080.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/nc-10.0.2.2-8080.pcap
@ -0,0 +1 @@
+../IN_tcp/nc-10.0.2.2-8080.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/nc-ident.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/nc-ident.pcap
@ -0,0 +1 @@
+../IN_tcp/nc-ident.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/ping_10-0-2-2.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/ping_10-0-2-2.pcap
@ -0,0 +1 @@
+../IN_icmp/ping_10-0-2-2.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/tcp_qemucapt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/tcp_qemucapt.pcap
@ -0,0 +1 @@
+../IN_tcp/tcp_qemucapt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/tftp-get-blah.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/tftp-get-blah.pkt
@ -0,0 +1 @@
+../IN_tftp/tftp-get-blah.pkt
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/tftp_capture.pcap
@ -0,0 +1 @@
+../IN_tftp/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip-header/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip-header/tftp_get_libslirp-txt.pcap
@ -0,0 +1 @@
+../IN_tftp/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/DNS_freedesktop_1-1-1-1.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/DNS_freedesktop_1-1-1-1.pcap
@ -0,0 +1 @@
+../IN_udp6/DNS_freedesktop_1-1-1-1.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/icmp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/icmp_capture.pcap
@ -0,0 +1 @@
+../IN_icmp6/icmp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/ping_10-0-2-2.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/ping_10-0-2-2.pcap
@ -0,0 +1 @@
+../IN_icmp6/ping_10-0-2-2.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/tcp_qemucapt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/tcp_qemucapt.pcap
@ -0,0 +1 @@
+../IN_tcp6/tcp_qemucapt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/tftp_capture.pcap
@ -0,0 +1 @@
+../IN_udp6/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ip6-header/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ip6-header/tftp_get_libslirp-txt.pcap
@ -0,0 +1 @@
+../IN_udp6/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_ndp/ndp.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_ndp/ndp.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tcp-d
+++ b/src/frontend/libslirp/fuzzing/IN_tcp-d
@ -0,0 +1 @@
+IN_tcp
--- a/src/frontend/libslirp/fuzzing/IN_tcp-h
+++ b/src/frontend/libslirp/fuzzing/IN_tcp-h
@ -0,0 +1 @@
+IN_tcp
--- a/src/frontend/libslirp/fuzzing/IN_tcp/nc-10.0.2.2-8080.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tcp/nc-10.0.2.2-8080.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tcp/nc-ident.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tcp/nc-ident.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tcp/tcp_qemucapt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tcp/tcp_qemucapt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tcp6-d
+++ b/src/frontend/libslirp/fuzzing/IN_tcp6-d
@ -0,0 +1 @@
+IN_tcp6
--- a/src/frontend/libslirp/fuzzing/IN_tcp6-h
+++ b/src/frontend/libslirp/fuzzing/IN_tcp6-h
@ -0,0 +1 @@
+IN_tcp6
--- a/src/frontend/libslirp/fuzzing/IN_tcp6/tcp_qemucapt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tcp6/tcp_qemucapt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tftp/tftp-get-blah.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_tftp/tftp-get-blah.pkt
--- a/src/frontend/libslirp/fuzzing/IN_tftp/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tftp/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tftp/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tftp/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tftp6/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tftp6/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_tftp6/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_tftp6/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp-h
+++ b/src/frontend/libslirp/fuzzing/IN_udp-h
@ -0,0 +1 @@
+IN_udp
--- a/src/frontend/libslirp/fuzzing/IN_udp/DNS_freedesktop_1-1-1-1.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp/DNS_freedesktop_1-1-1-1.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp/dhcp.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_udp/dhcp.pkt
@ -0,0 +1 @@
+../IN_dhcp/dhcp.pkt
--- a/src/frontend/libslirp/fuzzing/IN_udp/dhcp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp/dhcp_capture.pcap
@ -0,0 +1 @@
+../IN_dhcp/dhcp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp/tftp-get-blah.pkt
+++ b/src/frontend/libslirp/fuzzing/IN_udp/tftp-get-blah.pkt
@ -0,0 +1 @@
+../IN_tftp/tftp-get-blah.pkt
--- a/src/frontend/libslirp/fuzzing/IN_udp/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp/tftp_capture.pcap
@ -0,0 +1 @@
+../IN_tftp/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp/tftp_get_libslirp-txt.pcap
@ -0,0 +1 @@
+../IN_tftp/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp6-h
+++ b/src/frontend/libslirp/fuzzing/IN_udp6-h
@ -0,0 +1 @@
+IN_udp6
--- a/src/frontend/libslirp/fuzzing/IN_udp6/DNS_freedesktop_1-1-1-1.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp6/DNS_freedesktop_1-1-1-1.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp6/tftp_capture.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp6/tftp_capture.pcap
@ -0,0 +1 @@
+../IN_tftp6/tftp_capture.pcap
--- a/src/frontend/libslirp/fuzzing/IN_udp6/tftp_get_libslirp-txt.pcap
+++ b/src/frontend/libslirp/fuzzing/IN_udp6/tftp_get_libslirp-txt.pcap
@ -0,0 +1 @@
+../IN_tftp6/tftp_get_libslirp-txt.pcap
--- a/src/frontend/libslirp/fuzzing/README.md
+++ b/src/frontend/libslirp/fuzzing/README.md
@ -0,0 +1,59 @@
+# Fuzzing libslirp state and instructions
+
+## Current state
+We chose to use libFuzzer because of its custom mutator feature, which allows us to keep coherent information inside the packets being sent to libslirp. This eases the process of fuzzing, as packets are less likely to be rejected early during processing.
+
+In the current state, the `meson.build` file is not compatible with the original one used by the libslirp main repository, but it should be easy to merge them in a clean way. Also, **in the current state, it seems that there is a memory leak inside the fuzzing code**, which makes it run out of memory. The current goal is to find and get rid of this leak to allow fuzzing for longer without the process being interrupted because of it.
+
+Six harnesses are currently available; more are to be added later to focus on other parts of the code:
+
+- **fuzz-ip-header** : the mutator focuses on the ip header field information,
+- **fuzz-udp** : the mutator only works on udp packets, mutating the udp header and content, or only one or the other (-h,-d),
+- **fuzz-tcp** : the mutator targets tcp packets, header+data, or only one or the other (-h,-d),
+- **fuzz-icmp** : the mutator focuses on icmp packets,
+
+These harnesses should be good starting examples on how to fuzz libslirp using libFuzzer.
+
+## Running the fuzzer
+
+Building the fuzzers/harnesses requires the use of clang, as libFuzzer is part of LLVM.
+You can build them by running:
+
+`CC=clang meson build && ninja -C build`
+
+It will build the fuzzer in the ./build/fuzzing/ directory.
+
+A script named `fuzzing/coverage.py` is available to generate coverage information. **It makes a lot of assumptions on the directory structure** and should be read before use.
+
+To run the fuzzer, simply run some of:
+
+- `build/fuzzing/fuzz-ip-header fuzzing/IN_ip-header`
+- `build/fuzzing/fuzz-udp fuzzing/IN_udp`
+- `build/fuzzing/fuzz-udp-h fuzzing/IN_udp-h`
+- `build/fuzzing/fuzz-tftp fuzzing/IN_tftp`
+- `build/fuzzing/fuzz-dhcp fuzzing/IN_dhcp`
+- `build/fuzzing/fuzz-icmp fuzzing/IN_icmp`
+- `build/fuzzing/fuzz-tcp fuzzing/IN_tcp`
+
+Your current directory should be a separate directory, as crashes are written to it. New inputs found by the fuzzer will go directly in the `IN` folder.
+
+# Adding new files to the corpus
+
+In its current state, the fuzzing code is taking pcap files as input, we produced some using `tcpdump` on linux inside qemu with default settings.
+Those files should be captured using the `EN10MB (Ethernet)` data link type, this can be set with the flag `-y` but it seems this can't be done while listening on all interfaces (`-i any`).
+New files should give new coverage. To ensure a new file is useful, the `coverage.py` script (see next section) can be used to compare the coverage with and without that new file.
+
+# Coverage
+
+The `coverage.py` script shows coverage information about the corpus. It makes a lot of assumptions on the directory structure, so it should be read and probably modified before running it.
+It must be called with the fuzz target, the corpus directory and the result type (`export`, `show` or `report`): `python coverage.py fuzz-udp IN_udp report`.
+To generate coverage information, the following flags are passed to the fuzzer and libslirp:
+
+- `-g`
+- `-fsanitize-coverage=edge,indirect-calls,trace-cmp`
+- `-fprofile-instr-generate`
+- `-fcoverage-mapping`
+
+The last 2 arguments should also be passed to the linker.
+
+Then the `llvm-profdata` and `llvm-cov` tools can be used to generate a report and a fancy set of HTML files with line-coverage information.
--- a/src/frontend/libslirp/fuzzing/coverage.py
+++ b/src/frontend/libslirp/fuzzing/coverage.py
@ -0,0 +1,37 @@
+from os import chdir,listdir,environ
+from os.path import isfile,join,isdir
+from subprocess import DEVNULL, run
+import sys
+
+ignored_files = "-ignore-filename-regex=glib -ignore-filename-regex=fuzz -ignore-filename-regex=helper -ignore-filename-regex=h$"
+
+if __name__ == "__main__":
+    chdir("build/fuzzing/out")
+    available_targets = [exe for exe in listdir("../") if isfile(join("..", exe))]
+    available_corpus_path = [exe for exe in listdir("../../../fuzzing/") if isdir(join("../../../fuzzing/", exe))]
+    available_result_types = ["export", "show", "report"]
+    if len(sys.argv) != 4 or sys.argv[1] not in available_targets or sys.argv[2] not in available_corpus_path or sys.argv[3] not in available_result_types:
+        print("usage : python coverage.py fuzz_target IN_protol result_type")
+        print(" - available targets : ")
+        print(available_targets)
+        print(" - available_corpus_path : ")
+        print(available_corpus_path)
+        print(" - available result types : ")
+        print(available_result_types)
+        exit(0)
+    fuzzing_target = sys.argv[1]
+    corpus_path = "../../../fuzzing/"+sys.argv[2]+"/"
+    result_type = sys.argv[3]
+    if fuzzing_target in available_targets:
+        environ["LLVM_PROFILE_FILE"] = fuzzing_target + "_%p.profraw"
+        corpus = listdir(corpus_path)
+        for f in corpus:
+            #print(corpus_path+f)
+            run(["../" + fuzzing_target, corpus_path+f,"-detect_leaks=0"], stdin=DEVNULL, stdout=DEVNULL, stderr=DEVNULL)
+        run(["llvm-profdata merge -sparse " + fuzzing_target + "_*.profraw -o " + fuzzing_target + ".profdata"], shell=True)
+        if result_type == "export" :
+            run(["llvm-cov show ../" + fuzzing_target + " -format=html -output-dir=../report -instr-profile=" + fuzzing_target + ".profdata " + ignored_files], shell=True)
+        elif result_type == "show" :
+            run(["llvm-cov show ../" + fuzzing_target + " -instr-profile=" + fuzzing_target + ".profdata " + ignored_files], shell=True)
+        else:
+            run(["llvm-cov report ../" + fuzzing_target + " -instr-profile=" + fuzzing_target + ".profdata " + ignored_files], shell=True)
--- a/src/frontend/libslirp/fuzzing/fuzz-input.options
+++ b/src/frontend/libslirp/fuzzing/fuzz-input.options
@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 1024
--- a/src/frontend/libslirp/fuzzing/fuzz-main.c
+++ b/src/frontend/libslirp/fuzzing/fuzz-main.c
@ -0,0 +1,35 @@
+#include <glib.h>
+#include <stdlib.h>
+
+#define MIN_NUMBER_OF_RUNS 1
+#define EXIT_TEST_SKIP 77
+
+extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size);
+
+/*
+ * Standalone driver: reads every file named on the command line and hands
+ * its contents to LLVMFuzzerTestOneInput() MIN_NUMBER_OF_RUNS times.
+ *
+ * Returns EXIT_FAILURE as soon as a file cannot be read, EXIT_TEST_SKIP as
+ * soon as the fuzz target reports that value, and EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char **argv)
+{
+    int arg = 1;
+
+    while (arg < argc) {
+        char *path = argv[arg];
+        char *contents;
+        size_t length;
+        GError *error = NULL;
+        int run = 0;
+
+        if (!g_file_get_contents(path, &contents, &length, &error)) {
+            g_warning("Failed to read '%s': %s", path, error->message);
+            g_clear_error(&error);
+            return EXIT_FAILURE;
+        }
+
+        g_print("%s...\n", path);
+        while (run < MIN_NUMBER_OF_RUNS) {
+            int status = LLVMFuzzerTestOneInput((void *)contents, length);
+
+            if (status == EXIT_TEST_SKIP) {
+                return EXIT_TEST_SKIP;
+            }
+            run++;
+        }
+        g_free(contents);
+        arg++;
+    }
+
+    return EXIT_SUCCESS;
+}
--- a/src/frontend/libslirp/fuzzing/helper.c
+++ b/src/frontend/libslirp/fuzzing/helper.c
@ -0,0 +1,271 @@
+#include "helper.h"
+#include <glib.h>
+#include <stdlib.h>
+#include "../src/libslirp.h"
+#include "../src/ip6.h"
+#include "slirp_base_fuzz.h"
+
+#define MIN_NUMBER_OF_RUNS 1
+#define EXIT_TEST_SKIP 77
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+struct in6_addr ip6_host;
+struct in6_addr ip6_dns;
+
+/// Function to compute the checksum of the ip header, should be compatible with
+/// TCP and UDP checksum calculation too.
+///
+/// The buffer is summed as a sequence of big-endian 16-bit words; an odd
+/// trailing byte is treated as the high byte of a final word. The 32-bit sum
+/// is folded to 16 bits twice and the one's complement is returned.
+///
+/// The words are assembled byte by byte rather than read through a
+/// `uint16_t *`, so `Data` does not have to be 2-byte aligned and the result
+/// does not depend on host byte order.
+uint16_t compute_checksum(uint8_t *Data, size_t Size)
+{
+    uint32_t sum = 0;
+
+    for (size_t i = 0; i + 1 < Size; i += 2) {
+        sum += ((uint32_t)Data[i] << 8) | Data[i + 1];
+    }
+    if (Size % 2 == 1)
+        sum += (uint32_t)Data[Size - 1] << 8;
+
+    /* Fold the carries back in; the second fold absorbs a carry produced by
+     * the first one. */
+    uint16_t carry = sum >> 16;
+    uint32_t sum_val = carry + (sum & 0xFFFF);
+    uint16_t result = (sum_val >> 16) + (sum_val & 0xFFFF);
+    return ~result;
+}
+
+/*
+ * Stub with the signature of connect(): ignores its arguments and always
+ * reports success. Presumably meant to replace the libc symbol so the fuzzed
+ * code never opens real connections.
+ */
+int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+    /* FIXME: fail on some addr? */
+    return 0;
+}
+
+/* Stub with the signature of listen(): does nothing and reports success. */
+int listen(int sockfd, int backlog)
+{
+    return 0;
+}
+
+/* Stub with the signature of bind(): ignores the address and reports success. */
+int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+    /* FIXME: fail on some addr? */
+    return 0;
+}
+
+/*
+ * Stub with the signature of send(): discards the data and reports that all
+ * `len` bytes were sent.
+ */
+ssize_t send(int sockfd, const void *buf, size_t len, int flags)
+{
+    /* FIXME: partial send? */
+    return len;
+}
+
+/*
+ * Stub with the signature of sendto(): discards the data and the destination
+ * and reports that all `len` bytes were sent.
+ */
+ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
+               const struct sockaddr *dest_addr, socklen_t addrlen)
+{
+    /* FIXME: partial send? */
+    return len;
+}
+
+/*
+ * Stub with the signature of recv(): zero-fills the whole buffer and reports
+ * that half of it (len / 2 bytes) was received.
+ */
+ssize_t recv(int sockfd, void *buf, size_t len, int flags)
+{
+    memset(buf, 0, len);
+    return len / 2;
+}
+
+/*
+ * Stub with the signature of recvfrom(): zero-fills the whole buffer and,
+ * when the caller asked for it, the source address, then reports that half
+ * of the buffer (len / 2 bytes) was received.
+ *
+ * `src_addr` and `addrlen` may be NULL, as recvfrom() allows when the caller
+ * is not interested in the peer address; in that case they are left alone.
+ */
+ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags,
+                 struct sockaddr *src_addr, socklen_t *addrlen)
+{
+    memset(buf, 0, len);
+    if (src_addr != NULL && addrlen != NULL)
+        memset(src_addr, 0, *addrlen);
+    return len / 2;
+}
+
+/* Stub with the signature of setsockopt(): ignores the option and reports success. */
+int setsockopt(int sockfd, int level, int optname, const void *optval,
+               socklen_t optlen)
+{
+    return 0;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+/*
+ * Log handler that drops every message. Only compiled when
+ * FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined; installed by
+ * fuzz_set_logging_func().
+ */
+static void empty_logging_func(const gchar *log_domain,
+                               GLogLevelFlags log_level, const gchar *message,
+                               gpointer user_data)
+{
+}
+#endif
+
+/*
+ * Disables logging for oss-fuzz. Must be used with each target.
+ *
+ * When FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined, the default log
+ * handler is replaced by empty_logging_func(); otherwise this is a no-op.
+ */
+static void fuzz_set_logging_func(void)
+{
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    g_log_set_default_handler(empty_logging_func, NULL);
+#endif
+}
+
+/* SlirpCb.send_packet: discards the outgoing packet and reports it fully sent. */
+static ssize_t send_packet(const void *pkt, size_t pkt_len, void *opaque)
+{
+    return pkt_len;
+}
+
+/* SlirpCb.clock_get_ns: always reports time 0, so the clock never advances. */
+static int64_t clock_get_ns(void *opaque)
+{
+    return 0;
+}
+
+/* SlirpCb.timer_new: creates no timer; the callback is ignored and NULL is returned. */
+static void *timer_new(SlirpTimerCb cb, void *cb_opaque, void *opaque)
+{
+    return NULL;
+}
+
+/* SlirpCb.timer_mod: no-op, since timer_new() never creates a timer. */
+static void timer_mod(void *timer, int64_t expire_timer, void *opaque)
+{
+}
+
+/* SlirpCb.timer_free: no-op, since timer_new() never creates a timer. */
+static void timer_free(void *timer, void *opaque)
+{
+}
+
+/* SlirpCb.guest_error: silently ignores the reported message. */
+static void guest_error(const char *msg, void *opaque)
+{
+}
+
+/* SlirpCb.register_poll_fd: no-op; the harness keeps no fd registry. */
+static void register_poll_fd(int fd, void *opaque)
+{
+}
+
+/* SlirpCb.unregister_poll_fd: no-op counterpart of register_poll_fd(). */
+static void unregister_poll_fd(int fd, void *opaque)
+{
+}
+
+/* SlirpCb.notify: no-op. */
+static void notify(void *opaque)
+{
+}
+
+/*
+ * Callback table handed to slirp_init() by LLVMFuzzerTestOneInput(). Every
+ * entry points at one of the stubs above, so nothing slirp requests through
+ * these callbacks has an effect outside the process.
+ */
+static const SlirpCb slirp_cb = {
+    .send_packet = send_packet,
+    .guest_error = guest_error,
+    .clock_get_ns = clock_get_ns,
+    .timer_new = timer_new,
+    .timer_mod = timer_mod,
+    .timer_free = timer_free,
+    .register_poll_fd = register_poll_fd,
+    .unregister_poll_fd = unregister_poll_fd,
+    .notify = notify,
+};
+
+/*
+ * Table of requested poll events, indexed by file descriptor; written by
+ * add_poll_cb() and read back by get_revents_cb().
+ */
+#define MAX_EVID 1024
+static int fake_events[MAX_EVID];
+
+/*
+ * Callback for slirp_pollfds_fill(): records the events requested for `fd`
+ * in fake_events and returns `fd` itself as the index that is later passed
+ * to get_revents_cb(). Asserts that `fd` fits in the table.
+ */
+static int add_poll_cb(int fd, int events, void *opaque)
+{
+    g_assert(fd < G_N_ELEMENTS(fake_events));
+    fake_events[fd] = events;
+    return fd;
+}
+
+/*
+ * Callback for slirp_pollfds_poll(): reports every event that was requested
+ * for `idx` as having occurred, with the error and hang-up bits cleared.
+ * NOTE(review): `idx` is not range-checked here; it is presumably always a
+ * value previously returned by add_poll_cb() -- confirm.
+ */
+static int get_revents_cb(int idx, void *opaque)
+{
+    return fake_events[idx] & ~(SLIRP_POLL_ERR | SLIRP_POLL_HUP);
+}
+
+// Fuzzing strategy is the following:
+//  LLVMFuzzerTestOneInput :
+//      - build a slirp instance,
+//      - extract the packets from the pcap one by one,
+//      - send the data to `slirp_input`
+//      - call `slirp_pollfds_fill` and `slirp_pollfds_poll` to advance slirp
+//      - cleanup slirp when the whole pcap has been unwrapped.
+//
+// `data`/`size` is the raw content of a pcap file. Inputs that are too short,
+// byte-swapped, or not of link type 1 are ignored. Always returns 0.
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+    Slirp *slirp = NULL;
+    struct in_addr net = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */
+    struct in_addr mask = { .s_addr = htonl(0xffffff00) }; /* 255.255.255.0 */
+    struct in_addr host = { .s_addr = htonl(0x0a000202) }; /* 10.0.2.2 */
+    struct in_addr fwd = { .s_addr = htonl(0x0a000205) }; /* 10.0.2.5 */
+    struct in_addr dhcp = { .s_addr = htonl(0x0a00020f) }; /* 10.0.2.15 */
+    struct in_addr dns = { .s_addr = htonl(0x0a000203) }; /* 10.0.2.3 */
+    struct in6_addr ip6_prefix;
+    int ret, vprefix6_len = 64;
+    const char *vhostname = NULL;
+    const char *tftp_server_name = NULL;
+    const char *tftp_export = "fuzzing/tftp";
+    const char *bootfile = NULL;
+    const char **dnssearch = NULL;
+    const char *vdomainname = NULL;
+    const pcap_hdr_t *hdr = (const void *)data;
+    const pcaprec_hdr_t *rec = NULL;
+    uint32_t timeout = 0;
+
+    /* Need at least a complete pcap global header. */
+    if (size < sizeof(pcap_hdr_t)) {
+        return 0;
+    }
+    data += sizeof(*hdr);
+    size -= sizeof(*hdr);
+
+    if (hdr->magic_number == 0xd4c3b2a1) {
+        g_debug("FIXME: byteswap fields");
+        return 0;
+    } /* else assume native pcap file */
+    /* Only link type 1 (Ethernet) captures are handled. */
+    if (hdr->network != 1) {
+        return 0;
+    }
+
+    /* Last argument 0: an already-set SLIRP_FUZZING value is kept. */
+    setenv("SLIRP_FUZZING", "1", 0);
+
+    fuzz_set_logging_func();
+
+    ret = inet_pton(AF_INET6, "fec0::", &ip6_prefix);
+    g_assert_cmpint(ret, ==, 1);
+
+    /* Host is prefix::2 and DNS is prefix::3, mirroring 10.0.2.2 / 10.0.2.3. */
+    ip6_host = ip6_prefix;
+    ip6_host.s6_addr[15] |= 2;
+    ip6_dns = ip6_prefix;
+    ip6_dns.s6_addr[15] |= 3;
+
+    slirp =
+        slirp_init(false, true, net, mask, host, true, ip6_prefix, vprefix6_len,
+                   ip6_host, vhostname, tftp_server_name, tftp_export, bootfile,
+                   dhcp, dns, ip6_dns, dnssearch, vdomainname, &slirp_cb, NULL);
+
+    slirp_add_exec(slirp, "cat", &fwd, 1234);
+
+
+    /* Walk the records; the increment clause skips the payload of the record
+     * just handled, including when `continue` is used below. */
+    for ( ; size > sizeof(*rec); data += rec->incl_len, size -= rec->incl_len) {
+        rec = (const void *)data;
+        data += sizeof(*rec);
+        size -= sizeof(*rec);
+
+        if (rec->incl_len != rec->orig_len) {
+            g_debug("unsupported rec->incl_len != rec->orig_len");
+            break;
+        }
+        if (rec->incl_len > size) {
+            break;
+        }
+
+        /* 14 bytes = Ethernet header; bytes 12-13 hold the EtherType. */
+        if (rec->incl_len >= 14) {
+            if (data[12] == 0x08 && data[13] == 0x00) {
+                /* IPv4 */
+                if (rec->incl_len >= 14 + 16) {
+                    uint32_t ipsource;
+
+                    /* Copy instead of dereferencing a cast pointer: the
+                     * address sits at an arbitrary offset in the capture and
+                     * may not be suitably aligned for a uint32_t load. */
+                    memcpy(&ipsource, data + 14 + 12, sizeof(ipsource));
+
+                    // This is an answer, which we will produce, so don't receive
+                    if (ipsource == htonl(0x0a000202) || ipsource == htonl(0x0a000203))
+                        continue;
+                }
+            } else if (data[12] == 0x86 && data[13] ==  0xdd) {
+                /* IPv6 */
+                if (rec->incl_len >= 14 + 24) {
+                    struct in6_addr ipsource;
+
+                    /* Same reasoning as for IPv4; this also avoids casting
+                     * away the const of the input buffer. */
+                    memcpy(&ipsource, data + 14 + 8, sizeof(ipsource));
+
+                    // This is an answer, which we will produce, so don't receive
+                    if (in6_equal(&ipsource, &ip6_host) || in6_equal(&ipsource, &ip6_dns))
+                        continue;
+                }
+            }
+        }
+
+        slirp_input(slirp, data, rec->incl_len);
+        slirp_pollfds_fill(slirp, &timeout, add_poll_cb, NULL);
+        slirp_pollfds_poll(slirp, 0, get_revents_cb, NULL);
+    }
+
+    slirp_cleanup(slirp);
+
+    return 0;
+}
--- a/src/frontend/libslirp/fuzzing/helper.h
+++ b/src/frontend/libslirp/fuzzing/helper.h
@ -0,0 +1,24 @@
+/* Declarations shared by the fuzzing harnesses. */
+#ifndef _HELPER_H
+#define _HELPER_H
+
+#ifdef _WIN32
+/* as defined in sdkddkver.h */
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600 /* Vista */
+#endif
+#include <ws2tcpip.h>
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+/* <netinet/in.h> is a POSIX header; on Windows struct in6_addr comes from
+ * <ws2tcpip.h>, included above. */
+#ifndef _WIN32
+#include <netinet/in.h>
+#endif
+
+/* Pseudo-header sizes: two addresses (4 or 16 bytes each) plus 4 more bytes. */
+#define PSEUDO_IP_SIZE (4*2 + 4)
+#define PSEUDO_IPV6_SIZE (16*2 + 4)
+
+/* One's-complement checksum over Size bytes of Data (see helper.c). */
+uint16_t compute_checksum(uint8_t *Data, size_t Size);
+
+/* IPv6 addresses of the virtual host and DNS server, defined in helper.c. */
+extern struct in6_addr ip6_host;
+extern struct in6_addr ip6_dns;
+
+#endif /* _HELPER_H */
--- a/Show More
+++ b/Show More