Compare commits

...

16 Commits

Author SHA1 Message Date
Jakly
a2b30c972a
Merge 19e8774ad0 into 7c1d2a64f4 2024-11-12 12:56:34 +01:00
Nadia Holmquist Pedersen
7c1d2a64f4 Set WIN32_LEAN_AND_MEAN, gets rid of the winsock2 warnings and probably
speeds up compilation a tiny bit

oh and NOMINMAX too for good measure while we're at it
Some checks failed
macOS / ${{ matrix.arch }} (arm64) (push) Has been cancelled
macOS / ${{ matrix.arch }} (x86_64) (push) Has been cancelled
Ubuntu / x86_64 (push) Has been cancelled
Ubuntu / aarch64 (push) Has been cancelled
Windows / build (push) Has been cancelled
macOS / Universal binary (push) Has been cancelled
2024-11-11 14:18:05 +01:00
Nadia Holmquist Pedersen
b2f6fab6f4 cmake: use interface include directories properly
and fix an indent I guess
2024-11-11 12:06:12 +01:00
Jaklyy
19e8774ad0 fix crash under freak circumstances 2024-05-10 05:54:46 -04:00
Jaklyy
60b28d846f make it more clear what's actually being done 2024-04-06 17:55:03 -04:00
Jaklyy
fd650cf133 minor clean up i forgot to do 2024-03-30 13:23:41 -04:00
Jaklyy
3f42215602 ok this makes a *lot* more sense 2024-03-30 08:28:04 -04:00
Jaklyy
8f450faa56 idk 2024-03-29 14:06:52 -04:00
Jaklyy
c5b9c3d36d optimization attempt 2024-03-29 13:38:12 -04:00
Jaklyy
9ee9389ee1 attempt to optimize quotient/remainder calc 2024-03-17 15:03:47 -04:00
Jaklyy
ee91d7f8f3 notes + optimization
should only compile to one div instruction per path now
2024-03-17 10:42:52 -04:00
Jaklyy
b6fa43b0cf implement approximation of z0 > z1 case 2024-03-16 22:10:21 -04:00
Jaklyy
672e6d03fa betterer approximation 2024-03-16 22:10:21 -04:00
Jaklyy
668e5580e0 better approximation 2024-03-16 22:10:21 -04:00
Jaklyy
2c457de681 rework to actually work 2024-03-16 22:10:21 -04:00
Jaklyy
f4bae5a5d5 best approximation so far 2024-03-16 22:10:21 -04:00
5 changed files with 55 additions and 65 deletions

View File

@ -127,6 +127,8 @@ if (ENABLE_JIT)
endif() endif()
endif() endif()
target_include_directories(core INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
set(MELONDS_VERSION_SUFFIX "$ENV{MELONDS_VERSION_SUFFIX}" CACHE STRING "Suffix to add to displayed melonDS version") set(MELONDS_VERSION_SUFFIX "$ENV{MELONDS_VERSION_SUFFIX}" CACHE STRING "Suffix to add to displayed melonDS version")
option(MELONDS_EMBED_BUILD_INFO "Embed detailed build info into the binary" OFF) option(MELONDS_EMBED_BUILD_INFO "Embed detailed build info into the binary" OFF)
set(MELONDS_GIT_BRANCH "$ENV{MELONDS_GIT_BRANCH}" CACHE STRING "The Git branch used for this build") set(MELONDS_GIT_BRANCH "$ENV{MELONDS_GIT_BRANCH}" CACHE STRING "The Git branch used for this build")
@ -178,13 +180,14 @@ endif()
if (WIN32) if (WIN32)
target_link_libraries(core PRIVATE ole32 comctl32 wsock32 ws2_32) target_link_libraries(core PRIVATE ole32 comctl32 wsock32 ws2_32)
target_compile_definitions(core PUBLIC WIN32_LEAN_AND_MEAN NOMINMAX)
elseif(NOT APPLE AND NOT HAIKU) elseif(NOT APPLE AND NOT HAIKU)
check_library_exists(rt shm_open "" NEED_LIBRT) check_library_exists(rt shm_open "" NEED_LIBRT)
if (NEED_LIBRT) if (NEED_LIBRT)
target_link_libraries(core PRIVATE rt) target_link_libraries(core PRIVATE rt)
endif() endif()
elseif(HAIKU) elseif(HAIKU)
target_link_libraries(core PRIVATE network) target_link_libraries(core PRIVATE network)
endif() endif()
if (ENABLE_JIT_PROFILING) if (ENABLE_JIT_PROFILING)

View File

@ -622,7 +622,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer);
} }
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
@ -649,7 +649,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer);
} }
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
@ -755,8 +755,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -841,7 +841,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
@ -863,7 +863,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -889,7 +889,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -915,7 +915,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -980,8 +980,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -1091,7 +1091,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
@ -1130,7 +1130,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1226,7 +1226,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1318,7 +1318,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath

View File

@ -69,23 +69,24 @@ private:
{ {
public: public:
constexpr Interpolator() {} constexpr Interpolator() {}
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1)
{ {
Setup(x0, x1, w0, w1); Setup(x0, x1, w0, w1, wbuffer, z0, z1);
} }
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0)
{ {
this->x0 = x0; this->x0 = x0;
this->x1 = x1; this->x1 = x1;
this->xdiff = x1 - x0; this->xdiff = x1 - x0;
this->wbuffer = wbuffer;
// calculate reciprocal for Z interpolation // calculate increment and init counter for Z interpolation
// TODO eventually: use a faster reciprocal function? if (!dir && !wbuffer && xdiff != 0)
if (this->xdiff != 0) {
this->xrecip_z = (1<<22) / this->xdiff; this->zincr = ((z1 - z0) >> 1) / xdiff << 1;
else this->zcounter = z0;
this->xrecip_z = 0; }
// linear mode is used if both W values are equal and have // linear mode is used if both W values are equal and have
// low-order bits cleared (0-6 along X, 1-6 along Y) // low-order bits cleared (0-6 along X, 1-6 along Y)
@ -144,7 +145,7 @@ private:
constexpr s32 Interpolate(s32 y0, s32 y1) const constexpr s32 Interpolate(s32 y0, s32 y1) const
{ {
if (xdiff == 0 || y0 == y1) return y0; if (x == 0 || xdiff == 0 || y0 == y1) return y0;
if (!linear) if (!linear)
{ {
@ -164,9 +165,9 @@ private:
} }
} }
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const constexpr s32 InterpolateZ(s32 z0, s32 z1)
{ {
if (xdiff == 0 || z0 == z1) return z0; if (x == 0 || xdiff == 0 || z0 == z1) return z0;
if (wbuffer) if (wbuffer)
{ {
@ -179,37 +180,22 @@ private:
else else
{ {
// Z-buffering: linear interpolation // Z-buffering: linear interpolation
// still doesn't quite match hardware... // not perfect, but close
s32 base = 0, disp = 0, factor = 0;
if (z0 < z1)
{
base = z0;
disp = z1 - z0;
factor = x;
}
else
{
base = z1;
disp = z0 - z1,
factor = xdiff - x;
}
if (dir) if (dir)
{ {
int shift = 0; // interpolating along y uses a different algorithm than x
while (disp > 0x3FF) // this algo probably isn't quite right though...
{ if (z0 < z1)
disp >>= 1; return z0 + (s64)(z1-z0) * x / xdiff;
shift++; else
} return z1 + (s64)(z0-z1) * (xdiff-x) / xdiff;
return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
} }
else else
{ {
disp >>= 9; // unoptimized algorithm is: z0 + ((z1-z0 >> 1) / xdiff * x << 1);
return base + (((s64)disp * factor * xrecip_z) >> 13); // or alternatively there's: z0 + (z1-z0) / (xdiff<<1) * (x<<1);
return zcounter += zincr;
} }
} }
} }
@ -219,8 +205,10 @@ private:
int shift; int shift;
bool linear; bool linear;
bool wbuffer;
s32 xrecip_z; s32 zincr;
s32 zcounter;
s32 w0n, w0d, w1d; s32 w0n, w0d, w1d;
u32 yfactor; u32 yfactor;
@ -244,7 +232,7 @@ private:
Increment = 0; Increment = 0;
XMajor = false; XMajor = false;
Interp.Setup(0, 0, 0, 0); Interp.Setup(0, 0, 0, 0, false);
Interp.SetX(0); Interp.SetX(0);
xcov_incr = 0; xcov_incr = 0;
@ -252,7 +240,7 @@ private:
return x0; return x0;
} }
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer)
{ {
this->x0 = x0; this->x0 = x0;
this->y = y; this->y = y;
@ -318,7 +306,7 @@ private:
s32 x = XVal(); s32 x = XVal();
int interpoffset = (Increment >= 0x40000) && (side ^ Negative); int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer);
Interp.SetX(y); Interp.SetX(y);
// used for calculating AA coverage // used for calculating AA coverage

View File

@ -91,8 +91,7 @@ add_compile_definitions(ARCHIVE_SUPPORT_ENABLED)
add_executable(melonDS ${SOURCES_QT_SDL}) add_executable(melonDS ${SOURCES_QT_SDL})
add_subdirectory("../../net" add_subdirectory("../../net"
"${CMAKE_BINARY_DIR}/net" ${CMAKE_BINARY_DIR}/net)
)
target_link_libraries(melonDS PRIVATE net-utils) target_link_libraries(melonDS PRIVATE net-utils)
@ -171,10 +170,10 @@ if (BUILD_STATIC)
endif() endif()
endif() endif()
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_include_directories(melonDS PUBLIC
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") "${CMAKE_CURRENT_SOURCE_DIR}"
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../..") "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../net")
if (USE_QT6) if (USE_QT6)
target_include_directories(melonDS PUBLIC ${Qt6Gui_PRIVATE_INCLUDE_DIRS}) target_include_directories(melonDS PUBLIC ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
else() else()

View File

@ -11,9 +11,9 @@ add_library(net-utils STATIC
MPInterface.cpp MPInterface.cpp
) )
target_include_directories(net-utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_include_directories(net-utils PUBLIC
target_include_directories(net-utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") "${CMAKE_CURRENT_SOURCE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}/..")
option(USE_SYSTEM_LIBSLIRP "Use system libslirp instead of the bundled version" OFF) option(USE_SYSTEM_LIBSLIRP "Use system libslirp instead of the bundled version" OFF)
if (USE_SYSTEM_LIBSLIRP) if (USE_SYSTEM_LIBSLIRP)