diff --git a/src/GPU.cpp b/src/GPU.cpp
index 1b272cc9..bb3d57fd 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -30,15 +30,17 @@ namespace melonDS
 using Platform::Log;
 using Platform::LogLevel;
 
-#define LINE_CYCLES  (355*6)
+#define LINE_CYCLES   (355*6)
 #define HBLANK_CYCLES (48+(256*6))
 #define FRAME_CYCLES  (LINE_CYCLES * 263)
+#define READ_CYCLES   (520) // CHECKME: Probably off by a little bit
 
 enum
 {
     LCD_StartHBlank = 0,
     LCD_StartScanline,
     LCD_FinishFrame,
+    LCD_ReadScanline,
 };
 
 
@@ -73,6 +75,7 @@ GPU::GPU(melonDS::NDS& nds, std::unique_ptr&& renderer3d, std::uniqu
     NDS.RegisterEventFunc(Event_LCD, LCD_StartHBlank, MemberEventFunc(GPU, StartHBlank));
     NDS.RegisterEventFunc(Event_LCD, LCD_StartScanline, MemberEventFunc(GPU, StartScanline));
     NDS.RegisterEventFunc(Event_LCD, LCD_FinishFrame, MemberEventFunc(GPU, FinishFrame));
+    NDS.RegisterEventFunc(Event_LCD, LCD_ReadScanline, MemberEventFunc(GPU, ReadScanline));
     NDS.RegisterEventFunc(Event_DisplayFIFO, 0, MemberEventFunc(GPU, DisplayFIFO));
 
     InitFramebuffers();
@@ -85,6 +88,7 @@ GPU::~GPU() noexcept
     NDS.UnregisterEventFunc(Event_LCD, LCD_StartHBlank);
     NDS.UnregisterEventFunc(Event_LCD, LCD_StartScanline);
     NDS.UnregisterEventFunc(Event_LCD, LCD_FinishFrame);
+    NDS.UnregisterEventFunc(Event_LCD, LCD_ReadScanline);
     NDS.UnregisterEventFunc(Event_DisplayFIFO, 0);
 }
 
@@ -910,11 +914,10 @@ void GPU::StartHBlank(u32 line) noexcept
 
     if (DispStat[0] & (1<<4)) NDS.SetIRQ(0, IRQ_HBlank);
     if (DispStat[1] & (1<<4)) NDS.SetIRQ(1, IRQ_HBlank);
-
-    if (VCount < 262)
-        NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_StartScanline, line+1);
+    if (VCount == 262 || VCount < 191) // this is probably wrong, but i haven't dug deep enough to prove it yet
+        NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES - READ_CYCLES), LCD_ReadScanline, line);
     else
-        NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_FinishFrame, line+1);
+        NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_StartScanline, line+1);
 }
 
 void GPU::FinishFrame(u32 lines) noexcept
@@ -949,6 +952,22 @@ void GPU::BlankFrame() noexcept
     TotalScanlines = 263;
 }
 
+// LCD event handler, scheduled by StartHBlank READ_CYCLES before the line ends.
+// Calls GPU3D.ScanlineSync for the upcoming scanline (0 when VCount is 262,
+// otherwise line+1), sets bit 12 of GPU3D.DispCnt if that scanline equals
+// UnderflowFlagVCount, then schedules the next LCD event READ_CYCLES later.
+void GPU::ReadScanline(u32 line) noexcept
+{
+    const int scanline = (VCount == 262) ? 0 : (line+1);
+    GPU3D.ScanlineSync(scanline);
+    if (GPU3D.UnderflowFlagVCount == scanline) GPU3D.DispCnt |= (1<<12);
+
+    if (VCount != 262)
+        NDS.ScheduleEvent(Event_LCD, true, READ_CYCLES, LCD_StartScanline, line+1);
+    else
+        NDS.ScheduleEvent(Event_LCD, true, READ_CYCLES, LCD_FinishFrame, line+1);
+}
+
 void GPU::StartScanline(u32 line) noexcept
 {
     if (line == 0)
@@ -1002,13 +1021,6 @@ void GPU::StartScanline(u32 line) noexcept
         NDS.ScheduleEvent(Event_DisplayFIFO, false, 32, 0, 0);
     }
 
-    if (VCount == GPU3D.UnderflowFlagVCount)
-    {
-        // appears to get set the vcount before the underflow occured?
-        // probably gets updated the instant the underflow happened, which might be annoying to work out with precision.
-        GPU3D.DispCnt |= (1<<12);
-    }
-
     if (VCount == 262)
     {
         // frame end
@@ -1020,7 +1032,7 @@ void GPU::StartScanline(u32 line) noexcept
     {
         if (VCount == 192)
         {
-            // in reality rendering already finishes at line 144
+            // in reality rendering already finishes at line 144 (can take up to ~191 depending on load)
             // and games might already start to modify texture memory.
             // That doesn't matter for us because we cache the entire
             // texture memory anyway and only update it before the start
diff --git a/src/GPU.h b/src/GPU.h
index 780d5e01..e1f4b89d 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -506,6 +506,7 @@ public:
     void BlankFrame() noexcept;
     void StartScanline(u32 line) noexcept;
     void StartHBlank(u32 line) noexcept;
+    void ReadScanline(u32 line) noexcept;
 
     void DisplayFIFO(u32 x) noexcept;
 
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 4c177e4c..cbd2721c 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -2547,6 +2547,11 @@ void GPU3D::SetRenderXPos(u16 xpos) noexcept
     RenderXPos = xpos & 0x01FF;
 }
 
+// Forwards the per-scanline sync request to CurrentRenderer.
+void GPU3D::ScanlineSync(int line) noexcept
+{
+    CurrentRenderer->ScanlineSync(line);
+}
 
 u32* GPU3D::GetLine(int line) noexcept
 {
diff --git a/src/GPU3D.h b/src/GPU3D.h
index d35894d6..ada40fb1 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -114,6 +114,7 @@ public:
     void SetRenderXPos(u16 xpos) noexcept;
     [[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; }
 
+    void ScanlineSync(int line) noexcept;
     u32* GetLine(int line) noexcept;
 
     void WriteToGXFIFO(u32 val) noexcept;
@@ -454,6 +455,7 @@ public:
     virtual void RenderFrame(GPU& gpu) = 0;
     virtual void RestartFrame(GPU& gpu) {};
     virtual u32* GetLine(int line) = 0;
+    virtual void ScanlineSync(int line) {};
     virtual void Blit(const GPU& gpu) {};
     virtual void PrepareCaptureFrame() {}
 protected:
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index c3b162d1..d99bbba6 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -1959,7 +1959,7 @@ void SoftRenderer::FinishPushScanline(s32 y, s32 pixelsremain)
     RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);\
     \
     /* set the underflow flag if one of the scanlines came within 14 cycles of visible underflow */\
-    if ((ScanlineTimeout <= RasterTiming) && (gpu.GPU3D.UnderflowFlagVCount == (u16)-1)) gpu.GPU3D.UnderflowFlagVCount = y-1;
+    if ((ScanlineTimeout <= RasterTiming) && (gpu.GPU3D.UnderflowFlagVCount == (u16)-1)) gpu.GPU3D.UnderflowFlagVCount = y - (y&1 ? 0 : 1);
 
 void SoftRenderer::RenderPolygonsFast(GPU& gpu, Polygon** polygons, int npolys)
 {
@@ -2156,8 +2156,7 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
         RenderThreadRendering = false;
     }
 }
-
-u32* SoftRenderer::GetLine(int line)
+void SoftRenderer::ScanlineSync(int line)
 {
     if (RenderThreadRunning.load(std::memory_order_relaxed))
     {
@@ -2167,7 +2166,10 @@ u32* SoftRenderer::GetLine(int line)
         // so we don't need to wait for a specific row)
         Platform::Semaphore_Wait(Sema_ScanlineCount);
     }
+}
 
+u32* SoftRenderer::GetLine(int line)
+{
     return &FinalBuffer[line * ScanlineWidth];
 }
 
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 0c4baf79..4b1a8e52 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -40,6 +40,7 @@ public:
     void RenderFrame(GPU& gpu) override;
     void RestartFrame(GPU& gpu) override;
     u32* GetLine(int line) override;
+    void ScanlineSync(int line) override;
 
     void SetupRenderThread(GPU& gpu);
     void EnableRenderThread();