Mirror of https://git.ryujinx.app/ryubing/ryujinx.git, synced 2025-07-31 10:09:42 -06:00

Instead of one big 2048 MB JIT cache that would crash the emulator when maxed out, we now add 256 MB JIT cache regions as needed. This reduces allocated memory for games that make little use of the JIT cache, and helps bigger games that DO need more than 2048 MB of JIT cache (SSBU goes past the 2048 MB limit that would normally crash Ryujinx). Also added a BSD socket that Baba is You's networking uses for downloading custom levels.
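In sketch form, the idea is to grow the cache one fixed-size region at a time instead of reserving one huge block up front. The `GrowableJitCache` class below is hypothetical and uses plain byte arrays for brevity; the real implementation works with reserved address space and page protections, as the file below shows.

using System;
using System.Collections.Generic;

// Hypothetical sketch of on-demand cache growth: a new region is only
// created when the current one cannot satisfy an allocation.
class GrowableJitCache
{
    private const int RegionSize = 256 * 1024 * 1024; // Grow in 256 MiB steps.

    private readonly List<byte[]> _regions = new();
    private int _offset; // Next free byte in the most recent region.

    public (byte[] Region, int Offset) Allocate(int codeSize)
    {
        if (codeSize > RegionSize)
        {
            throw new ArgumentOutOfRangeException(nameof(codeSize));
        }

        // Add a new region when the current one is full (or none exists yet),
        // so games that barely use the JIT cache never pay for the full size.
        if (_regions.Count == 0 || _offset + codeSize > RegionSize)
        {
            _regions.Add(new byte[RegionSize]);
            _offset = 0;
        }

        int offset = _offset;
        _offset += codeSize;

        return (_regions[^1], offset);
    }
}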
341 lines
11 KiB
C#
using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;

namespace Ryujinx.Cpu.LightningJit.Cache
{
    class NoWxCache : IDisposable
    {
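        // On platforms that enforce W^X, a newly translated function is first placed in the
        // local cache, page-aligned so it can be reprotected RX immediately, with entries
        // tracked per thread. A copy is also staged in the shared cache as "pending"; once a
        // function has been called MinCallsForPad times, the pending entries are padded out
        // to a page boundary, reprotected RX, and registered with the translator so guest
        // code can call them directly.
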
        private const int CodeAlignment = 4; // Bytes.
        private const int SharedCacheSize = 2047 * 1024 * 1024; // Largest MiB multiple that fits in an Int32; 2048 MiB would overflow.
        private const int LocalCacheSize = 256 * 1024 * 1024;

        // How many calls to the same function we allow until we pad the shared cache to force
        // the function to become available there and allow the guest to take the fast path.
        private const int MinCallsForPad = 8;

        private class MemoryCache : IDisposable
        {
            private readonly ReservedRegion _region;
            private readonly CacheMemoryAllocator _cacheAllocator;

            public CacheMemoryAllocator Allocator => _cacheAllocator;
            public nint Pointer => _region.Block.Pointer;

            public MemoryCache(IJitMemoryAllocator allocator, ulong size)
            {
                _region = new(allocator, size);
                _cacheAllocator = new((int)size);
            }

            public int Allocate(int codeSize)
            {
                codeSize = AlignCodeSize(codeSize);

                int allocOffset = _cacheAllocator.Allocate(codeSize);

                if (allocOffset < 0)
                {
                    throw new OutOfMemoryException("JIT Cache exhausted.");
                }

                // The region is only reserved up front; commit more of it as the
                // allocation high-water mark grows.
                _region.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);

                return allocOffset;
            }

            public void Free(int offset, int size)
            {
                _cacheAllocator.Free(offset, size);
            }

            public void ReprotectAsRw(int offset, int size)
            {
                Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
                Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

                _region.Block.MapAsRw((ulong)offset, (ulong)size);
            }

            public void ReprotectAsRx(int offset, int size)
            {
                Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
                Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

                _region.Block.MapAsRx((ulong)offset, (ulong)size);

                if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
                {
                    // Invalidate the instruction cache so the CPU does not execute stale
                    // instructions from before the code was written.
                    JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }
            }

            private static int AlignCodeSize(int codeSize)
            {
                // Round up to the next multiple of CodeAlignment (e.g. 13 -> 16);
                // 'checked' guards the addition against Int32 overflow.
                return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
            }

            protected virtual void Dispose(bool disposing)
            {
                if (disposing)
                {
                    _region.Dispose();
                    _cacheAllocator.Clear();
                }
            }

            public void Dispose()
            {
                // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
                Dispose(disposing: true);
                GC.SuppressFinalize(this);
            }
        }

        private readonly IStackWalker _stackWalker;
        private readonly Translator _translator;
        private readonly MemoryCache _sharedCache;
        private readonly MemoryCache _localCache;
        private readonly PageAlignedRangeList _pendingMap;
        private readonly Lock _lock = new();

        class ThreadLocalCacheEntry
        {
            public readonly int Offset;
            public readonly int Size;
            public readonly nint FuncPtr;
            private int _useCount;

            public ThreadLocalCacheEntry(int offset, int size, nint funcPtr)
            {
                Offset = offset;
                Size = size;
                FuncPtr = funcPtr;
                _useCount = 0;
            }

            public int IncrementUseCount()
            {
                return ++_useCount;
            }
        }

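        // [ThreadStatic] gives every thread its own dictionary instance, so each thread
        // tracks an independent view of its entries in the local cache.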
        [ThreadStatic]
        private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;

        public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
        {
            _stackWalker = stackWalker;
            _translator = translator;
            _sharedCache = new(allocator, SharedCacheSize);
            _localCache = new(allocator, LocalCacheSize);
            _pendingMap = new(_sharedCache.ReprotectAsRx, RegisterFunction);
        }

        public unsafe nint Map(nint framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
        {
            if (TryGetThreadLocalFunction(guestAddress, out nint funcPtr))
            {
                return funcPtr;
            }

            lock (_lock)
            {
                if (!_pendingMap.Has(guestAddress) && !_translator.Functions.ContainsKey(guestAddress))
                {
                    // Stage the function in the shared cache; it stays pending (RW) until
                    // enough code or padding accumulates to reprotect a whole page as RX.
                    int funcOffset = _sharedCache.Allocate(code.Length);

                    funcPtr = _sharedCache.Pointer + funcOffset;
                    code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

                    TranslatedFunction function = new(funcPtr, guestSize);

                    _pendingMap.Add(funcOffset, code.Length, guestAddress, function);
                }

                ClearThreadLocalCache(framePointer);

                // Give the calling thread an immediately executable, thread-local copy.
                return AddThreadLocalFunction(code, guestAddress);
            }
        }

        public unsafe nint MapPageAligned(ReadOnlySpan<byte> code)
        {
            lock (_lock)
            {
                // Ensure we will get an aligned offset from the allocator.
                _pendingMap.Pad(_sharedCache.Allocator);

                int sizeAligned = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
                int funcOffset = _sharedCache.Allocate(sizeAligned);

                Debug.Assert((funcOffset & ((int)MemoryBlock.GetPageSize() - 1)) == 0);

                nint funcPtr = _sharedCache.Pointer + funcOffset;
                code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

                _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);

                return funcPtr;
            }
        }

        private bool TryGetThreadLocalFunction(ulong guestAddress, out nint funcPtr)
        {
            if ((_threadLocalCache ??= new()).TryGetValue(guestAddress, out ThreadLocalCacheEntry entry))
            {
                if (entry.IncrementUseCount() >= MinCallsForPad)
                {
                    // The function is being called often, so make it available in the shared cache,
                    // letting the guest code take the fast path and stop calling the emulator to get
                    // the function from the thread local cache. To do that, we pad all "pending"
                    // functions until they complete a page of memory, allowing us to reprotect them as RX.

                    lock (_lock)
                    {
                        _pendingMap.Pad(_sharedCache.Allocator);
                    }
                }

                funcPtr = entry.FuncPtr;

                return true;
            }

            funcPtr = nint.Zero;

            return false;
        }

        private void ClearThreadLocalCache(nint framePointer)
        {
            // Try to delete functions that are already on the shared cache
            // and no longer being executed.

            if (_threadLocalCache == null)
            {
                return;
            }

            IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
                framePointer,
                _localCache.Pointer,
                LocalCacheSize,
                _sharedCache.Pointer,
                SharedCacheSize);

            List<(ulong, ThreadLocalCacheEntry)> toDelete = [];

            foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
            {
                // We only want to delete if the function is already on the shared cache,
                // otherwise we will keep translating the same function over and over again.
                bool canDelete = !_pendingMap.Has(address);
                if (!canDelete)
                {
                    continue;
                }

                // We can only delete if the function is not part of the current thread call stack,
                // otherwise we will crash the program when the thread returns to it.
                foreach (ulong funcAddress in callStack)
                {
                    if (funcAddress >= (ulong)entry.FuncPtr && funcAddress < (ulong)entry.FuncPtr + (ulong)entry.Size)
                    {
                        canDelete = false;
                        break;
                    }
                }

                if (canDelete)
                {
                    toDelete.Add((address, entry));
                }
            }

            int pageSize = (int)MemoryBlock.GetPageSize();

            foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
            {
                _threadLocalCache.Remove(address);

                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

                _localCache.Free(entry.Offset, sizeAligned);
                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
            }
        }

        public void ClearEntireThreadLocalCache()
        {
            // Thread is exiting, delete everything.

            if (_threadLocalCache == null)
            {
                return;
            }

            int pageSize = (int)MemoryBlock.GetPageSize();

            foreach ((_, ThreadLocalCacheEntry entry) in _threadLocalCache)
            {
                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

                _localCache.Free(entry.Offset, sizeAligned);
                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
            }

            _threadLocalCache.Clear();
            _threadLocalCache = null;
        }

        private unsafe nint AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
        {
            int alignedSize = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
            int funcOffset = _localCache.Allocate(alignedSize);

            Debug.Assert((funcOffset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

            nint funcPtr = _localCache.Pointer + funcOffset;
            code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

            (_threadLocalCache ??= new()).Add(guestAddress, new(funcOffset, code.Length, funcPtr));

            _localCache.ReprotectAsRx(funcOffset, alignedSize);

            return funcPtr;
        }

        private void RegisterFunction(ulong address, TranslatedFunction func)
        {
            TranslatedFunction oldFunc = _translator.Functions.GetOrAdd(address, func.GuestSize, func);

            Debug.Assert(oldFunc == func);

            _translator.RegisterFunction(address, func);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _localCache.Dispose();
                _sharedCache.Dispose();
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}