Merge pull request #9441 from skylersaleh/master

Apple M1 Support for MacOS
This commit is contained in:
Léo Lam
2021-05-24 12:39:01 +02:00
committed by GitHub
22 changed files with 598 additions and 38 deletions

View File

@ -21,6 +21,9 @@
#ifdef _WIN32
#include <Windows.h>
#endif
#ifdef __APPLE__
#include <libkern/OSCacheControl.h>
#endif
namespace Arm64Gen
{
@ -342,7 +345,7 @@ void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
if (start == end)
return;
#if defined(IOS)
#if defined(IOS) || defined(__APPLE__)
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#elif defined(WIN32)

View File

@ -8,7 +8,7 @@
#include <string>
#include <thread>
#ifndef _WIN32
#if !defined(_WIN32) && !defined(__APPLE__)
#ifndef __FreeBSD__
#include <asm/hwcap.h>
#endif
@ -71,7 +71,17 @@ void CPUInfo::Detect()
vendor = CPUVendor::ARM;
bFlushToZero = true;
#ifdef _WIN32
#ifdef __APPLE__
num_cores = std::thread::hardware_concurrency();
// M-series CPUs have all of these
bFP = true;
bASIMD = true;
bAES = true;
bSHA1 = true;
bSHA2 = true;
bCRC32 = true;
#elif defined(_WIN32)
num_cores = std::thread::hardware_concurrency();
// Windows does not provide any mechanism for querying the system registers on ARMv8, unlike Linux

View File

@ -16,6 +16,7 @@
#include <windows.h>
#include "Common/StringUtil.h"
#else
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
@ -38,9 +39,15 @@ void* AllocateExecutableMemory(size_t size)
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
void* ptr =
mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
int map_flags = MAP_ANON | MAP_PRIVATE;
#if defined(__APPLE__)
// This check is in place to prepare for x86_64 MAP_JIT support. While MAP_JIT did exist
// prior to 10.14, it had restrictions on the number of JIT allocations that were removed
// in 10.14.
if (__builtin_available(macOS 10.14, *))
map_flags |= MAP_JIT;
#endif
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, map_flags, -1, 0);
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
@ -50,6 +57,79 @@ void* AllocateExecutableMemory(size_t size)
return ptr;
}
// Accessor for the nesting depth used by the JITPageWrite*Execute* functions.
// The counter is wrapped in a function instead of living at namespace scope so
// that those functions can be called portably from inside the constructor of a
// static variable.
//
// The counter is thread_local because the W^X mode is specific to each running
// thread.
static int& JITPageWriteNestCounter()
{
  // A block-scope thread_local variable has static storage semantics per thread.
  thread_local int s_nesting_depth{0};
  return s_nesting_depth;
}
// Certain platforms (Mac OS on ARM) enforce that a single thread can only have write or
// execute permissions to pages at any given point of time. The two below functions
// are used to toggle between having write permissions or execute permissions.
//
// The default state of these allocations in Dolphin is for them to be executable,
// but not writeable. So, functions that are updating these pages should wrap their
// writes like below:
// JITPageWriteEnableExecuteDisable();
// PrepareInstructionStreamForJIT();
// JITPageWriteDisableExecuteEnable();
// These functions can be nested, in which case execution will only be enabled
// after the call to the JITPageWriteDisableExecuteEnable from the top most
// nesting level. Example:
// [JIT page is in execute mode for the thread]
// JITPageWriteEnableExecuteDisable();
// [JIT page is in write mode for the thread]
// JITPageWriteEnableExecuteDisable();
// [JIT page is in write mode for the thread]
// JITPageWriteDisableExecuteEnable();
// [JIT page is in write mode for the thread]
// JITPageWriteDisableExecuteEnable();
// [JIT page is in execute mode for the thread]
// Puts the calling thread into "write" mode: it may write to executable memory,
// but not execute the data. Calls can be nested; the nest counter is incremented
// on every call and only the outermost call changes the protection mode.
void JITPageWriteEnableExecuteDisable()
{
  int& depth = JITPageWriteNestCounter();
#if defined(_M_ARM_64) && defined(__APPLE__)
  // Only the transition away from depth 0 has to lift the write protection.
  const bool is_outermost_call = depth == 0;
  if (is_outermost_call)
  {
    if (__builtin_available(macOS 11.0, *))
      pthread_jit_write_protect_np(0);
  }
#endif
  ++depth;
}
// Allows a thread to execute memory allocated for execution, but not write to it.
//
// Every call decrements the per-thread nest counter; execution is only re-enabled
// once the counter returns to zero, i.e. when the outermost
// JITPageWriteEnableExecuteDisable() call has been matched.
void JITPageWriteDisableExecuteEnable()
{
  int& nest_counter = JITPageWriteNestCounter();
  nest_counter--;

  // Sanity check the counter to identify underflow. This indicates that calls to
  // JITPageWriteDisableExecuteEnable() are not matched with previous calls to
  // JITPageWriteEnableExecuteDisable().
  if (nest_counter < 0)
  {
    PanicAlertFmt("JITPageWriteNestCounter() underflowed");
    // Reset the counter after reporting. If it stayed negative, the next
    // JITPageWriteEnableExecuteDisable() call would not see a depth of zero and
    // would therefore never switch the thread into write mode.
    nest_counter = 0;
  }

#if defined(_M_ARM_64) && defined(__APPLE__)
  if (nest_counter == 0)
  {
    if (__builtin_available(macOS 11.0, *))
    {
      pthread_jit_write_protect_np(1);
    }
  }
#endif
}
void* AllocateMemoryPages(size_t size)
{
@ -128,7 +208,10 @@ void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
PanicAlertFmt("WriteProtectMemory failed!\nVirtualProtect: {}", GetLastErrorString());
#else
#elif !(defined(_M_ARM_64) && defined(__APPLE__))
// MacOS 11.2 on ARM does not allow for changing the access permissions of pages
// that were marked executable, instead it uses the protections offered by MAP_JIT
// for write protection.
if (mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ) != 0)
PanicAlertFmt("WriteProtectMemory failed!\nmprotect: {}", LastStrerrorString());
#endif
@ -140,7 +223,10 @@ void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
PanicAlertFmt("UnWriteProtectMemory failed!\nVirtualProtect: {}", GetLastErrorString());
#else
#elif !(defined(_M_ARM_64) && defined(__APPLE__))
// MacOS 11.2 on ARM does not allow for changing the access permissions of pages
// that were marked executable, instead it uses the protections offered by MAP_JIT
// for write protection.
if (mprotect(ptr, size,
allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ) != 0)
{

View File

@ -10,6 +10,23 @@
namespace Common
{
void* AllocateExecutableMemory(size_t size);
// These two functions control the executable/writable state of the W^X memory
// allocations. More detailed documentation about them is in the .cpp file.
// In general where applicable the ScopedJITPageWriteAndNoExecute wrapper
// should be used to prevent bugs from not pairing up the calls properly.
// Allows a thread to write to executable memory, but not execute the data.
void JITPageWriteEnableExecuteDisable();
// Allows a thread to execute memory allocated for execution, but not write to it.
void JITPageWriteDisableExecuteEnable();
// RAII wrapper around JITPageWrite*Execute*(). While an instance is in scope the
// thread can write to executable memory but not execute it.
//
// The constructor calls JITPageWriteEnableExecuteDisable() and the destructor
// calls JITPageWriteDisableExecuteEnable(), so the two calls are always paired.
struct ScopedJITPageWriteAndNoExecute
{
  ScopedJITPageWriteAndNoExecute() { JITPageWriteEnableExecuteDisable(); }
  ~ScopedJITPageWriteAndNoExecute() { JITPageWriteDisableExecuteEnable(); }

  // A copy would run the destructor once more than the constructor and unbalance
  // the enable/disable pairing, so copying (and, implicitly, moving) is disallowed.
  ScopedJITPageWriteAndNoExecute(const ScopedJITPageWriteAndNoExecute&) = delete;
  ScopedJITPageWriteAndNoExecute& operator=(const ScopedJITPageWriteAndNoExecute&) = delete;
};
void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size, size_t alignment);

View File

@ -289,11 +289,17 @@ void DolphinAnalytics::MakeBaseBuilder()
s64 minor_version; // NSInteger minorVersion
s64 patch_version; // NSInteger patchVersion
};
// Under arm64, we need to call objc_msgSend to receive a struct.
// On x86_64, we need to explicitly call objc_msgSend_stret for a struct.
#if _M_ARM_64
#define msgSend objc_msgSend
#else
#define msgSend objc_msgSend_stret
#endif
// NSOperatingSystemVersion version = [processInfo operatingSystemVersion]
OSVersion version = reinterpret_cast<OSVersion (*)(id, SEL)>(objc_msgSend_stret)(
OSVersion version = reinterpret_cast<OSVersion (*)(id, SEL)>(msgSend)(
processInfo, sel_getUid("operatingSystemVersion"));
#undef msgSend
builder.AddData("osx-ver-major", version.major_version);
builder.AddData("osx-ver-minor", version.minor_version);
builder.AddData("osx-ver-bugfix", version.patch_version);

View File

@ -67,6 +67,12 @@ typedef x86_thread_state64_t SContext;
#define CTX_R14 __r14
#define CTX_R15 __r15
#define CTX_RIP __rip
#elif _M_ARM_64
typedef arm_thread_state64_t SContext;
#define CTX_REG(x) __x[x]
#define CTX_LR __x[30]
#define CTX_SP __sp
#define CTX_PC __pc
#else
#error No context definition for architecture
#endif

View File

@ -25,6 +25,20 @@
#include <unistd.h> // Needed for _POSIX_VERSION
#endif
// Architecture-neutral names for the Mach thread-state flavor (THREAD_STATE64), its
// size in natural_t words (THREAD_STATE64_COUNT) and its register struct
// (thread_state64_t), so the macOS exception handling code in this file can use a
// single set of names on both x86_64 and ARM64.
#if defined(__APPLE__)
#ifdef _M_X86_64
#define THREAD_STATE64_COUNT x86_THREAD_STATE64_COUNT
#define THREAD_STATE64 x86_THREAD_STATE64
#define thread_state64_t x86_thread_state64_t
#elif defined(_M_ARM_64)
#define THREAD_STATE64_COUNT ARM_THREAD_STATE64_COUNT
#define THREAD_STATE64 ARM_THREAD_STATE64
#define thread_state64_t arm_thread_state64_t
#else
// Any other architecture has no mapping here, so fail the build early.
#error Unsupported architecture
#endif
#endif
namespace EMM
{
#ifdef _WIN32
@ -123,7 +137,7 @@ static void ExceptionThread(mach_port_t port)
int64_t code[2];
int flavor;
mach_msg_type_number_t old_stateCnt;
natural_t old_state[x86_THREAD_STATE64_COUNT];
natural_t old_state[THREAD_STATE64_COUNT];
mach_msg_trailer_t trailer;
} msg_in;
@ -134,7 +148,7 @@ static void ExceptionThread(mach_port_t port)
kern_return_t RetCode;
int flavor;
mach_msg_type_number_t new_stateCnt;
natural_t new_state[x86_THREAD_STATE64_COUNT];
natural_t new_state[THREAD_STATE64_COUNT];
} msg_out;
#pragma pack()
memset(&msg_in, 0xee, sizeof(msg_in));
@ -165,13 +179,13 @@ static void ExceptionThread(mach_port_t port)
return;
}
if (msg_in.flavor != x86_THREAD_STATE64)
if (msg_in.flavor != THREAD_STATE64)
{
PanicAlertFmt("unknown flavor {} (expected {})", msg_in.flavor, x86_THREAD_STATE64);
PanicAlertFmt("unknown flavor {} (expected {})", msg_in.flavor, THREAD_STATE64);
return;
}
x86_thread_state64_t* state = (x86_thread_state64_t*)msg_in.old_state;
thread_state64_t* state = (thread_state64_t*)msg_in.old_state;
bool ok = JitInterface::HandleFault((uintptr_t)msg_in.code[1], state);
@ -184,9 +198,9 @@ static void ExceptionThread(mach_port_t port)
if (ok)
{
msg_out.RetCode = KERN_SUCCESS;
msg_out.flavor = x86_THREAD_STATE64;
msg_out.new_stateCnt = x86_THREAD_STATE64_COUNT;
memcpy(msg_out.new_state, msg_in.old_state, x86_THREAD_STATE64_COUNT * sizeof(natural_t));
msg_out.flavor = THREAD_STATE64;
msg_out.new_stateCnt = THREAD_STATE64_COUNT;
memcpy(msg_out.new_state, msg_in.old_state, THREAD_STATE64_COUNT * sizeof(natural_t));
}
else
{
@ -218,7 +232,7 @@ void InstallExceptionHandler()
// Debuggers set the task port, so we grab the thread port.
CheckKR("thread_set_exception_ports",
thread_set_exception_ports(mach_thread_self(), EXC_MASK_BAD_ACCESS, port,
EXCEPTION_STATE | MACH_EXCEPTION_CODES, x86_THREAD_STATE64));
EXCEPTION_STATE | MACH_EXCEPTION_CODES, THREAD_STATE64));
// ...and get rid of our copy so that MACH_NOTIFY_NO_SENDERS works.
CheckKR("mach_port_mod_refs",
mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_SEND, -1));

View File

@ -73,6 +73,8 @@ void JitArm64::Init()
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{
// Ifdef this since the exception handler runs on a separate thread on macOS (ARM)
#if !(defined(__APPLE__) && defined(_M_ARM_64))
// We can't handle any fault from other threads.
if (!Core::IsCPUThread())
{
@ -80,6 +82,7 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
DoBacktrace(access_address, ctx);
return false;
}
#endif
bool success = false;
@ -124,6 +127,7 @@ void JitArm64::ClearCache()
m_handler_to_loc.clear();
blocks.Clear();
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ClearCodeSpace();
farcode.ClearCodeSpace();
UpdateMemoryOptions();
@ -596,6 +600,7 @@ void JitArm64::Jit(u32)
{
ClearCache();
}
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
std::size_t block_size = m_code_buffer.size();
const u32 em_address = PowerPC::ppcState.pc;

View File

@ -59,11 +59,11 @@ void JitArm64BlockCache::WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit,
// Writes the link code for `source` at its exit location by delegating to the
// emitter-based WriteLinkBlock overload, then calls FlushIcache() on the emitter.
void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest)
{
  // Keep JIT pages writable by this thread for as long as the code is being patched.
  const Common::ScopedJITPageWriteAndNoExecute jit_write_guard;

  u8* exit_location = source.exitPtrs;
  ARM64XEmitter link_emitter(exit_location);
  WriteLinkBlock(link_emitter, source, dest);
  link_emitter.FlushIcache();
}
@ -71,9 +71,8 @@ void JitArm64BlockCache::WriteDestroyBlock(const JitBlock& block)
{
// Only clear the entry points as we might still be within this block.
ARM64XEmitter emit(block.checkedEntry);
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
while (emit.GetWritableCodePtr() <= block.normalEntry)
emit.BRK(0x123);
emit.FlushIcache();
}

View File

@ -289,6 +289,7 @@ bool JitArm64::HandleFastmemFault(uintptr_t access_address, SContext* ctx)
if ((const u8*)ctx->CTX_PC - fault_location > fastmem_area_length)
return false;
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ARM64XEmitter emitter((u8*)fault_location);
emitter.BL(slow_handler_iter->second.slowmem_code);
@ -300,6 +301,7 @@ bool JitArm64::HandleFastmemFault(uintptr_t access_address, SContext* ctx)
m_fault_to_handler.erase(slow_handler_iter);
emitter.FlushIcache();
ctx->CTX_PC = reinterpret_cast<std::uintptr_t>(fault_location);
return true;
}

View File

@ -25,6 +25,8 @@ using namespace Arm64Gen;
void JitArm64::GenerateAsm()
{
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
// This value is all of the callee saved registers that we are required to save.
// According to the AAPCS64 we need to save R19 ~ R30 and Q8 ~ Q15.
const u32 ALL_CALLEE_SAVED = 0x7FF80000;

View File

@ -71,9 +71,8 @@ CPUCoreBase* InitJitCore(PowerPC::CPUCore core)
break;
default:
PanicAlertFmtT("The selected CPU emulation core ({0}) is not available. "
"Please select a different CPU emulation core in the settings.",
core);
// Under this case the caller overrides the CPU core to the default and logs that
// it performed the override.
g_jit = nullptr;
return nullptr;
}

View File

@ -473,6 +473,8 @@ if(APPLE)
set_target_properties(dolphin-emu PROPERTIES
MACOSX_BUNDLE true
MACOSX_BUNDLE_INFO_PLIST ${CMAKE_CURRENT_SOURCE_DIR}/Info.plist.in
XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS "${CMAKE_CURRENT_SOURCE_DIR}/DolphinEmu.entitlements"
XCODE_ATTRIBUTE_OTHER_CODE_SIGN_FLAGS "--deep --options=runtime"
OUTPUT_NAME Dolphin
)
@ -516,6 +518,22 @@ if(APPLE)
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -add_rpath "@executable_path/../Frameworks/"
$<TARGET_FILE:dolphin-emu>)
# Optionally code sign the built Dolphin.app bundle with the identity in
# MACOS_CODE_SIGNING_IDENTITY, passing --options=runtime and the
# DolphinEmu.entitlements file to codesign.
#
# One command is registered per candidate output directory (the plain runtime output
# directory, plus the _DEBUG and _RELEASE variants). Every command ends in `|| true`,
# so a codesign failure in any of them does not fail the build.
if(MACOS_CODE_SIGNING)
# Code sign make file builds
add_custom_command(TARGET dolphin-emu
POST_BUILD COMMAND
/usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY}" --deep --options=runtime --entitlements "${CMAKE_SOURCE_DIR}/Source/Core/DolphinQt/DolphinEmu.entitlements" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Dolphin.app" || true)
# Code sign builds for build systems that do have release/debug variants (Xcode)
add_custom_command(TARGET dolphin-emu
POST_BUILD COMMAND
/usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY}" --deep --options=runtime --entitlements "${CMAKE_SOURCE_DIR}/Source/Core/DolphinQt/DolphinEmu.entitlements" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG}/Dolphin.app" || true)
add_custom_command(TARGET dolphin-emu
POST_BUILD COMMAND
/usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY}" --deep --options=runtime --entitlements "${CMAKE_SOURCE_DIR}/Source/Core/DolphinQt/DolphinEmu.entitlements" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE}/Dolphin.app" || true)
endif()
else()
install(TARGETS dolphin-emu RUNTIME DESTINATION ${bindir})
endif()

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.cs.allow-jit</key>
<true/>
<!-- Needed for GameCube microphone emulation -->
<key>com.apple.security.device.audio-input</key>
<true/>
<!-- TODO: It is likely this requirement is coming from Qt, but should confirm -->
<key>com.apple.security.automation.apple-events</key>
<true/>
<!-- This is needed to use adhoc signed linked libraries -->
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
</dict>
</plist>

View File

@ -13,6 +13,8 @@ set(SOURCES
add_executable(MacUpdater ${SOURCES})
set(MacUpdater_NAME "Dolphin Updater")
set(MacUpdater_BIN_DIR ${CMAKE_BINARY_DIR}/Binaries)
set(MacUpdater_BUNDLE_PATH ${MacUpdater_BIN_DIR}/${MacUpdater_NAME}.app)
set_target_properties(MacUpdater PROPERTIES
MACOSX_BUNDLE true
@ -53,8 +55,24 @@ foreach(sb ${STORYBOARDS})
add_custom_command(TARGET MacUpdater POST_BUILD
COMMAND ${IBTOOL} --errors --warnings --notices --output-format human-readable-text
--compile ${MacUpdater_BIN_DIR}/${MacUpdater_NAME}.app/Contents/Resources/${sb}c
--compile ${MacUpdater_BUNDLE_PATH}/Contents/Resources/${sb}c
${CMAKE_CURRENT_SOURCE_DIR}/${sb}
COMMENT "Compiling Storyboard ${sb}...")
endforeach()
# Optionally code sign the updater bundle. One command is registered per candidate
# output location; each one signs the bundle only if it exists at that location.
if(MACOS_CODE_SIGNING)
  # Fall back to the main signing identity when no updater-specific one was given.
  if (MACOS_CODE_SIGNING_IDENTITY_UPDATER STREQUAL "")
    set(MACOS_CODE_SIGNING_IDENTITY_UPDATER "${MACOS_CODE_SIGNING_IDENTITY}")
  endif()

  # NOTE: `test <string>` succeeds for any non-empty string, so guarding with
  # `test <path> || codesign ...` would never run codesign. `test ! -d <path>`
  # succeeds (and skips signing) only when the bundle directory is absent.

  # Make file build code sign
  add_custom_command(TARGET MacUpdater POST_BUILD
    COMMAND test ! -d "${MacUpdater_BUNDLE_PATH}" || /usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY_UPDATER}" --deep --options runtime "${MacUpdater_BUNDLE_PATH}")

  # Xcode build code sign
  add_custom_command(TARGET MacUpdater POST_BUILD
    COMMAND test ! -d "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG}/${MacUpdater_NAME}.app" || /usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY_UPDATER}" --deep --options runtime "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG}/${MacUpdater_NAME}.app")

  add_custom_command(TARGET MacUpdater POST_BUILD
    COMMAND test ! -d "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE}/${MacUpdater_NAME}.app" || /usr/bin/codesign -f -s "${MACOS_CODE_SIGNING_IDENTITY_UPDATER}" --deep --options runtime "${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE}/${MacUpdater_NAME}.app")
endif()

View File

@ -140,7 +140,11 @@ static std::string GetPlatformID()
#if defined _WIN32
return "win";
#elif defined __APPLE__
#if defined(MACOS_UNIVERSAL_BUILD)
return "macos-universal";
#else
return "macos";
#endif
#else
return "unknown";
#endif

View File

@ -54,6 +54,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
: VertexLoaderBase(vtx_desc, vtx_att), m_float_emit(this)
{
AllocCodeSpace(4096);
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ClearCodeSpace();
GenerateVertexLoader();
WriteProtect();