mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-29 17:19:44 -06:00
Lots of various changes. CPU detect fix. Maybe a minor speed increase. CPU bugs remain.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@180 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -28,7 +28,7 @@
|
||||
// * Caller fixes stack after call
|
||||
// * function subtract from stack for local storage only.
|
||||
// Scratch: EAX ECX EDX
|
||||
// Callee-save: EBX ESI EDI EBP
|
||||
// Callee-save: EBX ESI EDI EBP
|
||||
// Parameters: -
|
||||
|
||||
// Windows 64-bit
|
||||
@ -103,6 +103,11 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
void ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
void ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
|
||||
// Number of XMM registers available to generated code:
// 8 when building for 32-bit x86 (_M_IX86 defined), 16 otherwise.
inline int ABI_GetNumXMMRegs()
{
#ifdef _M_IX86
	return 8;
#else
	return 16;
#endif
}
|
||||
|
||||
#endif // _JIT_ABI_H
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
//#include <config/i386/cpuid.h>
|
||||
#include <xmmintrin.h>
|
||||
void __cpuid(int info[4], int x) {}
|
||||
// Stub __cpuid: ignores the requested leaf `x` and reports all four registers as zero.
// `info` is declared as int[4] but decays to int*, so sizeof(info) would only be the
// size of a pointer; the byte count is therefore spelled out as four ints.
void __cpuid(int info[4], int x) {memset(info, 0, 4 * sizeof(info[0]));}
|
||||
|
||||
#endif
|
||||
|
||||
@ -72,94 +72,94 @@ void CPUInfoStruct::Detect()
|
||||
isAMD = true;
|
||||
}
|
||||
|
||||
// Get the information associated with each valid Id
|
||||
for (unsigned int i = 0; i <= nIds; ++i)
|
||||
if (nIds >= 2)
|
||||
{
|
||||
__cpuid(CPUInfo, i);
|
||||
// Get the information associated with each valid Id
|
||||
__cpuid(CPUInfo, 1);
|
||||
|
||||
nSteppingID = CPUInfo[0] & 0xf;
|
||||
nModel = (CPUInfo[0] >> 4) & 0xf;
|
||||
nFamily = (CPUInfo[0] >> 8) & 0xf;
|
||||
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
|
||||
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
|
||||
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
|
||||
nBrandIndex = CPUInfo[1] & 0xff;
|
||||
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
|
||||
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
|
||||
bSSE3 = (CPUInfo[2] & 0x1) || false;
|
||||
bSSSE3 = (CPUInfo[2] & 0x200) || false;
|
||||
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
|
||||
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
|
||||
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
|
||||
nFeatureInfo = CPUInfo[3];
|
||||
|
||||
// Interpret CPU feature information.
|
||||
if (i == 1)
|
||||
if (CPUInfo[2] & (1 << 23))
|
||||
{
|
||||
nSteppingID = CPUInfo[0] & 0xf;
|
||||
nModel = (CPUInfo[0] >> 4) & 0xf;
|
||||
nFamily = (CPUInfo[0] >> 8) & 0xf;
|
||||
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
|
||||
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
|
||||
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
|
||||
nBrandIndex = CPUInfo[1] & 0xff;
|
||||
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
|
||||
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
|
||||
bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
|
||||
bSSSE3NewInstructions = (CPUInfo[2] & 0x200) || false;
|
||||
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
|
||||
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
|
||||
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
|
||||
nFeatureInfo = CPUInfo[3];
|
||||
bPOPCNT = true;
|
||||
}
|
||||
|
||||
if (CPUInfo[2] & (1 << 23))
|
||||
{
|
||||
bPOPCNT = true;
|
||||
}
|
||||
if (CPUInfo[2] & (1 << 19))
|
||||
{
|
||||
bSSE4_1 = true;
|
||||
}
|
||||
|
||||
if (CPUInfo[2] & (1 << 19))
|
||||
{
|
||||
bSSE4_1 = true;
|
||||
}
|
||||
|
||||
if (CPUInfo[2] & (1 << 20))
|
||||
{
|
||||
bSSE4_2 = true;
|
||||
}
|
||||
if (CPUInfo[2] & (1 << 20))
|
||||
{
|
||||
bSSE4_2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Calling __cpuid with 0x80000000 as the InfoType argument
|
||||
// gets the number of valid extended IDs.
|
||||
__cpuid(CPUInfo, 0x80000000);
|
||||
nExIds = CPUInfo[0];
|
||||
memset(CPUBrandString, 0, sizeof(CPUBrandString));
|
||||
|
||||
// Get the information associated with each extended ID.
|
||||
for (unsigned int i = 0x80000000; i <= nExIds; ++i)
|
||||
if (bSSE3)
|
||||
{
|
||||
__cpuid(CPUInfo, i);
|
||||
// Only SSE3 CPU-s support extended infotypes
|
||||
// Calling __cpuid with 0x80000000 as the InfoType argument
|
||||
// gets the number of valid extended IDs.
|
||||
__cpuid(CPUInfo, 0x80000000);
|
||||
nExIds = CPUInfo[0];
|
||||
memset(CPUBrandString, 0, sizeof(CPUBrandString));
|
||||
|
||||
// Interpret CPU brand string and cache information.
|
||||
if (i == 0x80000001)
|
||||
// Get the information associated with each extended ID.
|
||||
for (unsigned int i = 0x80000000; i <= nExIds; ++i)
|
||||
{
|
||||
// This block seems bugged.
|
||||
nFeatureInfo2 = CPUInfo[1]; // ECX
|
||||
bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
|
||||
bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
|
||||
bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
|
||||
bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
|
||||
__cpuid(CPUInfo, i);
|
||||
|
||||
CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false;
|
||||
}
|
||||
else if (i == 0x80000002)
|
||||
{
|
||||
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000003)
|
||||
{
|
||||
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000004)
|
||||
{
|
||||
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000006)
|
||||
{
|
||||
nCacheLineSize = CPUInfo[2] & 0xff;
|
||||
nL2Associativity = (CPUInfo[2] >> 12) & 0xf;
|
||||
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff;
|
||||
}
|
||||
else if (i == 0x80000008)
|
||||
{
|
||||
int numLSB = (CPUInfo[2] >> 12) & 0xF;
|
||||
numCores = 1 << numLSB;
|
||||
//int coresPerDie = CPUInfo[2] & 0xFF;
|
||||
// numCores = coresPerDie;
|
||||
// Interpret CPU brand string and cache information.
|
||||
if (i == 0x80000001)
|
||||
{
|
||||
// This block seems bugged.
|
||||
nFeatureInfo2 = CPUInfo[1]; // ECX
|
||||
bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
|
||||
bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
|
||||
bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
|
||||
bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
|
||||
|
||||
CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false;
|
||||
}
|
||||
else if (i == 0x80000002)
|
||||
{
|
||||
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000003)
|
||||
{
|
||||
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000004)
|
||||
{
|
||||
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
else if (i == 0x80000006)
|
||||
{
|
||||
nCacheLineSize = CPUInfo[2] & 0xff;
|
||||
nL2Associativity = (CPUInfo[2] >> 12) & 0xf;
|
||||
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff;
|
||||
}
|
||||
else if (i == 0x80000008)
|
||||
{
|
||||
int numLSB = (CPUInfo[2] >> 12) & 0xF;
|
||||
numCores = 1 << numLSB;
|
||||
//int coresPerDie = CPUInfo[2] & 0xFF;
|
||||
// numCores = coresPerDie;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -222,9 +222,9 @@ void CPUInfoStruct::Detect()
|
||||
nIds <<= 1;
|
||||
bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false;
|
||||
nIds <<= 1;
|
||||
bSSEExtensions = (nFeatureInfo & nIds) ? true : false;
|
||||
bSSE = (nFeatureInfo & nIds) ? true : false;
|
||||
nIds <<= 1;
|
||||
bSSE2Extensions = (nFeatureInfo & nIds) ? true : false;
|
||||
bSSE2 = (nFeatureInfo & nIds) ? true : false;
|
||||
nIds <<= 1;
|
||||
bSelfSnoop = (nFeatureInfo & nIds) ? true : false;
|
||||
nIds <<= 1;
|
||||
|
@ -77,16 +77,16 @@ struct CPUInfoStruct
|
||||
bool bThermalMonitorandClockCtrl;
|
||||
bool bMMXTechnology;
|
||||
bool bFXSAVE_FXRSTOR;
|
||||
bool bSSEExtensions;
|
||||
bool bSSE2Extensions;
|
||||
bool bSSE3NewInstructions;
|
||||
bool bSSSE3NewInstructions;
|
||||
bool bSelfSnoop;
|
||||
bool bHyper_threadingTechnology;
|
||||
bool bThermalMonitor;
|
||||
bool bUnknown4;
|
||||
bool bPendBrkEN;
|
||||
|
||||
bool bSSE;
|
||||
bool bSSE2;
|
||||
bool bSSE3;
|
||||
bool bSSSE3;
|
||||
bool bPOPCNT;
|
||||
bool bSSE4_1;
|
||||
bool bSSE4_2;
|
||||
|
@ -17,6 +17,7 @@ files = ["ABI.cpp",
|
||||
"PortableSockets.cpp",
|
||||
"StringUtil.cpp",
|
||||
"TestFramework.cpp",
|
||||
"Thunk.cpp",
|
||||
"Timer.cpp",
|
||||
"Thread.cpp",
|
||||
"x64Emitter.cpp",
|
||||
|
147
Source/Core/Common/Src/Thunk.cpp
Normal file
147
Source/Core/Common/Src/Thunk.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Common.h"
|
||||
#include "Thunk.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "MemoryUtil.h"
|
||||
#include "ABI.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Size in bytes of the executable memory arena used for thunks (1 MiB).
// Parenthesized so the macro expands correctly inside larger expressions.
#define THUNK_ARENA_SIZE (1024*1024*1)
|
||||
|
||||
namespace {
|
||||
static std::map<void *, const u8 *> thunks;
|
||||
u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
|
||||
u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
|
||||
}
|
||||
|
||||
static u8 *thunk_memory;
|
||||
static u8 *thunk_code;
|
||||
static const u8 *save_regs;
|
||||
static const u8 *load_regs;
|
||||
u32 saved_return;
|
||||
|
||||
void Thunk_Init()
|
||||
{
|
||||
thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE);
|
||||
thunk_code = thunk_memory;
|
||||
|
||||
GenContext ctx(&thunk_code);
|
||||
save_regs = GetCodePtr();
|
||||
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
|
||||
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
|
||||
#ifdef _M_X64
|
||||
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
|
||||
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
|
||||
MOV(64, M(saved_gpr_state + 16), R(R8) );
|
||||
MOV(64, M(saved_gpr_state + 24), R(R9) );
|
||||
MOV(64, M(saved_gpr_state + 32), R(R10));
|
||||
MOV(64, M(saved_gpr_state + 40), R(R11));
|
||||
#ifndef _WIN32
|
||||
MOV(64, M(saved_gpr_state + 48), R(RSI));
|
||||
MOV(64, M(saved_gpr_state + 56), R(RDI));
|
||||
#endif
|
||||
#else
|
||||
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
|
||||
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
|
||||
#endif
|
||||
RET();
|
||||
load_regs = GetCodePtr();
|
||||
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
|
||||
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
|
||||
#ifdef _M_X64
|
||||
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
|
||||
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
|
||||
MOV(64, R(R8) , M(saved_gpr_state + 16));
|
||||
MOV(64, R(R9) , M(saved_gpr_state + 24));
|
||||
MOV(64, R(R10), M(saved_gpr_state + 32));
|
||||
MOV(64, R(R11), M(saved_gpr_state + 40));
|
||||
#ifndef _WIN32
|
||||
MOV(64, R(RSI), M(saved_gpr_state + 48));
|
||||
MOV(64, R(RDI), M(saved_gpr_state + 56));
|
||||
#endif
|
||||
#else
|
||||
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
|
||||
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
|
||||
#endif
|
||||
RET();
|
||||
}
|
||||
|
||||
void Thunk_Reset()
|
||||
{
|
||||
thunks.clear();
|
||||
thunk_code = thunk_memory;
|
||||
}
|
||||
|
||||
void Thunk_Shutdown()
|
||||
{
|
||||
Thunk_Reset();
|
||||
FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE);
|
||||
thunk_memory = 0;
|
||||
thunk_code = 0;
|
||||
}
|
||||
|
||||
void *ProtectFunction(void *function, int num_params)
|
||||
{
|
||||
std::map<void *, const u8 *>::iterator iter;
|
||||
iter = thunks.find(function);
|
||||
if (iter != thunks.end())
|
||||
return (void *)iter->second;
|
||||
|
||||
if (!thunk_memory)
|
||||
PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
|
||||
|
||||
GenContext gen(&thunk_code);
|
||||
const u8 *call_point = GetCodePtr();
|
||||
// Make sure to align stack.
|
||||
#ifdef _M_X64
|
||||
#ifdef _WIN32
|
||||
SUB(64, R(ESP), Imm8(0x28));
|
||||
#else
|
||||
SUB(64, R(ESP), Imm8(0x8));
|
||||
#endif
|
||||
CALL((void*)save_regs);
|
||||
CALL((void*)function);
|
||||
CALL((void*)load_regs);
|
||||
#ifdef _WIN32
|
||||
ADD(64, R(ESP), Imm8(0x28));
|
||||
#else
|
||||
ADD(64, R(ESP), Imm8(0x8));
|
||||
#endif
|
||||
RET();
|
||||
#else
|
||||
//INT3();
|
||||
CALL((void*)save_regs);
|
||||
// Re-push parameters from previous stack frame
|
||||
for (int i = 0; i < num_params; i++) {
|
||||
// ESP is changing, so we do not need i
|
||||
PUSH(32, MDisp(ESP, (num_params) * 4));
|
||||
}
|
||||
CALL(function);
|
||||
if (num_params)
|
||||
ADD(32, R(ESP), Imm8(num_params * 4));
|
||||
CALL((void*)load_regs);
|
||||
RET();
|
||||
#endif
|
||||
|
||||
thunks[function] = call_point;
|
||||
return (void *)call_point;
|
||||
}
|
39
Source/Core/Common/Src/Thunk.h
Normal file
39
Source/Core/Common/Src/Thunk.h
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _THUNK_H
|
||||
#define _THUNK_H
|
||||
|
||||
// These functions create a wrapper around a C/C++ function that saves all fp state
// before entering it, and restores it upon exit. This is required to be able to selectively
// call functions from generated code, without incurring the performance hit and the added
// complexity of protecting the generated code itself from this problem.

// This process is called thunking.

// There will only ever be one level of thunking on the stack, plus,
// we don't want to pollute the stack, so we store away regs somewhere global.
// NOT THREAD SAFE. This may only be used from the CPU thread.
// Any other thread using this stuff will be FATAL.
|
||||
|
||||
void Thunk_Init();
|
||||
void Thunk_Reset();
|
||||
void Thunk_Shutdown();
|
||||
|
||||
void *ProtectFunction(void *function, int num_params);
|
||||
|
||||
#endif
|
@ -1156,7 +1156,7 @@ namespace Gen
|
||||
void MOVDDUP(X64Reg regOp, OpArg arg)
|
||||
{
|
||||
// TODO(ector): check SSE3 flag
|
||||
if (cpu_info.bSSE3NewInstructions)
|
||||
if (cpu_info.bSSE3)
|
||||
{
|
||||
WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3
|
||||
}
|
||||
@ -1205,7 +1205,7 @@ namespace Gen
|
||||
}
|
||||
|
||||
void PSHUFB(X64Reg dest, OpArg arg) {
|
||||
if (!cpu_info.bSSE3NewInstructions) {
|
||||
if (!cpu_info.bSSSE3) {
|
||||
PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer.");
|
||||
}
|
||||
Write8(0x66);
|
||||
|
@ -14,6 +14,9 @@
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
|
||||
|
||||
#ifndef _DOLPHIN_INTEL_CODEGEN
|
||||
#define _DOLPHIN_INTEL_CODEGEN
|
||||
|
||||
@ -92,6 +95,26 @@ namespace Gen
|
||||
const u8 *GetCodePtr();
|
||||
u8 *GetWritableCodePtr();
|
||||
|
||||
|
||||
// Safe way to temporarily redirect the code generator.
|
||||
class GenContext
|
||||
{
|
||||
u8 **code_ptr_ptr;
|
||||
u8 *saved_ptr;
|
||||
public:
|
||||
GenContext(u8 **code_ptr_ptr_)
|
||||
{
|
||||
saved_ptr = GetWritableCodePtr();
|
||||
code_ptr_ptr = code_ptr_ptr_;
|
||||
SetCodePtr(*code_ptr_ptr);
|
||||
}
|
||||
~GenContext()
|
||||
{
|
||||
*code_ptr_ptr = GetWritableCodePtr();
|
||||
SetCodePtr(saved_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
enum NormalOp {
|
||||
nrmADD,
|
||||
nrmADC,
|
||||
|
Reference in New Issue
Block a user