Lots of various changes. CPU detect fix. Maybe a minor speed increase. CPU bugs remain.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@180 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard
2008-08-12 20:05:45 +00:00
parent 29102ecbc6
commit 575bdd9166
27 changed files with 400 additions and 192 deletions

View File

@ -28,7 +28,7 @@
// * Caller fixes stack after call
// * function subtract from stack for local storage only.
// Scratch: EAX ECX EDX
// Callee-save: EBX ESI EDI EBP
// Callee-save: EBX ESI EDI EBP
// Parameters: -
// Windows 64-bit
@ -103,6 +103,11 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack();
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();
#ifdef _M_IX86
inline int ABI_GetNumXMMRegs() { return 8; }
#else
inline int ABI_GetNumXMMRegs() { return 16; }
#endif
#endif // _JIT_ABI_H

View File

@ -21,7 +21,7 @@
//#include <config/i386/cpuid.h>
#include <xmmintrin.h>
void __cpuid(int info[4], int x) {}
void __cpuid(int info[4], int x) {memset(info, 0, sizeof(info));}
#endif
@ -72,94 +72,94 @@ void CPUInfoStruct::Detect()
isAMD = true;
}
// Get the information associated with each valid Id
for (unsigned int i = 0; i <= nIds; ++i)
if (nIds >= 2)
{
__cpuid(CPUInfo, i);
// Get the information associated with each valid Id
__cpuid(CPUInfo, 1);
nSteppingID = CPUInfo[0] & 0xf;
nModel = (CPUInfo[0] >> 4) & 0xf;
nFamily = (CPUInfo[0] >> 8) & 0xf;
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
nBrandIndex = CPUInfo[1] & 0xff;
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
bSSE3 = (CPUInfo[2] & 0x1) || false;
bSSSE3 = (CPUInfo[2] & 0x200) || false;
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
nFeatureInfo = CPUInfo[3];
// Interpret CPU feature information.
if (i == 1)
if (CPUInfo[2] & (1 << 23))
{
nSteppingID = CPUInfo[0] & 0xf;
nModel = (CPUInfo[0] >> 4) & 0xf;
nFamily = (CPUInfo[0] >> 8) & 0xf;
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
nBrandIndex = CPUInfo[1] & 0xff;
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
bSSSE3NewInstructions = (CPUInfo[2] & 0x200) || false;
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
nFeatureInfo = CPUInfo[3];
bPOPCNT = true;
}
if (CPUInfo[2] & (1 << 23))
{
bPOPCNT = true;
}
if (CPUInfo[2] & (1 << 19))
{
bSSE4_1 = true;
}
if (CPUInfo[2] & (1 << 19))
{
bSSE4_1 = true;
}
if (CPUInfo[2] & (1 << 20))
{
bSSE4_2 = true;
}
if (CPUInfo[2] & (1 << 20))
{
bSSE4_2 = true;
}
}
// Calling __cpuid with 0x80000000 as the InfoType argument
// gets the number of valid extended IDs.
__cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
memset(CPUBrandString, 0, sizeof(CPUBrandString));
// Get the information associated with each extended ID.
for (unsigned int i = 0x80000000; i <= nExIds; ++i)
if (bSSE3)
{
__cpuid(CPUInfo, i);
// Only SSE3 CPU-s support extended infotypes
// Calling __cpuid with 0x80000000 as the InfoType argument
// gets the number of valid extended IDs.
__cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
memset(CPUBrandString, 0, sizeof(CPUBrandString));
// Interpret CPU brand string and cache information.
if (i == 0x80000001)
// Get the information associated with each extended ID.
for (unsigned int i = 0x80000000; i <= nExIds; ++i)
{
// This block seems bugged.
nFeatureInfo2 = CPUInfo[1]; // ECX
bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
__cpuid(CPUInfo, i);
CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false;
}
else if (i == 0x80000002)
{
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000003)
{
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000004)
{
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000006)
{
nCacheLineSize = CPUInfo[2] & 0xff;
nL2Associativity = (CPUInfo[2] >> 12) & 0xf;
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff;
}
else if (i == 0x80000008)
{
int numLSB = (CPUInfo[2] >> 12) & 0xF;
numCores = 1 << numLSB;
//int coresPerDie = CPUInfo[2] & 0xFF;
// numCores = coresPerDie;
// Interpret CPU brand string and cache information.
if (i == 0x80000001)
{
// This block seems bugged.
nFeatureInfo2 = CPUInfo[1]; // ECX
bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false;
}
else if (i == 0x80000002)
{
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000003)
{
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000004)
{
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
}
else if (i == 0x80000006)
{
nCacheLineSize = CPUInfo[2] & 0xff;
nL2Associativity = (CPUInfo[2] >> 12) & 0xf;
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff;
}
else if (i == 0x80000008)
{
int numLSB = (CPUInfo[2] >> 12) & 0xF;
numCores = 1 << numLSB;
//int coresPerDie = CPUInfo[2] & 0xFF;
// numCores = coresPerDie;
}
}
}
@ -222,9 +222,9 @@ void CPUInfoStruct::Detect()
nIds <<= 1;
bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSSEExtensions = (nFeatureInfo & nIds) ? true : false;
bSSE = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSSE2Extensions = (nFeatureInfo & nIds) ? true : false;
bSSE2 = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSelfSnoop = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;

View File

@ -77,16 +77,16 @@ struct CPUInfoStruct
bool bThermalMonitorandClockCtrl;
bool bMMXTechnology;
bool bFXSAVE_FXRSTOR;
bool bSSEExtensions;
bool bSSE2Extensions;
bool bSSE3NewInstructions;
bool bSSSE3NewInstructions;
bool bSelfSnoop;
bool bHyper_threadingTechnology;
bool bThermalMonitor;
bool bUnknown4;
bool bPendBrkEN;
bool bSSE;
bool bSSE2;
bool bSSE3;
bool bSSSE3;
bool bPOPCNT;
bool bSSE4_1;
bool bSSE4_2;

View File

@ -17,6 +17,7 @@ files = ["ABI.cpp",
"PortableSockets.cpp",
"StringUtil.cpp",
"TestFramework.cpp",
"Thunk.cpp",
"Timer.cpp",
"Thread.cpp",
"x64Emitter.cpp",

View File

@ -0,0 +1,147 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <map>
#include "Common.h"
#include "Thunk.h"
#include "x64Emitter.h"
#include "MemoryUtil.h"
#include "ABI.h"
using namespace Gen;
#define THUNK_ARENA_SIZE 1024*1024*1
namespace {
static std::map<void *, const u8 *> thunks;
u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
}
static u8 *thunk_memory;
static u8 *thunk_code;
static const u8 *save_regs;
static const u8 *load_regs;
u32 saved_return;
void Thunk_Init()
{
thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE);
thunk_code = thunk_memory;
GenContext ctx(&thunk_code);
save_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
#ifdef _M_X64
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
MOV(64, M(saved_gpr_state + 16), R(R8) );
MOV(64, M(saved_gpr_state + 24), R(R9) );
MOV(64, M(saved_gpr_state + 32), R(R10));
MOV(64, M(saved_gpr_state + 40), R(R11));
#ifndef _WIN32
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
#endif
#else
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
#endif
RET();
load_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
#ifdef _M_X64
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
MOV(64, R(R8) , M(saved_gpr_state + 16));
MOV(64, R(R9) , M(saved_gpr_state + 24));
MOV(64, R(R10), M(saved_gpr_state + 32));
MOV(64, R(R11), M(saved_gpr_state + 40));
#ifndef _WIN32
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
#endif
#else
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
RET();
}
void Thunk_Reset()
{
thunks.clear();
thunk_code = thunk_memory;
}
void Thunk_Shutdown()
{
Thunk_Reset();
FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE);
thunk_memory = 0;
thunk_code = 0;
}
void *ProtectFunction(void *function, int num_params)
{
std::map<void *, const u8 *>::iterator iter;
iter = thunks.find(function);
if (iter != thunks.end())
return (void *)iter->second;
if (!thunk_memory)
PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
GenContext gen(&thunk_code);
const u8 *call_point = GetCodePtr();
// Make sure to align stack.
#ifdef _M_X64
#ifdef _WIN32
SUB(64, R(ESP), Imm8(0x28));
#else
SUB(64, R(ESP), Imm8(0x8));
#endif
CALL((void*)save_regs);
CALL((void*)function);
CALL((void*)load_regs);
#ifdef _WIN32
ADD(64, R(ESP), Imm8(0x28));
#else
ADD(64, R(ESP), Imm8(0x8));
#endif
RET();
#else
//INT3();
CALL((void*)save_regs);
// Re-push parameters from previous stack frame
for (int i = 0; i < num_params; i++) {
// ESP is changing, so we do not need i
PUSH(32, MDisp(ESP, (num_params) * 4));
}
CALL(function);
if (num_params)
ADD(32, R(ESP), Imm8(num_params * 4));
CALL((void*)load_regs);
RET();
#endif
thunks[function] = call_point;
return (void *)call_point;
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _THUNK_H
#define _THUNK_H
// This simple class creates a wrapper around a C/C++ function that saves all fp state
// before entering it, and restores it upon exit. This is required to be able to selectively
// call functions from generated code, without inflicting the performance hit and increase
// of complexity that it means to protect the generated code from this problem.
// This process is called thunking.
// There will only ever be one level of thunking on the stack, plus,
// we don't want to pollute the stack, so we store away regs somewhere global.
// NOT THREAD SAFE. This may only be used from the CPU thread.
// Any other thread using this stuff will be FATAL.
void Thunk_Init();
void Thunk_Reset();
void Thunk_Shutdown();
void *ProtectFunction(void *function, int num_params);
#endif

View File

@ -1156,7 +1156,7 @@ namespace Gen
void MOVDDUP(X64Reg regOp, OpArg arg)
{
// TODO(ector): check SSE3 flag
if (cpu_info.bSSE3NewInstructions)
if (cpu_info.bSSE3)
{
WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3
}
@ -1205,7 +1205,7 @@ namespace Gen
}
void PSHUFB(X64Reg dest, OpArg arg) {
if (!cpu_info.bSSE3NewInstructions) {
if (!cpu_info.bSSSE3) {
PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer.");
}
Write8(0x66);

View File

@ -14,6 +14,9 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
#ifndef _DOLPHIN_INTEL_CODEGEN
#define _DOLPHIN_INTEL_CODEGEN
@ -92,6 +95,26 @@ namespace Gen
const u8 *GetCodePtr();
u8 *GetWritableCodePtr();
// Safe way to temporarily redirect the code generator.
class GenContext
{
u8 **code_ptr_ptr;
u8 *saved_ptr;
public:
GenContext(u8 **code_ptr_ptr_)
{
saved_ptr = GetWritableCodePtr();
code_ptr_ptr = code_ptr_ptr_;
SetCodePtr(*code_ptr_ptr);
}
~GenContext()
{
*code_ptr_ptr = GetWritableCodePtr();
SetCodePtr(saved_ptr);
}
};
enum NormalOp {
nrmADD,
nrmADC,