From 575bdd9166330ee2cd77e72abcacf6dce2ed863b Mon Sep 17 00:00:00 2001 From: hrydgard Date: Tue, 12 Aug 2008 20:05:45 +0000 Subject: [PATCH] Lots of various changes. CPU detect fix. Maybe a minor speed increase. CPU bugs remain. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@180 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Common.vcproj | 8 + Source/Core/Common/Src/ABI.h | 7 +- Source/Core/Common/Src/CPUDetect.cpp | 158 +++++++++--------- Source/Core/Common/Src/CPUDetect.h | 8 +- Source/Core/Common/Src/SConscript | 1 + Source/Core/Common/Src/Thunk.cpp | 147 ++++++++++++++++ Source/Core/Common/Src/Thunk.h | 39 +++++ Source/Core/Common/Src/x64Emitter.cpp | 4 +- Source/Core/Common/Src/x64Emitter.h | 23 +++ Source/Core/Core/Src/Core.cpp | 5 +- Source/Core/Core/Src/HW/HW.cpp | 3 + Source/Core/Core/Src/HW/Memmap.cpp | 6 +- .../Core/Core/Src/HW/PeripheralInterface.cpp | 2 +- .../Interpreter/Interpreter_LoadStore.cpp | 36 ++-- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 10 +- .../Core/Src/PowerPC/Jit64/JitBackpatch.cpp | 3 +- .../Core/Src/PowerPC/Jit64/JitRegCache.cpp | 9 +- .../Core/Core/Src/PowerPC/Jit64/JitRegCache.h | 1 - .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 6 +- .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 1 + .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 27 +-- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 33 ++-- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 35 ++-- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 14 +- .../Core/Src/PowerPC/Jit64/Jit_Paired.cpp | 2 - .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- Source/Core/DolphinWX/src/Main.cpp | 2 +- 27 files changed, 400 insertions(+), 192 deletions(-) create mode 100644 Source/Core/Common/Src/Thunk.cpp create mode 100644 Source/Core/Common/Src/Thunk.h diff --git a/Source/Core/Common/Common.vcproj b/Source/Core/Common/Common.vcproj index 03d5d9dbca..bdd518410d 100644 --- a/Source/Core/Common/Common.vcproj +++ b/Source/Core/Common/Common.vcproj @@ -659,6 +659,14 @@ RelativePath=".\Src\Thread.h" > + + + + diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/ABI.h index feb4431531..2c6f59fac6 100644 --- a/Source/Core/Common/Src/ABI.h +++ b/Source/Core/Common/Src/ABI.h @@ -28,7 +28,7 @@ // * Caller fixes stack after call // * function subtract from stack for local storage only. // Scratch: EAX ECX EDX -// Callee-save: EBX ESI EDI EBP +// Callee-save: EBX ESI EDI EBP // Parameters: - // Windows 64-bit @@ -103,6 +103,11 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack(); void ABI_PushAllCallerSavedRegsAndAdjustStack(); void ABI_PopAllCallerSavedRegsAndAdjustStack(); +#ifdef _M_IX86 +inline int ABI_GetNumXMMRegs() { return 8; } +#else +inline int ABI_GetNumXMMRegs() { return 16; } +#endif #endif // _JIT_ABI_H diff --git a/Source/Core/Common/Src/CPUDetect.cpp b/Source/Core/Common/Src/CPUDetect.cpp index 344b5b670e..298a9dfc51 100644 --- a/Source/Core/Common/Src/CPUDetect.cpp +++ b/Source/Core/Common/Src/CPUDetect.cpp @@ -21,7 +21,7 @@ //#include #include -void __cpuid(int info[4], int x) {} +void __cpuid(int info[4], int x) {memset(info, 0, sizeof(info));} #endif @@ -72,94 +72,94 @@ void CPUInfoStruct::Detect() isAMD = true; } - // Get the information associated with each valid Id - for (unsigned int i = 0; i <= nIds; ++i) + if (nIds >= 2) { - __cpuid(CPUInfo, i); + // Get the information associated with each valid Id + __cpuid(CPUInfo, 1); + + nSteppingID = CPUInfo[0] & 0xf; + nModel = (CPUInfo[0] >> 4) & 0xf; + nFamily = (CPUInfo[0] >> 8) & 0xf; + nProcessorType = (CPUInfo[0] >> 12) & 0x3; + nExtendedmodel = (CPUInfo[0] >> 16) & 0xf; + nExtendedfamily = (CPUInfo[0] >> 20) & 0xff; + nBrandIndex = CPUInfo[1] & 0xff; + nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8; + nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff; + bSSE3 = (CPUInfo[2] & 0x1) || false; + bSSSE3 = (CPUInfo[2] & 0x200) || false; + bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false; + bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false; + bThermalMonitor2 = (CPUInfo[2] & 0x100) || false; + nFeatureInfo = CPUInfo[3]; - // Interpret CPU feature information. - if (i == 1) + if (CPUInfo[2] & (1 << 23)) { - nSteppingID = CPUInfo[0] & 0xf; - nModel = (CPUInfo[0] >> 4) & 0xf; - nFamily = (CPUInfo[0] >> 8) & 0xf; - nProcessorType = (CPUInfo[0] >> 12) & 0x3; - nExtendedmodel = (CPUInfo[0] >> 16) & 0xf; - nExtendedfamily = (CPUInfo[0] >> 20) & 0xff; - nBrandIndex = CPUInfo[1] & 0xff; - nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8; - nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff; - bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false; - bSSSE3NewInstructions = (CPUInfo[2] & 0x200) || false; - bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false; - bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false; - bThermalMonitor2 = (CPUInfo[2] & 0x100) || false; - nFeatureInfo = CPUInfo[3]; + bPOPCNT = true; + } - if (CPUInfo[2] & (1 << 23)) - { - bPOPCNT = true; - } + if (CPUInfo[2] & (1 << 19)) + { + bSSE4_1 = true; + } - if (CPUInfo[2] & (1 << 19)) - { - bSSE4_1 = true; - } - - if (CPUInfo[2] & (1 << 20)) - { - bSSE4_2 = true; - } + if (CPUInfo[2] & (1 << 20)) + { + bSSE4_2 = true; } } - // Calling __cpuid with 0x80000000 as the InfoType argument - // gets the number of valid extended IDs. - __cpuid(CPUInfo, 0x80000000); - nExIds = CPUInfo[0]; - memset(CPUBrandString, 0, sizeof(CPUBrandString)); - - // Get the information associated with each extended ID. - for (unsigned int i = 0x80000000; i <= nExIds; ++i) + if (bSSE3) { - __cpuid(CPUInfo, i); + // Only SSE3 CPU-s support extended infotypes + // Calling __cpuid with 0x80000000 as the InfoType argument + // gets the number of valid extended IDs. + __cpuid(CPUInfo, 0x80000000); + nExIds = CPUInfo[0]; + memset(CPUBrandString, 0, sizeof(CPUBrandString)); - // Interpret CPU brand string and cache information. - if (i == 0x80000001) + // Get the information associated with each extended ID. + for (unsigned int i = 0x80000000; i <= nExIds; ++i) { - // This block seems bugged. - nFeatureInfo2 = CPUInfo[1]; // ECX - bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false; - bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false; - bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false; - bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false; + __cpuid(CPUInfo, i); - CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false; - } - else if (i == 0x80000002) - { - memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo)); - } - else if (i == 0x80000003) - { - memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo)); - } - else if (i == 0x80000004) - { - memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo)); - } - else if (i == 0x80000006) - { - nCacheLineSize = CPUInfo[2] & 0xff; - nL2Associativity = (CPUInfo[2] >> 12) & 0xf; - nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff; - } - else if (i == 0x80000008) - { - int numLSB = (CPUInfo[2] >> 12) & 0xF; - numCores = 1 << numLSB; - //int coresPerDie = CPUInfo[2] & 0xFF; - // numCores = coresPerDie; + // Interpret CPU brand string and cache information. + if (i == 0x80000001) + { + // This block seems bugged. + nFeatureInfo2 = CPUInfo[1]; // ECX + bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false; + bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false; + bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false; + bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false; + + CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false; + } + else if (i == 0x80000002) + { + memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo)); + } + else if (i == 0x80000003) + { + memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo)); + } + else if (i == 0x80000004) + { + memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo)); + } + else if (i == 0x80000006) + { + nCacheLineSize = CPUInfo[2] & 0xff; + nL2Associativity = (CPUInfo[2] >> 12) & 0xf; + nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff; + } + else if (i == 0x80000008) + { + int numLSB = (CPUInfo[2] >> 12) & 0xF; + numCores = 1 << numLSB; + //int coresPerDie = CPUInfo[2] & 0xFF; + // numCores = coresPerDie; + } } } @@ -222,9 +222,9 @@ void CPUInfoStruct::Detect() nIds <<= 1; bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false; nIds <<= 1; - bSSEExtensions = (nFeatureInfo & nIds) ? true : false; + bSSE = (nFeatureInfo & nIds) ? true : false; nIds <<= 1; - bSSE2Extensions = (nFeatureInfo & nIds) ? true : false; + bSSE2 = (nFeatureInfo & nIds) ? true : false; nIds <<= 1; bSelfSnoop = (nFeatureInfo & nIds) ? true : false; nIds <<= 1; diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h index 89ef2a7a71..389127261a 100644 --- a/Source/Core/Common/Src/CPUDetect.h +++ b/Source/Core/Common/Src/CPUDetect.h @@ -77,16 +77,16 @@ struct CPUInfoStruct bool bThermalMonitorandClockCtrl; bool bMMXTechnology; bool bFXSAVE_FXRSTOR; - bool bSSEExtensions; - bool bSSE2Extensions; - bool bSSE3NewInstructions; - bool bSSSE3NewInstructions; bool bSelfSnoop; bool bHyper_threadingTechnology; bool bThermalMonitor; bool bUnknown4; bool bPendBrkEN; + bool bSSE; + bool bSSE2; + bool bSSE3; + bool bSSSE3; bool bPOPCNT; bool bSSE4_1; bool bSSE4_2; diff --git a/Source/Core/Common/Src/SConscript b/Source/Core/Common/Src/SConscript index 839e7382b1..5f215ab564 100644 --- a/Source/Core/Common/Src/SConscript +++ b/Source/Core/Common/Src/SConscript @@ -17,6 +17,7 @@ files = ["ABI.cpp", "PortableSockets.cpp", "StringUtil.cpp", "TestFramework.cpp", + "Thunk.cpp", "Timer.cpp", "Thread.cpp", "x64Emitter.cpp", diff --git a/Source/Core/Common/Src/Thunk.cpp b/Source/Core/Common/Src/Thunk.cpp new file mode 100644 index 0000000000..ed7d93bc99 --- /dev/null +++ b/Source/Core/Common/Src/Thunk.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#include "Common.h" +#include "Thunk.h" +#include "x64Emitter.h" +#include "MemoryUtil.h" +#include "ABI.h" + +using namespace Gen; + +#define THUNK_ARENA_SIZE 1024*1024*1 + +namespace { +static std::map thunks; +u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]); +u8 GC_ALIGNED32(saved_gpr_state[16 * 8]); +} + +static u8 *thunk_memory; +static u8 *thunk_code; +static const u8 *save_regs; +static const u8 *load_regs; +u32 saved_return; + +void Thunk_Init() +{ + thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE); + thunk_code = thunk_memory; + + GenContext ctx(&thunk_code); + save_regs = GetCodePtr(); + for (int i = 2; i < ABI_GetNumXMMRegs(); i++) + MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i)); +#ifdef _M_X64 + MOV(64, M(saved_gpr_state + 0 ), R(RCX)); + MOV(64, M(saved_gpr_state + 8 ), R(RDX)); + MOV(64, M(saved_gpr_state + 16), R(R8) ); + MOV(64, M(saved_gpr_state + 24), R(R9) ); + MOV(64, M(saved_gpr_state + 32), R(R10)); + MOV(64, M(saved_gpr_state + 40), R(R11)); +#ifndef _WIN32 + MOV(64, M(saved_gpr_state + 48), R(RSI)); + MOV(64, M(saved_gpr_state + 56), R(RDI)); +#endif +#else + MOV(32, M(saved_gpr_state + 0 ), R(RCX)); + MOV(32, M(saved_gpr_state + 4 ), R(RDX)); +#endif + RET(); + load_regs = GetCodePtr(); + for (int i = 2; i < ABI_GetNumXMMRegs(); i++) + MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16)); +#ifdef _M_X64 + MOV(64, R(RCX), M(saved_gpr_state + 0 )); + MOV(64, R(RDX), M(saved_gpr_state + 8 )); + MOV(64, R(R8) , M(saved_gpr_state + 16)); + MOV(64, R(R9) , M(saved_gpr_state + 24)); + MOV(64, R(R10), M(saved_gpr_state + 32)); + MOV(64, R(R11), M(saved_gpr_state + 40)); +#ifndef _WIN32 + MOV(64, R(RSI), M(saved_gpr_state + 48)); + MOV(64, R(RDI), M(saved_gpr_state + 56)); +#endif +#else + MOV(32, R(RCX), M(saved_gpr_state + 0 )); + MOV(32, R(RDX), M(saved_gpr_state + 4 )); +#endif + RET(); +} + +void Thunk_Reset() +{ + thunks.clear(); + thunk_code = thunk_memory; +} + +void Thunk_Shutdown() +{ + Thunk_Reset(); + FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE); + thunk_memory = 0; + thunk_code = 0; +} + +void *ProtectFunction(void *function, int num_params) +{ + std::map::iterator iter; + iter = thunks.find(function); + if (iter != thunks.end()) + return (void *)iter->second; + + if (!thunk_memory) + PanicAlert("Trying to protect functions before the emu is started. Bad bad bad."); + + GenContext gen(&thunk_code); + const u8 *call_point = GetCodePtr(); + // Make sure to align stack. +#ifdef _M_X64 +#ifdef _WIN32 + SUB(64, R(ESP), Imm8(0x28)); +#else + SUB(64, R(ESP), Imm8(0x8)); +#endif + CALL((void*)save_regs); + CALL((void*)function); + CALL((void*)load_regs); +#ifdef _WIN32 + ADD(64, R(ESP), Imm8(0x28)); +#else + ADD(64, R(ESP), Imm8(0x8)); +#endif + RET(); +#else + //INT3(); + CALL((void*)save_regs); + // Re-push parameters from previous stack frame + for (int i = 0; i < num_params; i++) { + // ESP is changing, so we do not need i + PUSH(32, MDisp(ESP, (num_params) * 4)); + } + CALL(function); + if (num_params) + ADD(32, R(ESP), Imm8(num_params * 4)); + CALL((void*)load_regs); + RET(); +#endif + + thunks[function] = call_point; + return (void *)call_point; +} \ No newline at end of file diff --git a/Source/Core/Common/Src/Thunk.h b/Source/Core/Common/Src/Thunk.h new file mode 100644 index 0000000000..5ce19a9ef1 --- /dev/null +++ b/Source/Core/Common/Src/Thunk.h @@ -0,0 +1,39 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _THUNK_H +#define _THUNK_H + +// This simple class creates a wrapper around a C/C++ function that saves all fp state +// before entering it, and restores it upon exit. This is required to be able to selectively +// call functions from generated code, without inflicting the performance hit and increase +// of complexity that it means to protect the generated code from this problem. + +// This process is called thunking. + +// There will only ever be one level of thunking on the stack, plus, +// we don't want to pollute the stack, so we store away regs somewhere global. +// NOT THREAD SAFE. This may only be used from the CPU thread. +// Any other thread using this stuff will be FATAL. + +void Thunk_Init(); +void Thunk_Reset(); +void Thunk_Shutdown(); + +void *ProtectFunction(void *function, int num_params); + +#endif diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 27c353b3a1..6611cf50a2 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1156,7 +1156,7 @@ namespace Gen void MOVDDUP(X64Reg regOp, OpArg arg) { // TODO(ector): check SSE3 flag - if (cpu_info.bSSE3NewInstructions) + if (cpu_info.bSSE3) { WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3 } @@ -1205,7 +1205,7 @@ namespace Gen } void PSHUFB(X64Reg dest, OpArg arg) { - if (!cpu_info.bSSE3NewInstructions) { + if (!cpu_info.bSSSE3) { PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer."); } Write8(0x66); diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index c7e2185412..5126aa1d66 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -14,6 +14,9 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ + +// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!! + #ifndef _DOLPHIN_INTEL_CODEGEN #define _DOLPHIN_INTEL_CODEGEN @@ -92,6 +95,26 @@ namespace Gen const u8 *GetCodePtr(); u8 *GetWritableCodePtr(); + + // Safe way to temporarily redirect the code generator. + class GenContext + { + u8 **code_ptr_ptr; + u8 *saved_ptr; + public: + GenContext(u8 **code_ptr_ptr_) + { + saved_ptr = GetWritableCodePtr(); + code_ptr_ptr = code_ptr_ptr_; + SetCodePtr(*code_ptr_ptr); + } + ~GenContext() + { + *code_ptr_ptr = GetWritableCodePtr(); + SetCodePtr(saved_ptr); + } + }; + enum NormalOp { nrmADD, nrmADC, diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index e2d3397b9f..8646260a82 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -26,6 +26,7 @@ #include "Console.h" #include "Core.h" +#include "CPUDetect.h" #include "CoreTiming.h" #include "Boot/Boot.h" #include "PatchEngine.h" @@ -140,7 +141,9 @@ bool Init(const SCoreStartupParameter _CoreParameter) // all right ... here we go Host_SetWaitCursor(false); - DisplayMessage("Emulation started.", 3000); + DisplayMessage(cpu_info.CPUBrandString, 3000); + DisplayMessage(_CoreParameter.m_strFilename, 3000); + //RegisterPanicAlertHandler(PanicAlertToVideo); diff --git a/Source/Core/Core/Src/HW/HW.cpp b/Source/Core/Core/Src/HW/HW.cpp index ccd161e03f..401fd792eb 100644 --- a/Source/Core/Core/Src/HW/HW.cpp +++ b/Source/Core/Core/Src/HW/HW.cpp @@ -16,6 +16,7 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" +#include "Thunk.h" #include "../Core.h" #include "HW.h" #include "../PowerPC/PowerPC.h" @@ -42,6 +43,7 @@ namespace HW { void Init() { + Thunk_Init(); // not really hw, but this way we know it's inited first :P // Init the whole Hardware PixelEngine::Init(); CommandProcessor::Init(); @@ -72,5 +74,6 @@ namespace HW WII_IPC_HLE_Interface::Shutdown(); WII_IPCInterface::Shutdown(); + Thunk_Shutdown(); } } diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index 6c9d92a00c..87eae61765 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -1054,10 +1054,14 @@ void SDRUpdated() u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag) { PanicAlert("TLB: %s unknown memory (0x%08x)\n" + "This is either the game crashing randomly, or a TLB write." "Several games uses the TLB to map memory. This\n" - "function is not support in dolphin. Cheers!", + "function is not supported in Dolphin. " + "Also, unfortunately there is no way to recover from this error," + "so Dolphin will now exit abruptly. Sorry!", _Flag == FLAG_WRITE ? "Write to" : "Read from", _Address); + exit(0); u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)]; u32 offset = EA_Offset(_Address); // 12 bit diff --git a/Source/Core/Core/Src/HW/PeripheralInterface.cpp b/Source/Core/Core/Src/HW/PeripheralInterface.cpp index d0fb2d9af7..94224c5ee8 100644 --- a/Source/Core/Core/Src/HW/PeripheralInterface.cpp +++ b/Source/Core/Core/Src/HW/PeripheralInterface.cpp @@ -132,7 +132,7 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress) if ((_uValue != 0x80000001) && (_uValue != 0x80000005)) // DVDLowReset { TCHAR szTemp[256]; - sprintf(szTemp, "Unknown write to PI_RESET_CODE (%08x)", _uValue); + sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue); PanicAlert(szTemp); } } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 952773c5bb..d638885ac1 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -145,7 +145,7 @@ void CInterpreter::lmw(UGeckoInstruction _inst) u32 TempReg = Memory::Read_U32(uAddress); if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { - PanicAlert("DSI exception in lmv. This is very bad."); + PanicAlert("DSI exception in lmv."); return; } @@ -500,9 +500,9 @@ void CInterpreter::lswi(UGeckoInstruction _inst) u32 n; if (_inst.NB == 0) - n=32; + n = 32; else - n=_inst.NB; + n = _inst.NB; int r = _inst.RD - 1; int i = 0; @@ -511,22 +511,22 @@ void CInterpreter::lswi(UGeckoInstruction _inst) if (i==0) { r++; - r&=31; + r &= 31; m_GPR[r] = 0; } - u32 TempValue = Memory::Read_U8(EA) << (24-i); + u32 TempValue = Memory::Read_U8(EA) << (24 - i); if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { - PanicAlert("DSI exception in lsw. This is very bad."); + PanicAlert("DSI exception in lsw."); return; } m_GPR[r] |= TempValue; - i+=8; - if (i==32) - i=0; + i += 8; + if (i == 32) + i = 0; EA++; n--; } @@ -546,26 +546,26 @@ void CInterpreter::stswi(UGeckoInstruction _inst) u32 n; if (_inst.NB == 0) - n=32; + n = 32; else - n=_inst.NB; + n = _inst.NB; int r = _inst.RS - 1; int i = 0; - while (n>0) + while (n > 0) { - if (i==0) + if (i == 0) { r++; - r&=31; + r &= 31; } - Memory::Write_U8((m_GPR[r] >> (24-i)) & 0xFF, EA); + Memory::Write_U8((m_GPR[r] >> (24 - i)) & 0xFF, EA); if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) return; - i+=8; - if (i==32) - i=0; + i += 8; + if (i == 32) + i = 0; EA++; n--; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index ed8d94b3d0..1c66cfcae3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -220,7 +220,7 @@ subfex // Evil namespace CPUCompare { - extern u32 m_BlockStart; + extern u32 m_BlockStart; } @@ -231,9 +231,8 @@ namespace Jit64 void WriteCallInterpreter(UGeckoInstruction _inst) { - gpr.Flush(js.op); - if (PPCTables::UsesFPU(_inst)) - fpr.Flush(js.op); + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); if (js.isLastInstruction) { MOV(32, M(&PC), Imm32(js.compilerPC)); @@ -250,7 +249,8 @@ namespace Jit64 void HLEFunction(UGeckoInstruction _inst) { - FlushRegCaches(); + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); MOV(32, R(EAX), M(&NPC)); WriteExitDestInEAX(0); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp index 48c3ff56bb..2a22a4f29e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp @@ -24,6 +24,7 @@ #include "x64Emitter.h" #include "ABI.h" +#include "Thunk.h" #include "x64Analyzer.h" #include "StringUtil.h" @@ -109,7 +110,7 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress) // break; case 4: // THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS. - CALL((void *)&Memory::Read_U32); + CALL(ProtectFunction((void *)&Memory::Read_U32, 1)); break; default: BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index 36021251d0..3cbee4af76 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -259,12 +259,12 @@ namespace Jit64 { #ifdef _M_X64 #ifdef _WIN32 - RSI, RDI, R12, R13, R14, R8, R9, RDX, R10, R11 //, RCX + RSI, RDI, R12, R13, R14, R8, R9, R10, R11 //, RCX #else R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX #endif #elif _M_IX86 - ESI, EDI, EBX, EBP, EDX + ESI, EDI, EBX, EBP, EDX, ECX, #endif }; count = sizeof(allocationOrder) / sizeof(const int); @@ -412,10 +412,7 @@ namespace Jit64 if (regs[i].location.IsSimpleReg()) { X64Reg xr = RX(i); - if (mode != FLUSH_VOLATILE || IsXRegVolatile(xr)) - { - StoreFromX64(i); - } + StoreFromX64(i); xregs[xr].dirty = false; } else if (regs[i].location.IsImm()) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h index eb0e6b22dd..dabbaa2b57 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h @@ -25,7 +25,6 @@ namespace Jit64 using namespace Gen; enum FlushMode { - FLUSH_VOLATILE, // FLUSH_ALLNONSTATIC, FLUSH_ALL }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 416f147c7f..d82eddd3e7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -48,7 +48,7 @@ namespace Jit64 void rfi(UGeckoInstruction _inst) { FlushRegCaches(); - //Bits SRR1[0,5-9,16�23, 25�27, 30�31] are placed into the corresponding bits of the MSR. + //Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR. //MSR[13] is set to 0. const int mask = 0x87C0FF73; // MSR = (MSR & ~mask) | (SRR1 & mask); @@ -105,7 +105,7 @@ namespace Jit64 const bool only_counter_check = ((inst.BO >> 4) & 1); const bool only_condition_check = ((inst.BO >> 2) & 1); if (only_condition_check && only_counter_check) - PanicAlert("Stupid bcx encountered. Likely bad or corrupt code."); + PanicAlert("Bizarre bcx encountered. Likely bad or corrupt code."); bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0; bool ctrDecremented = false; @@ -182,7 +182,7 @@ namespace Jit64 bool fastway = true; - if((inst.BO & 16) == 0) + if ((inst.BO & 16) == 0) { PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 9eca6538d0..b4f9ba1e59 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -27,6 +27,7 @@ #ifdef _WIN32 #define INSTRUCTION_START +//#define INSTRUCTION_START Default(inst); return; #else #define INSTRUCTION_START Default(inst); return; #endif diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index ae09fc056b..297e8edeec 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -317,8 +317,7 @@ namespace Jit64 { INSTRUCTION_START; int a = inst.RA, d = inst.RD; - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.Lock(a, d); if (a != d) gpr.LoadToX64(d, false, true); @@ -376,8 +375,7 @@ namespace Jit64 { INSTRUCTION_START; int a = inst.RA, d = inst.RD; - gpr.FlushR(EDX); - gpr.LockX(EDX); + gpr.FlushLockX(EDX); gpr.Lock(a, d); if (d != a) { gpr.LoadToX64(d, false, true); @@ -396,8 +394,7 @@ namespace Jit64 { INSTRUCTION_START; int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushR(EDX); - gpr.LockX(EDX); + gpr.FlushLockX(EDX); gpr.Lock(a, b, d); if (d != a && d != b) { gpr.LoadToX64(d, false, true); @@ -420,8 +417,7 @@ namespace Jit64 { INSTRUCTION_START; int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushR(EDX); - gpr.LockX(EDX); + gpr.FlushLockX(EDX); gpr.Lock(a, b, d); if (d != a && d != b) { gpr.LoadToX64(d, false, true); @@ -450,8 +446,7 @@ namespace Jit64 Default(inst); return; int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushR(EDX); - gpr.LockX(EDX); + gpr.FlushLockX(EDX); gpr.Lock(a, b, d); if (d != a && d != b) { gpr.LoadToX64(d, false, true); @@ -534,8 +529,7 @@ namespace Jit64 { INSTRUCTION_START; int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.Lock(a, b, d); if (d != a && d != b) gpr.LoadToX64(d, false); @@ -649,8 +643,7 @@ namespace Jit64 } u32 mask = Helper_Mask(inst.MB, inst.ME); - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.Lock(a, b, s); MOV(32, R(EAX), gpr.R(s)); MOV(32, R(ECX), gpr.R(b)); @@ -691,8 +684,7 @@ namespace Jit64 int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.Lock(a, b, s); gpr.LoadToX64(a, a == s || a == b || s == b, true); MOV(32, R(ECX), gpr.R(b)); @@ -719,8 +711,7 @@ namespace Jit64 int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.Lock(a, b, s); gpr.LoadToX64(a, a == s || a == b || s == b, true); MOV(32, R(ECX), gpr.R(b)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index b5935b9694..c10c5dde94 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -19,6 +19,7 @@ // Should give a very noticable speed boost to paired single heavy code. #include "Common.h" +#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" @@ -82,9 +83,9 @@ namespace Jit64 SetJumpTarget(argh); switch (accessSize) { - case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, reg); break; - case 16: ABI_CallFunctionR((void *)&Memory::Read_U16, reg); break; - case 8: ABI_CallFunctionR((void *)&Memory::Read_U8, reg); break; + case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break; + case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break; + case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break; } SetJumpTarget(arg2); } @@ -97,9 +98,9 @@ namespace Jit64 BSWAP(32, reg_value); #ifdef _M_IX86 AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(reg_addr, (u32)Memory::base), R(reg_value)); + MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); #else - MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, 0), R(reg_value)); + MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); #endif } @@ -113,17 +114,16 @@ namespace Jit64 UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0); FixupBranch arg2 = J(); SetJumpTarget(argh); - ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); + ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); SetJumpTarget(arg2); } void lbzx(UGeckoInstruction inst) { INSTRUCTION_START; - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); int a = inst.RA, b = inst.RB, d = inst.RD; gpr.Lock(a, b, d); + gpr.FlushLockX(ABI_PARAM1); if (b == d || a == d) gpr.LoadToX64(d, true, true); else @@ -134,6 +134,7 @@ namespace Jit64 SafeLoadRegToEAX(ABI_PARAM1, 8, 0); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); } void lXz(UGeckoInstruction inst) @@ -145,7 +146,6 @@ namespace Jit64 // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate // Will give nice boost to dual core mode // if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping()) - if (!Core::GetStartupParameter().bUseDualCore && inst.OPCD == 32 && (inst.hex & 0xFFFF0000) == 0x800D0000 && @@ -172,7 +172,7 @@ namespace Jit64 { case 32: accessSize = 32; break; //lwz case 40: accessSize = 16; break; //lhz - case 34: accessSize = 8; break; //lbz + case 34: accessSize = 8; break; //lbz default: _assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); return; } @@ -183,8 +183,6 @@ namespace Jit64 if (true) { #endif // Safe and boring - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); @@ -221,8 +219,6 @@ namespace Jit64 int a = inst.RA; s32 offset = (s32)(s16)inst.SIMM_16; // Safe and boring - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); @@ -272,8 +268,6 @@ namespace Jit64 s32 offset = (s32)(s16)inst.SIMM_16; if (a || update) { - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); int accessSize; switch (inst.OPCD & ~1) { @@ -358,6 +352,7 @@ namespace Jit64 ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); if (update && offset) { + gpr.LoadToX64(a, true, true); MOV(32, gpr.R(a), R(ABI_PARAM2)); } TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); @@ -380,9 +375,9 @@ namespace Jit64 SetJumpTarget(argh); switch (accessSize) { - case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break; - case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break; - case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break; + case 32: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break; + case 16: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break; + case 8: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break; } SetJumpTarget(arg2); gpr.UnlockAll(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 95580bf76b..89e23ccad1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -65,7 +65,6 @@ static u32 GC_ALIGNED16(temp32); void lfs(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; int d = inst.RD; int a = inst.RA; if (!a) @@ -74,15 +73,8 @@ void lfs(UGeckoInstruction inst) return; } s32 offset = (s32)(s16)inst.SIMM_16; - - if (jo.noAssumeFPLoadFromMem) { - // We might call a function. - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); - gpr.FlushLockX(ABI_PARAM1); - } - gpr.Lock(d, a); - + gpr.FlushLockX(ABI_PARAM1); + gpr.Lock(a); MOV(32, R(ABI_PARAM1), gpr.R(a)); if (!jo.noAssumeFPLoadFromMem) { @@ -103,6 +95,7 @@ void lfs(UGeckoInstruction inst) fpr.UnlockAll(); } + void lfd(UGeckoInstruction inst) { INSTRUCTION_START; @@ -115,11 +108,12 @@ void lfd(UGeckoInstruction inst) return; } s32 offset = (s32)(s16)inst.SIMM_16; + gpr.FlushLockX(ABI_PARAM1); gpr.Lock(a); MOV(32, R(ABI_PARAM1), gpr.R(a)); fpr.LoadToX64(d, false); fpr.Lock(d); - if (cpu_info.bSSE3NewInstructions) { + if (cpu_info.bSSSE3) { X64Reg xd = fpr.RX(d); MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); PSHUFB(xd, M((void *)bswapShuffle1x8Dupe)); @@ -130,13 +124,14 @@ void lfd(UGeckoInstruction inst) MOVDDUP(fpr.RX(d), M(&temp64)); } gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } void stfd(UGeckoInstruction inst) { INSTRUCTION_START; - if (!cpu_info.bSSSE3NewInstructions) + if (!cpu_info.bSSSE3) { DISABLE_32BIT; } @@ -148,14 +143,14 @@ void stfd(UGeckoInstruction inst) return; } s32 offset = (s32)(s16)inst.SIMM_16; + gpr.FlushLockX(ABI_PARAM1); gpr.Lock(a); fpr.Lock(s); - gpr.FlushLockX(ABI_PARAM1); MOV(32, R(ABI_PARAM1), gpr.R(a)); #ifdef _M_IX86 AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); #endif - if (cpu_info.bSSSE3NewInstructions) { + if (cpu_info.bSSSE3) { MOVAPS(XMM0, fpr.R(s)); PSHUFB(XMM0, M((void *)bswapShuffle1x8)); #ifdef _M_X64 @@ -175,6 +170,7 @@ void stfd(UGeckoInstruction inst) fpr.UnlockAll(); } + void stfs(UGeckoInstruction inst) { INSTRUCTION_START; @@ -185,12 +181,11 @@ void stfs(UGeckoInstruction inst) s32 offset = (s32)(s16)inst.SIMM_16; if (a && !update) { - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); MOV(32, R(ABI_PARAM2), gpr.R(a)); + ADD(32, R(ABI_PARAM2), Imm32(offset)); if (update && offset) { MOV(32, gpr.R(a), R(ABI_PARAM2)); @@ -198,7 +193,7 @@ void stfs(UGeckoInstruction inst) CVTSD2SS(XMM0, fpr.R(s)); MOVSS(M(&temp32), XMM0); MOV(32, R(ABI_PARAM1), M(&temp32)); - SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset); + SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0); gpr.UnlockAll(); gpr.UnlockAllX(); fpr.UnlockAll(); @@ -209,6 +204,7 @@ void stfs(UGeckoInstruction inst) } } + void lfsx(UGeckoInstruction inst) { INSTRUCTION_START; @@ -217,8 +213,7 @@ void lfsx(UGeckoInstruction inst) MOV(32, R(EAX), gpr.R(inst.RB)); if (inst.RA) ADD(32, R(EAX), gpr.R(inst.RA)); - if (cpu_info.bSSSE3NewInstructions) { - // PanicAlert("SSE3 supported!"); + if (cpu_info.bSSSE3) { X64Reg r = fpr.R(inst.RS).GetSimpleReg(); #ifdef _M_IX86 AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 281f2debcd..fa344cafe2 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -20,6 +20,7 @@ #include "Common.h" +#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" #include "../../HW/GPFifo.h" @@ -36,7 +37,7 @@ #include "JitAsm.h" #include "JitRegCache.h" -// #define INSTRUCTION_START Default(inst); return; +//#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START #ifdef _M_IX86 @@ -125,8 +126,6 @@ void psq_st(UGeckoInstruction inst) if (stType == QUANTIZE_FLOAT) { DISABLE_32BIT; - gpr.Flush(FLUSH_VOLATILE); - fpr.Flush(FLUSH_VOLATILE); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); @@ -147,7 +146,7 @@ void psq_st(UGeckoInstruction inst) MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); FixupBranch arg2 = J(); SetJumpTarget(argh); - CALL((void *)&WriteDual32); + CALL(ProtectFunction((void *)&WriteDual32, 0)); SetJumpTarget(arg2); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -255,7 +254,7 @@ void psq_l(UGeckoInstruction inst) #ifdef _M_X64 gpr.LoadToX64(inst.RA, true, update); fpr.LoadToX64(inst.RS, false); - if (cpu_info.bSSSE3NewInstructions) { + if (cpu_info.bSSSE3) { X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); PSHUFB(xd, M((void *)pbswapShuffle2x4)); @@ -272,7 +271,7 @@ void psq_l(UGeckoInstruction inst) ADD(32, gpr.R(inst.RA), Imm32(offset)); break; #else - if (cpu_info.bSSSE3NewInstructions) { + if (cpu_info.bSSSE3) { gpr.LoadToX64(inst.RA, true, update); fpr.LoadToX64(inst.RS, false); X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); @@ -282,8 +281,7 @@ void psq_l(UGeckoInstruction inst) PSHUFB(xd, M((void *)pbswapShuffle2x4)); CVTPS2PD(xd, R(xd)); } else { - gpr.FlushR(ECX); - gpr.LockX(ECX); + gpr.FlushLockX(ECX); gpr.LoadToX64(inst.RA); // This can probably be optimized somewhat. LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp index 862c7c1dae..90a93f9f3e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp @@ -93,8 +93,6 @@ namespace Jit64 fpr.UnlockAll(); } - - //add a, b, c //mov a, b diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 6acedc9650..5e27957d74 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -59,7 +59,7 @@ namespace Jit64 case SPR_GQR0 + 5: case SPR_GQR0 + 6: case SPR_GQR0 + 7: - js.blockSetsQuantizers = false; + js.blockSetsQuantizers = true; // Prevent recompiler from compiling in old quantizer values. // TODO - actually save the set state and use it in following quantizer ops. break; diff --git a/Source/Core/DolphinWX/src/Main.cpp b/Source/Core/DolphinWX/src/Main.cpp index 612614e72e..ca6383db92 100644 --- a/Source/Core/DolphinWX/src/Main.cpp +++ b/Source/Core/DolphinWX/src/Main.cpp @@ -45,7 +45,7 @@ bool DolphinApp::OnInit() #ifdef _WIN32 // TODO: if First Boot - if (!cpu_info.bSSE2Extensions) + if (!cpu_info.bSSE2) { MessageBox(0, _T("Hi,\n\nDolphin requires that your CPU has support for SSE2 extensions.\n" "Unfortunately your CPU does not support them, so Dolphin will not run.\n\n"