Be pedantic about stack overflow on Linux and OS X.

Add some magic to the fault handler to handle stack overflow due to BLR
optimization, and disable the optimization if fastmem is not enabled.
comex 2014-09-15 23:03:07 -04:00
parent 755bd2c445
commit 7ad9027593
9 changed files with 174 additions and 23 deletions

View file

@@ -158,6 +158,25 @@ void FreeAlignedMemory(void* ptr)
}
}
void ReadProtectMemory(void* ptr, size_t size)
{
bool error_occurred = false;
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue))
error_occurred = true;
#else
int retval = mprotect(ptr, size, PROT_NONE);
if (retval != 0)
error_occurred = true;
#endif
if (error_occurred)
PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg());
}
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
bool error_occurred = false;

View file

@@ -12,8 +12,12 @@ void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size,size_t alignment);
void FreeAlignedMemory(void* ptr);
void ReadProtectMemory(void* ptr, size_t size);
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
std::string MemUsage();
void GuardMemoryMake(void* ptr, size_t size);
void GuardMemoryUnmake(void* ptr, size_t size);
inline int GetPageSize() { return 4096; }

View file

@@ -1766,6 +1766,8 @@ void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI
void XEmitter::LOCK() { Write8(0xF0); }
void XEmitter::REP() { Write8(0xF3); }
void XEmitter::REPNE() { Write8(0xF2); }
void XEmitter::FSOverride() { Write8(0x64); }
void XEmitter::GSOverride() { Write8(0x65); }
void XEmitter::FWAIT()
{

View file

@@ -467,6 +467,8 @@ public:
void LOCK();
void REP();
void REPNE();
void FSOverride();
void GSOverride();
// x87
enum x87StatusWordBits {
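
FSOverride and GSOverride simply emit the x86 segment-override prefix bytes (0x64 and 0x65). A hedged sketch of how such a prefix combines with the instruction emitted right after it, assuming an XEmitter instance named emit; the pairing with a MOV is an illustration of prefix semantics, not something taken from this diff:

// Illustration only: the prefix byte applies to the next instruction's memory operand.
emit.GSOverride();                   // writes 0x65 (GS segment override)
emit.MOV(64, R(RAX), MDisp(RAX, 0)); // the encoded load now reads gs:[rax]

This mirrors how the existing LOCK/REP helpers in the same class work: each just writes its prefix byte ahead of whatever instruction follows.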

View file

@@ -95,6 +95,83 @@ using namespace PowerPC;
and such, but it's currently limited to integer ops only. This can definitely be made better.
*/
// The BLR optimization is nice, but it means that JITted code can overflow the
// native stack by repeatedly running BL. (The chance of this happening in any
// retail game is close to 0, but correctness is correctness...) Also, the
// overflow might not happen directly in the JITted code but in a C++ function
// called from it, so we can't just adjust RSP in the case of a fault.
// Instead, we have to have extra stack space preallocated under the fault
// point which allows the code to continue, after wiping the JIT cache so we
// can reset things at a safe point. Once this condition trips, the
// optimization is permanently disabled, under the assumption this will never
// happen in practice.
// On Unix, we just mark an appropriate region of the stack as PROT_NONE and
// handle it the same way as fastmem faults. It's safe to take a fault with a
// bad RSP, because on Linux we can use sigaltstack and on OS X we're already
// on a separate thread.
// On Windows, the OS gets upset if RSP doesn't work, and I don't know any
// equivalent of sigaltstack. Windows supports guard pages which, when
// accessed, immediately turn into regular pages but cause a trap... but
// putting them in the path of RSP just leads to something (in the kernel?)
// thinking a regular stack extension is required. So this protection is not
// supported on Windows yet... We still use a separate stack for the sake of
// simplicity.
enum
{
STACK_SIZE = 2 * 1024 * 1024,
SAFE_STACK_SIZE = 512 * 1024,
GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above)
GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE,
};
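
Put together with the constants above, the 2 MB custom stack is laid out as follows (an annotated sketch, not part of the diff; RSP starts at the top and grows downward):

// m_stack + STACK_SIZE                 <- initial stack top handed to JitAsm below
//   512 KB (SAFE_STACK_SIZE): normal working space for JIT blocks and the C++ they call
// m_stack + GUARD_OFFSET + GUARD_SIZE
//   64 KB middle guard: the first touch raises the fault caught in HandleFault below,
//   which disables the BLR optimization and disarms this page
// m_stack + GUARD_OFFSET
//   ~1.4 MB of reserve: room to keep executing until the JIT cache can be cleared
//   at a safe point
// m_stack + GUARD_SIZE
//   64 KB bottom guard (permanent): last line of defense
// m_stack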
void Jit64::AllocStack()
{
#if !defined(_WIN32)
m_stack = (u8*)AllocateMemoryPages(STACK_SIZE);
ReadProtectMemory(m_stack, GUARD_SIZE);
ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
#endif
}
void Jit64::FreeStack()
{
#if !defined(_WIN32)
if (m_stack)
{
FreeMemoryPages(m_stack, STACK_SIZE);
m_stack = NULL;
}
#endif
}
bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
{
uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack;
// In the trap region?
if (stack && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
{
WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
m_enable_blr_optimization = false;
UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
// We're going to need to clear the whole cache to get rid of the bad
// CALLs, but we can't yet. Fake the downcount so we're forced to the
// dispatcher (no block linking), and clear the cache so we're sent to
// Jit. Yeah, it's kind of gross.
GetBlockCache()->InvalidateICache(0, 0xffffffff);
CoreTiming::ForceExceptionCheck(0);
m_clear_cache_asap = true;
return true;
}
return Jitx86Base::HandleFault(access_address, ctx);
}
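
HandleFault only helps if the platform's fault machinery actually reaches it while RSP points into a protected page; that wiring lives in the fault-handler changes mentioned in the commit message and is not among the hunks shown here. Below is a hedged sketch of what the Linux side of that glue can look like, with ContextFromUcontext and g_jit as hypothetical stand-ins for however the real handler extracts an SContext and finds the JIT instance:

#include <csignal>
#include <cstdlib>
#include <ucontext.h>

// Hypothetical glue, named only for this sketch:
SContext* ContextFromUcontext(ucontext_t* uc); // platform-specific register extraction
extern Jit64* g_jit;                           // the active JIT instance

static void SegFaultHandler(int sig, siginfo_t* info, void* raw_context)
{
	SContext* ctx = ContextFromUcontext(static_cast<ucontext_t*>(raw_context));
	if (!g_jit->HandleFault(reinterpret_cast<uintptr_t>(info->si_addr), ctx))
	{
		// Not a fault we know how to fix: fall back to the default action.
		signal(SIGSEGV, SIG_DFL);
		raise(SIGSEGV);
	}
}

static void InstallSegFaultHandler()
{
	// Run the handler on its own stack so it still works while RSP sits in
	// the read-protected guard region of the JIT stack.
	stack_t alt_stack = {};
	alt_stack.ss_sp = malloc(SIGSTKSZ);
	alt_stack.ss_size = SIGSTKSZ;
	sigaltstack(&alt_stack, nullptr);

	struct sigaction sa = {};
	sa.sa_sigaction = SegFaultHandler;
	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, nullptr);
}

On OS X the equivalent routing happens from the Mach exception handler, which already runs on a separate thread, which is why the comment above notes that a bad RSP is harmless there.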
void Jit64::Init()
{
jo.optimizeStack = true;
@@ -130,8 +207,18 @@ void Jit64::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
// BLR optimization has the same consequences as block linking, as well as
// depending on the fault handler to be safe in the event of excessive BL.
m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem;
m_clear_cache_asap = false;
m_stack = nullptr;
if (m_enable_blr_optimization)
AllocStack();
blocks.Init();
asm_routines.Init();
asm_routines.Init(m_stack ? (m_stack + STACK_SIZE) : nullptr);
// important: do this *after* generating the global asm routines, because we can't use farcode in them.
// it'll crash because the farcode functions get cleared on JIT clears.
@@ -155,6 +242,7 @@ void Jit64::ClearCache()
void Jit64::Shutdown()
{
FreeStack();
FreeCodeSpace();
blocks.Shutdown();
@@ -251,11 +339,8 @@ bool Jit64::Cleanup()
void Jit64::WriteExit(u32 destination, bool bl, u32 after)
{
// BLR optimization has similar consequences to block linking.
if (!jo.enableBlocklink)
{
if (!m_enable_blr_optimization)
bl = false;
}
Cleanup();
@@ -313,17 +398,17 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
{
if (!jo.enableBlocklink)
{
if (!m_enable_blr_optimization)
bl = false;
}
MOV(32, PPCSTATE(pc), R(RSCRATCH));
Cleanup();
if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
PUSH(RSCRATCH2);
}
MOV(32, PPCSTATE(pc), R(RSCRATCH));
Cleanup();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
if (bl)
{
@@ -339,7 +424,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
void Jit64::WriteBLRExit()
{
if (!jo.enableBlocklink)
if (!m_enable_blr_optimization)
{
WriteExitDestInRSCRATCH();
return;
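
For context on why a guard is needed at all: with the optimization on, a BL-type exit pushes the expected PPC return address and transfers control with a native call, and WriteBLRExit pops and matches it so that a properly paired BLR can return with a plain RET. The exact emitted sequence is outside the lines shown here, so the following is only an illustrative sketch of the shape:

// Rough shape of the optimization (illustrative, not the literal emitted code):
//   emulated bl   ->  PUSH <after>             ; 8 bytes of native stack
//                     CALL <dispatcher/block>  ; 8 more for the return address
//   emulated blr  ->  compare the saved value against the new PC;
//                     RET on a match, fall back to the dispatcher otherwise
// Unmatched BLs therefore cost roughly 16 bytes of native stack each, so a guest
// that loops on BL without returning walks RSP down into the guard page, which is
// exactly the case HandleFault above is there to catch.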
@@ -428,8 +513,11 @@ void Jit64::Trace()
void STACKALIGN Jit64::Jit(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() ||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
if (GetSpaceLeft() < 0x10000 ||
farcode.GetSpaceLeft() < 0x10000 ||
blocks.IsFull() ||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache ||
m_clear_cache_asap)
{
ClearCache();
}

View file

@@ -18,6 +18,10 @@
// ----------
#pragma once
#ifdef _WIN32
#include <winnt.h>
#endif
#include "Common/x64ABI.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
@@ -40,6 +44,9 @@
class Jit64 : public Jitx86Base
{
private:
void AllocStack();
void FreeStack();
GPRRegCache gpr;
FPURegCache fpr;
@@ -48,6 +55,10 @@ private:
PPCAnalyst::CodeBuffer code_buffer;
Jit64AsmRoutineManager asm_routines;
bool m_enable_blr_optimization;
bool m_clear_cache_asap;
u8* m_stack;
public:
Jit64() : code_buffer(32000) {}
~Jit64() {}
@@ -55,6 +66,8 @@ public:
void Init() override;
void Shutdown() override;
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
// Jit!
void Jit(u32 em_address) override;

View file

@@ -23,8 +23,18 @@ void Jit64AsmRoutineManager::Generate()
// for the shadow region before calls in this function. This call will
// waste a bit of space for a second shadow, but whatever.
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16);
if (m_stack_top)
{
// Pivot the stack to our custom one.
MOV(64, R(RSCRATCH), R(RSP));
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
MOV(64, MDisp(RSP, 0x18), R(RSCRATCH));
}
else
{
MOV(64, M(&s_saved_rsp), R(RSP));
}
// something that can't pass the BLR test
MOV(64, M(&s_saved_rsp), R(RSP));
MOV(64, MDisp(RSP, 8), Imm32((u32)-1));
// Two statically allocated registers.
@@ -46,7 +56,10 @@ void Jit64AsmRoutineManager::Generate()
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0);
#endif
MOV(64, R(RSP), M(&s_saved_rsp));
if (m_stack_top)
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
else
MOV(64, R(RSP), M(&s_saved_rsp));
SUB(32, PPCSTATE(downcount), R(RSCRATCH));
@@ -55,6 +68,8 @@ void Jit64AsmRoutineManager::Generate()
// IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = J_CC(CC_BE, true);
FixupBranch dbg_exit;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
@@ -63,11 +78,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
ABI_PopRegistersAndAdjustStack(0, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z);
MOV(64, R(RSP), M(&s_saved_rsp));
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET();
SetJumpTarget(noBreakpoint);
dbg_exit = J_CC(CC_NZ);
SetJumpTarget(notStepping);
}
@@ -155,7 +166,17 @@ void Jit64AsmRoutineManager::Generate()
J_CC(CC_Z, outerLoop);
//Landing pad for drec space
MOV(64, R(RSP), M(&s_saved_rsp));
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
SetJumpTarget(dbg_exit);
if (m_stack_top)
{
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x8));
POP(RSP);
}
else
{
MOV(64, R(RSP), M(&s_saved_rsp));
}
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET();
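
The three magic offsets in the pivot fit together as follows (a worked note, not part of the diff): on entry RSP becomes m_stack_top - 0x20, so the caller's RSP stored at RSP + 0x18 lands at m_stack_top - 0x8; on exit, RSP is pointed back at that slot and POP(RSP) reloads the saved value directly into RSP, undoing the pivot no matter where RSP ended up in between.

// entry:  MOV RSP, m_stack_top - 0x20   ; keep 0x20 bytes free at the very top
//         MOV [RSP + 0x18], old RSP     ; i.e. the slot at m_stack_top - 0x8
// exit:   MOV RSP, m_stack_top - 0x8    ; point RSP at that slot
//         POP RSP                       ; RSP = old RSP, pivot undone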

View file

@@ -25,10 +25,12 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
private:
void Generate();
void GenerateCommon();
u8* m_stack_top;
public:
void Init()
void Init(u8* stack_top)
{
m_stack_top = stack_top;
AllocCodeSpace(8192);
Generate();
WriteProtect();

View file

@@ -272,7 +272,7 @@ void JitIL::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
blocks.Init();
asm_routines.Init();
asm_routines.Init(nullptr);
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);