Fastmem writes for x86-64.

This commit is contained in:
comex 2013-09-24 22:47:01 -04:00
parent 18abc33306
commit 2a339c926e
9 changed files with 107 additions and 85 deletions

View file

@ -411,31 +411,6 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
ABI_RestoreStack(0);
}
void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
PUSH(RCX);
PUSH(RDX);
PUSH(RSI);
PUSH(RDI);
PUSH(R8);
PUSH(R9);
PUSH(R10);
PUSH(R11);
PUSH(R11);
}
void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
POP(R11);
POP(R11);
POP(R10);
POP(R9);
POP(R8);
POP(RDI);
POP(RSI);
POP(RDX);
POP(RCX);
}
#ifdef _WIN32
// Win64 Specific Code

View file

@ -153,6 +153,19 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
}
}
case 0x88: // mem <- r8
{
info->isMemoryWrite = true;
if (info->operandSize == 4)
{
info->operandSize = 1;
break;
}
else
return false;
break;
}
case 0x89: // mem <- r16/32/64
{
info->isMemoryWrite = true;

View file

@ -646,12 +646,6 @@ public:
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
// A function that doesn't know anything about it's surroundings, should
// be surrounded by these to establish a safe environment, where it can roam free.
// An example is a backpatch injected function.
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);

View file

@ -209,8 +209,6 @@ void Jit64::stfd(UGeckoInstruction inst)
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
MOVAPD(XMM0, fpr.R(s));
MOVD_xmm(R(EAX), XMM0);
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

View file

@ -1322,9 +1322,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
int addr_scale = SCALE_8;
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
Jit->ABI_AlignStack(0);
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
Jit->ABI_RestoreStack(0);
Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I;
regNormalRegClear(RI, I);
@ -1429,9 +1427,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->ABI_AlignStack(0);
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
Jit->ABI_RestoreStack(0);
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)

View file

@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(16, R(AX), M((char*)psTemp + 4));
BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
// Easy!
const u8* storeSingleFloat = AlignCode4();
SafeWriteFloatToReg(XMM0, ECX);
SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
RET();
/*
if (cpu_info.bSSSE3) {
@ -303,11 +303,11 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
// TODO: SafeWriteFloat
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
} else {
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
}*/
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleS8 = AlignCode4();
@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleS16 = AlignCode4();
@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));

View file

@ -67,7 +67,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
X64Reg dataReg = (X64Reg)info.regOperandReg;
// It's a read. Easy.
ABI_PushAllCallerSavedRegsAndAdjustStack();
// It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about
// performance.
if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
if (info.displacement) {
@ -87,8 +90,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
break;
}
ABI_PopAllCallerSavedRegsAndAdjustStack();
if (dataReg != EAX)
{
MOV(32, R(dataReg), R(EAX));
@ -109,32 +110,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
#ifdef _M_X64
X64Reg dataReg = (X64Reg)info.regOperandReg;
if (dataReg != EAX)
PanicAlert("Backpatch write - not through EAX");
X64Reg addrReg = (X64Reg)info.scaledReg;
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
// hardware access - we can take shortcuts.
//if (emAddress == 0xCC008000)
// PanicAlert("Caught a FIFO write");
CMP(32, R(addrReg), Imm32(0xCC008000));
FixupBranch skip_fast = J_CC(CC_NE, false);
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
CALL((void*)jit->GetAsmRoutines()->fifoDirectWrite32);
RET();
SetJumpTarget(skip_fast);
ABI_PushAllCallerSavedRegsAndAdjustStack();
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.
if (dataReg == ABI_PARAM2)
PanicAlert("Incorrect use of SafeWriteRegToReg");
if (addrReg != ABI_PARAM1)
{
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
}
else
{
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
}
if (info.displacement)
@ -142,13 +135,25 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}
SUB(64, R(RSP), Imm8(8));
switch (info.operandSize)
{
case 8:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
break;
case 4:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
break;
case 2:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
break;
case 1:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
break;
}
ABI_PopAllCallerSavedRegsAndAdjustStack();
ADD(64, R(RSP), Imm8(8));
RET();
#endif
@ -193,21 +198,35 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
}
else
{
PanicAlert("BackPatch : Currently only supporting reads."
"\n\nAttempted to write to %08x.", emAddress);
// TODO: special case FIFO writes. Also, support 32-bit mode.
// Also, debug this so that it actually works correctly :P
XEmitter emitter(codePtr - 2);
// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64 *ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
switch (info.operandSize)
{
case 1:
bswapSize = 0;
break;
case 2:
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap16((u16) *ptr);
break;
case 4:
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap32((u32) *ptr);
break;
case 8:
bswapSize = 3;
*ptr = Common::swap64(*ptr);
break;
}
u8 *start = codePtr - bswapSize;
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info);
emitter.CALL((void *)trampoline);
emitter.NOP((int)info.instructionSize - 3);
if (info.instructionSize < 3)
PanicAlert("Instruction too small");
// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
ctx->CTX_RAX = Common::swap32((u32)ctx->CTX_RAX);
return codePtr - 2;
emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
return start;
}
return 0;
#else

View file

@ -223,8 +223,27 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
}
// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog)
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
{
#if defined(_M_X64)
if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem &&
!(flags & (SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_FASTMEM))
#ifdef ENABLE_MEM_CHECK
&& !Core::g_CoreStartupParameter.bEnableDebugging
#endif
)
{
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
if (accessSize == 8)
{
NOP(1);
NOP(1);
}
return;
}
#endif
if (offset)
ADD(32, R(reg_addr), Imm32((u32)offset));
@ -245,6 +264,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
bool noProlog = flags & SAFE_WRITE_NO_PROLOG;
bool swap = !(flags & SAFE_WRITE_NO_SWAP);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break;
@ -257,7 +278,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
SetJumpTarget(exit);
}
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
{
if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it.
@ -290,7 +311,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
} else {
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
SafeWriteRegToReg(EAX, reg_addr, 32, 0, true);
SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
}
}

View file

@ -16,10 +16,16 @@ public:
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true, bool noProlog = false);
enum SafeWriteFlags
{
SAFE_WRITE_NO_SWAP = 1,
SAFE_WRITE_NO_PROLOG = 2,
SAFE_WRITE_NO_FASTMEM = 4
};
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0);
// Trashes both inputs and EAX.
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0);
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);