From 2a339c926e43c11f7b9acc8d3af202f0be54e2b0 Mon Sep 17 00:00:00 2001 From: comex Date: Tue, 24 Sep 2013 22:47:01 -0400 Subject: [PATCH] Fastmem writes for x86-64. --- Source/Core/Common/Src/x64ABI.cpp | 25 ------ Source/Core/Common/Src/x64Analyzer.cpp | 13 +++ Source/Core/Common/Src/x64Emitter.h | 6 -- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 2 - .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 4 - .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 22 ++--- .../Src/PowerPC/JitCommon/JitBackpatch.cpp | 83 ++++++++++++------- .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 27 +++++- .../Core/Src/PowerPC/JitCommon/Jit_Util.h | 10 ++- 9 files changed, 107 insertions(+), 85 deletions(-) diff --git a/Source/Core/Common/Src/x64ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp index 1b4fa0b5ff..e56db6ef34 100644 --- a/Source/Core/Common/Src/x64ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -411,31 +411,6 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) ABI_RestoreStack(0); } -void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { - PUSH(RCX); - PUSH(RDX); - PUSH(RSI); - PUSH(RDI); - PUSH(R8); - PUSH(R9); - PUSH(R10); - PUSH(R11); - PUSH(R11); -} - -void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { - POP(R11); - POP(R11); - POP(R10); - POP(R9); - POP(R8); - POP(RDI); - POP(RSI); - POP(RDX); - POP(RCX); -} - - #ifdef _WIN32 // Win64 Specific Code diff --git a/Source/Core/Common/Src/x64Analyzer.cpp b/Source/Core/Common/Src/x64Analyzer.cpp index 9632c46125..e41b8ce5dd 100644 --- a/Source/Core/Common/Src/x64Analyzer.cpp +++ b/Source/Core/Common/Src/x64Analyzer.cpp @@ -153,6 +153,19 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info) } } + case 0x88: // mem <- r8 + { + info->isMemoryWrite = true; + if (info->operandSize == 4) + { + info->operandSize = 1; + break; + } + else + return false; + break; + } + case 0x89: // mem <- r16/32/64 { info->isMemoryWrite = true; diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 27a735ab90..14f0ddb811 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -646,12 +646,6 @@ public: void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PopAllCalleeSavedRegsAndAdjustStack(); - // A function that doesn't know anything about it's surroundings, should - // be surrounded by these to establish a safe environment, where it can roam free. - // An example is a backpatch injected function. - void ABI_PushAllCallerSavedRegsAndAdjustStack(); - void ABI_PopAllCallerSavedRegsAndAdjustStack(); - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 200727dc15..f324c32cf6 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -209,8 +209,6 @@ void Jit64::stfd(UGeckoInstruction inst) MOVD_xmm(R(EAX), XMM0); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0); - MOVAPD(XMM0, fpr.R(s)); - MOVD_xmm(R(EAX), XMM0); LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset)); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index 78bf1db57c..8b0a42086a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -1322,9 +1322,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak int addr_scale = SCALE_8; #endif Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->ABI_AlignStack(0); Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); - Jit->ABI_RestoreStack(0); Jit->MOVAPD(reg, R(XMM0)); RI.fregs[reg] = I; regNormalRegClear(RI, I); @@ -1429,9 +1427,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak #endif Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); - Jit->ABI_AlignStack(0); Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); - Jit->ABI_RestoreStack(0); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index d80b0bb3f8..767035efb1 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, false, true); + SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); @@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, false, true); + SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); @@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(16, R(AX), M((char*)psTemp + 4)); BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); + SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); @@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVD_xmm(R(EAX), XMM0); BSWAP(32, EAX); ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); + SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); @@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { // Easy! const u8* storeSingleFloat = AlignCode4(); - SafeWriteFloatToReg(XMM0, ECX); + SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) { @@ -303,11 +303,11 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); + SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); } else { MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, true, true); + SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); }*/ const u8* storeSingleU8 = AlignCode4(); // Used by MKWii @@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, true, true); + SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS8 = AlignCode4(); @@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, true, true); + SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii @@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, true, true); + SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS16 = AlignCode4(); @@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, true, true); + SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp index 57a2f002ff..385a48362e 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp @@ -67,7 +67,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info) X64Reg dataReg = (X64Reg)info.regOperandReg; // It's a read. Easy. - ABI_PushAllCallerSavedRegsAndAdjustStack(); + // It ought to be necessary to align the stack here. Since it seems to not + // affect anybody, I'm not going to add it just to be completely safe about + // performance. + if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); if (info.displacement) { @@ -87,8 +90,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info) break; } - ABI_PopAllCallerSavedRegsAndAdjustStack(); - if (dataReg != EAX) { MOV(32, R(dataReg), R(EAX)); @@ -109,32 +110,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info) #ifdef _M_X64 X64Reg dataReg = (X64Reg)info.regOperandReg; - if (dataReg != EAX) - PanicAlert("Backpatch write - not through EAX"); - X64Reg addrReg = (X64Reg)info.scaledReg; // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a // hardware access - we can take shortcuts. - //if (emAddress == 0xCC008000) - // PanicAlert("Caught a FIFO write"); - CMP(32, R(addrReg), Imm32(0xCC008000)); - FixupBranch skip_fast = J_CC(CC_NE, false); - MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg)); - CALL((void*)jit->GetAsmRoutines()->fifoDirectWrite32); - RET(); - SetJumpTarget(skip_fast); - ABI_PushAllCallerSavedRegsAndAdjustStack(); + // Don't treat FIFO writes specially for now because they require a burst + // check anyway. + if (dataReg == ABI_PARAM2) + PanicAlert("Incorrect use of SafeWriteRegToReg"); if (addrReg != ABI_PARAM1) { - MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg)); - MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg)); + MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); + MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); } else { - MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg)); - MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg)); + MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); + MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); } if (info.displacement) @@ -142,13 +135,25 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info) ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); } + SUB(64, R(RSP), Imm8(8)); + switch (info.operandSize) { + case 8: + CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2)); + break; case 4: CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2)); break; + case 2: + CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2)); + break; + case 1: + CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2)); + break; } - ABI_PopAllCallerSavedRegsAndAdjustStack(); + + ADD(64, R(RSP), Imm8(8)); RET(); #endif @@ -193,21 +198,35 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) } else { - PanicAlert("BackPatch : Currently only supporting reads." - "\n\nAttempted to write to %08x.", emAddress); - // TODO: special case FIFO writes. Also, support 32-bit mode. - // Also, debug this so that it actually works correctly :P - XEmitter emitter(codePtr - 2); - // We know it's EAX so the BSWAP before will be two byte. Overwrite it. + // We entered here with a BSWAP-ed register. We'll have to swap it back. + u64 *ptr = ContextRN(ctx, info.regOperandReg); + int bswapSize = 0; + switch (info.operandSize) + { + case 1: + bswapSize = 0; + break; + case 2: + bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0); + *ptr = Common::swap16((u16) *ptr); + break; + case 4: + bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0); + *ptr = Common::swap32((u32) *ptr); + break; + case 8: + bswapSize = 3; + *ptr = Common::swap64(*ptr); + break; + } + + u8 *start = codePtr - bswapSize; + XEmitter emitter(start); const u8 *trampoline = trampolines.GetWriteTrampoline(info); emitter.CALL((void *)trampoline); - emitter.NOP((int)info.instructionSize - 3); - if (info.instructionSize < 3) - PanicAlert("Instruction too small"); - // We entered here with a BSWAP-ed EAX. We'll have to swap it back. - ctx->CTX_RAX = Common::swap32((u32)ctx->CTX_RAX); - return codePtr - 2; + emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr()); + return start; } return 0; #else diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 637dff171b..da5b735dcb 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -223,8 +223,27 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac } // Destroys both arg registers -void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog) +void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags) { +#if defined(_M_X64) + if (!Core::g_CoreStartupParameter.bMMU && + Core::g_CoreStartupParameter.bFastmem && + !(flags & (SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_FASTMEM)) +#ifdef ENABLE_MEM_CHECK + && !Core::g_CoreStartupParameter.bEnableDebugging +#endif + ) + { + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP)); + if (accessSize == 8) + { + NOP(1); + NOP(1); + } + return; + } +#endif + if (offset) ADD(32, R(reg_addr), Imm32((u32)offset)); @@ -245,6 +264,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + bool noProlog = flags & SAFE_WRITE_NO_PROLOG; + bool swap = !(flags & SAFE_WRITE_NO_SWAP); switch (accessSize) { case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break; @@ -257,7 +278,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce SetJumpTarget(exit); } -void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr) +void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags) { if (false && cpu_info.bSSSE3) { // This path should be faster but for some reason it causes errors so I've disabled it. @@ -290,7 +311,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr) } else { MOVSS(M(&float_buffer), xmm_value); MOV(32, R(EAX), M(&float_buffer)); - SafeWriteRegToReg(EAX, reg_addr, 32, 0, true); + SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags); } } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h index 57dce19790..ca3f05c395 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -16,10 +16,16 @@ public: void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true, bool noProlog = false); + enum SafeWriteFlags + { + SAFE_WRITE_NO_SWAP = 1, + SAFE_WRITE_NO_PROLOG = 2, + SAFE_WRITE_NO_FASTMEM = 4 + }; + void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0); // Trashes both inputs and EAX. - void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr); + void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0); void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);