Merge pull request #1024 from comex/abi-cleanup

ABI cleanup
This commit is contained in:
comex 2014-09-08 01:03:36 -04:00
commit 7fb6628789
12 changed files with 110 additions and 210 deletions

View file

@ -10,93 +10,86 @@ using namespace Gen;
// Shared code between Win64 and Unix64
unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog)
void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
{
frameSize = noProlog ? 0x28 : 0;
return frameSize;
}
void XEmitter::ABI_AlignStack(unsigned int frameSize, bool noProlog)
{
unsigned int fillSize = ABI_GetAlignedFrameSize(frameSize, noProlog) - frameSize;
if (fillSize != 0)
{
SUB(64, R(RSP), Imm8(fillSize));
}
}
void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog)
{
unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize, noProlog);
if (alignedSize != 0)
{
ADD(64, R(RSP), Imm8(alignedSize));
}
}
void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog)
{
int regSize = 8;
int shadow = 0;
size_t shadow = 0;
#if defined(_WIN32)
shadow = 0x20;
#endif
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
{
PUSH((X64Reg) r);
count++;
}
}
int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf;
rsp_alignment -= count * 8;
size_t subtraction = 0;
if (mask & 0xffff0000)
{
// If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xf;
}
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
size += 16;
}
size += shadow;
if (size)
SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
int offset = shadow;
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVUPD(MDisp(RSP, offset), (X64Reg) x);
offset += 16;
}
subtraction += 16;
}
size_t xmm_base_subtraction = subtraction;
subtraction += needed_frame_size;
subtraction += shadow;
// Final alignment.
rsp_alignment -= subtraction;
subtraction += rsp_alignment & 0xf;
*shadowp = shadow;
*subtractionp = subtraction;
*xmm_offsetp = subtraction - xmm_base_subtraction;
}
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog)
size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
{
int regSize = 8;
int size = 0;
#if defined(_WIN32)
size += 0x20;
#endif
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
PUSH((X64Reg) r);
}
if (subtraction)
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVUPD((X64Reg) x, MDisp(RSP, size));
size += 16;
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x);
xmm_offset += 16;
}
}
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
count++;
}
size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf;
if (size)
ADD(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
return shadow;
}
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
{
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset));
xmm_offset += 16;
}
}
if (subtraction)
ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int r = 15; r >= 0; r--)
{
if (mask & (1 << r))
@ -109,7 +102,6 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog)
// Common functions
void XEmitter::ABI_CallFunction(void *func)
{
ABI_AlignStack(0);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL &&
distance < 0xFFFFFFFF80000000ULL)
@ -122,12 +114,10 @@ void XEmitter::ABI_CallFunction(void *func)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionC16(void *func, u16 param1)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL &&
@ -141,12 +131,10 @@ void XEmitter::ABI_CallFunctionC16(void *func, u16 param1)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
u64 distance = u64(func) - (u64(code) + 5);
@ -161,12 +149,10 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionC(void *func, u32 param1)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL &&
@ -180,12 +166,10 @@ void XEmitter::ABI_CallFunctionC(void *func, u32 param1)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
@ -200,12 +184,10 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(64, R(ABI_PARAM2), Imm64((u64)param2));
u64 distance = u64(func) - (u64(code) + 5);
@ -220,12 +202,10 @@ void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
@ -241,12 +221,10 @@ void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), Imm64((u64)param3));
@ -262,12 +240,10 @@ void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *par
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 param3, void *param4)
{
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
@ -284,12 +260,10 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2)
{
ABI_AlignStack(0);
MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
@ -304,12 +278,10 @@ void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2)
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3)
{
ABI_AlignStack(0);
MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
MOV(64, R(ABI_PARAM2), Imm64((u64)param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
@ -325,13 +297,11 @@ void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 p
{
CALL(func);
}
ABI_RestoreStack(0);
}
// Pass a register as a parameter.
void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1)
{
ABI_AlignStack(0);
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
u64 distance = u64(func) - (u64(code) + 5);
@ -346,13 +316,11 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1)
{
CALL(func);
}
ABI_RestoreStack(0);
}
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog)
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2)
{
ABI_AlignStack(0, noProlog);
MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL &&
@ -366,7 +334,6 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP
{
CALL(func);
}
ABI_RestoreStack(0, noProlog);
}
void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp)
@ -395,7 +362,6 @@ void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg
void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
{
ABI_AlignStack(0);
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
@ -411,12 +377,10 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2
{
CALL(func);
}
ABI_RestoreStack(0);
}
void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
{
ABI_AlignStack(0);
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
u64 distance = u64(func) - (u64(code) + 5);
@ -431,66 +395,5 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
{
CALL(func);
}
ABI_RestoreStack(0);
}
#ifdef _WIN32
// Win64 Specific Code
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack()
{
//we only want to do this once
PUSH(RBP);
MOV(64, R(RBP), R(RSP));
PUSH(RBX);
PUSH(RSI);
PUSH(RDI);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
SUB(64, R(RSP), Imm8(0x28));
//TODO: Also preserve XMM0-3?
}
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack()
{
ADD(64, R(RSP), Imm8(0x28));
POP(R15);
POP(R14);
POP(R13);
POP(R12);
POP(RDI);
POP(RSI);
POP(RBX);
POP(RBP);
}
#else
// Unix64 Specific Code
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack()
{
PUSH(RBP);
MOV(64, R(RBP), R(RSP));
PUSH(RBX);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
SUB(64, R(RSP), Imm8(8));
}
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack()
{
ADD(64, R(RSP), Imm8(8));
POP(R15);
POP(R14);
POP(R13);
POP(R12);
POP(RBX);
POP(RBP);
}
#endif // WIN32

View file

@ -53,5 +53,7 @@
#endif // WIN32
#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED)
#define ABI_RETURN RAX

View file

@ -281,6 +281,8 @@ private:
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
protected:
inline void Write8(u8 value) {*code++ = value;}
inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
@ -751,23 +753,16 @@ public:
// Pass a register as a parameter.
void ABI_CallFunctionR(void *func, X64Reg reg1);
void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false);
void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2);
// Helper method for the above, or can be used separately.
void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp);
// A function that doesn't have any control over what it will do to regs,
// such as the dispatcher, should be surrounded by these.
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
// A more flexible version of the above.
void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog);
void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog);
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
// Saves/restores the registers and adjusts the stack to be aligned as
// required by the ABI, where the previous alignment was as specified.
// Push returns the size of the shadow space, i.e. the offset of the frame.
size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
inline int ABI_GetNumXMMRegs() { return 16; }

View file

@ -384,7 +384,9 @@ const u8 *DSPEmitter::CompileStub()
void DSPEmitter::CompileDispatcher()
{
enterDispatcher = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
// We don't use floating point (high 16 bits).
u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff;
ABI_PushRegistersAndAdjustStack(registers_used, 8);
const u8 *dispatcherLoop = GetCodePtr();
@ -419,6 +421,6 @@ void DSPEmitter::CompileDispatcher()
SetJumpTarget(exceptionExit);
}
//MOV(32, M(&cyclesLeft), Imm32(0));
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(registers_used, 8);
RET();
}

View file

@ -495,9 +495,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.fifoBytesThisBlock -= 32;
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
}
u32 function = HLE::GetFunctionIndex(ops[i].address);

View file

@ -16,7 +16,7 @@ using namespace Gen;
void Jit64AsmRoutineManager::Generate()
{
enterCode = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
// Two statically allocated registers.
MOV(64, R(RMEM), Imm64((u64)Memory::base));
@ -39,7 +39,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
SetJumpTarget(noBreakpoint);
SetJumpTarget(notStepping);
@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate()
J_CC(CC_Z, outerLoop);
//Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
GenerateCommon();

View file

@ -116,11 +116,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
FixupBranch noIdle = J_CC(CC_NZ);
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
//MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
@ -285,9 +285,9 @@ void Jit64::dcbz(UGeckoInstruction inst)
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
FixupBranch exit = J();
SetJumpTarget(fast);
@ -374,7 +374,7 @@ void Jit64::stX(UGeckoInstruction inst)
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 32:
@ -387,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst)
ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (update)
gpr.SetImmediate32(a, addr);
return;

View file

@ -87,9 +87,7 @@ void Jit64::psq_l(UGeckoInstruction inst)
if (inst.W)
OR(32, R(RSCRATCH2), Imm8(8));
ABI_AlignStack(0);
CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
ABI_RestoreStack(0);
// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access

View file

@ -110,9 +110,9 @@ void CommonAsmRoutines::GenFrsqrte()
SetJumpTarget(complex1);
SetJumpTarget(complex2);
SetJumpTarget(complex3);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();
}
@ -169,9 +169,9 @@ void CommonAsmRoutines::GenFres()
SetJumpTarget(complex1);
SetJumpTarget(complex2);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();
}
@ -258,9 +258,10 @@ void CommonAsmRoutines::GenQuantizedStores()
SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH);
FixupBranch skip_complex = J(true);
SetJumpTarget(too_complex);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
SetJumpTarget(skip_complex);
RET();

View file

@ -56,10 +56,8 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
X64Reg dataReg = (X64Reg)info.regOperandReg;
// It's a read. Easy.
// It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about
// performance.
ABI_PushRegistersAndAdjustStack(registersInUse, true);
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
@ -91,7 +89,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
MOV(32, R(dataReg), R(ABI_RETURN));
}
ABI_PopRegistersAndAdjustStack(registersInUse, true);
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}
@ -115,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, true);
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3);
@ -140,7 +138,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, true);
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;

View file

@ -204,9 +204,9 @@ private:
void CallLambda(int sbits, const std::function<T(u32)>* lambda)
{
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, 0);
m_code->ABI_CallLambdaC(lambda, m_address);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, 0);
MoveOpArgToReg(sbits, R(ABI_RETURN));
}
@ -305,7 +305,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
else
{
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break;
@ -313,7 +313,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MEMCHECK_START
@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
FixupBranch fast = J_CC(CC_Z, true);
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64:
@ -366,7 +366,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MEMCHECK_START
@ -470,25 +470,25 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
FixupBranch fast = J_CC(CC_Z, true);
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG));
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
switch (accessSize)
{
case 64:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr);
break;
case 32:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr);
break;
case 16:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr);
break;
case 8:
ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false);
ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, noProlog);
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);

View file

@ -584,7 +584,8 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator");
m_compiledCode = GetCodePtr();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
// We don't use any callee saved registers or anything but RAX.
ABI_PushRegistersAndAdjustStack(0, 8);
// Start loop here
const u8 *loop_start = GetCodePtr();
@ -845,7 +846,7 @@ void VertexLoader::CompileVertexTranslator()
SUB(32, MatR(RAX), Imm8(1));
J_CC(CC_NZ, loop_start);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(0, 8);
RET();
#endif
}