Reduce my idiocy in register saving code.

(1) Rename ABI_ALL_CALLEE_SAVED to ABI_ALL_CALLER_SAVED, because the mask
actually lists the caller-saved (volatile) registers, and that is how it
was used.  Derp.

(2) RegistersInUse is always used for the purpose of saving registers
before calling a C++ function in the middle of a JIT block (without
flushing).  There is no need to save callee-saved registers in this
case, because the called function is required by the ABI to preserve
them itself.  Change the name to CallerSavedRegistersInUse and mask the
result with ABI_ALL_CALLER_SAVED.

Testing: nothing obvious broke when starting up a Melee game.  (I added a
check for whether any registers were actually being masked out; it
happens, but in this particular case it seemed to occur at most a few
dozen times per second, so the actual performance benefit is probably
negligible.)
This commit is contained in:
comex 2014-08-23 14:41:31 -04:00
parent 7725737872
commit d19ec35363
6 changed files with 21 additions and 21 deletions

View file

@ -30,7 +30,7 @@
#define ABI_PARAM3 R8 #define ABI_PARAM3 R8
#define ABI_PARAM4 R9 #define ABI_PARAM4 R9
#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \ #define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \
(1 << R9) | (1 << R10) | (1 << R11) | \ (1 << R9) | (1 << R10) | (1 << R11) | \
(1 << XMM0) | (1 << XMM1) | (1 << XMM2) | (1 << XMM3) | \ (1 << XMM0) | (1 << XMM1) | (1 << XMM2) | (1 << XMM3) | \
(1 << XMM4) | (1 << XMM5)) (1 << XMM4) | (1 << XMM5))
@ -44,7 +44,7 @@
#define ABI_PARAM5 R8 #define ABI_PARAM5 R8
#define ABI_PARAM6 R9 #define ABI_PARAM6 R9
#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \ #define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \
(1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \ (1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \
0xffff0000 /* xmm0..15 */) 0xffff0000 /* xmm0..15 */)

View file

@ -501,7 +501,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{ {
js.fifoBytesThisBlock -= 32; js.fifoBytesThisBlock -= 32;
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u32 registersInUse = RegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_PopRegistersAndAdjustStack(registersInUse, false); ABI_PopRegistersAndAdjustStack(registersInUse, false);
@ -672,7 +672,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
return normalEntry; return normalEntry;
} }
u32 Jit64::RegistersInUse() u32 Jit64::CallerSavedRegistersInUse()
{ {
u32 result = 0; u32 result = 0;
for (int i = 0; i < NUMXREGS; i++) for (int i = 0; i < NUMXREGS; i++)
@ -682,5 +682,5 @@ u32 Jit64::RegistersInUse()
if (!fpr.IsFreeX(i)) if (!fpr.IsFreeX(i))
result |= (1 << (16 + i)); result |= (1 << (16 + i));
} }
return result; return result & ABI_ALL_CALLER_SAVED;
} }

View file

@ -60,7 +60,7 @@ public:
void Jit(u32 em_address) override; void Jit(u32 em_address) override;
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
u32 RegistersInUse(); u32 CallerSavedRegistersInUse();
JitBlockCache *GetBlockCache() override { return &blocks; } JitBlockCache *GetBlockCache() override { return &blocks; }

View file

@ -110,13 +110,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
// do our job at first // do our job at first
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.BindToRegister(d, false, true); gpr.BindToRegister(d, false, true);
SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, CallerSavedRegistersInUse(), signExtend);
// if it's still 0, we can wait until the next event // if it's still 0, we can wait until the next event
TEST(32, gpr.R(d), gpr.R(d)); TEST(32, gpr.R(d), gpr.R(d));
FixupBranch noIdle = J_CC(CC_NZ); FixupBranch noIdle = J_CC(CC_NZ);
u32 registersInUse = RegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
@ -199,7 +199,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, js.memcheck, true); gpr.BindToRegister(d, js.memcheck, true);
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, RegistersInUse(), signExtend); SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, CallerSavedRegistersInUse(), signExtend);
if (update && js.memcheck && !zeroOffset) if (update && js.memcheck && !zeroOffset)
{ {
@ -320,7 +320,7 @@ void Jit64::stX(UGeckoInstruction inst)
// Helps external systems know which instruction triggered the write // Helps external systems know which instruction triggered the write
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); MOV(32, M(&PC), Imm32(jit->js.compilerPC));
u32 registersInUse = RegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize) switch (accessSize)
{ {
@ -345,7 +345,7 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.Lock(s, a); gpr.Lock(s, a);
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s)); MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, offset, RegistersInUse()); SafeWriteRegToReg(ECX, EDX, accessSize, offset, CallerSavedRegistersInUse());
if (update && offset) if (update && offset)
{ {
@ -410,7 +410,7 @@ void Jit64::stXx(UGeckoInstruction inst)
} }
MOV(32, R(ECX), gpr.R(s)); MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, 0, RegistersInUse()); SafeWriteRegToReg(ECX, EDX, accessSize, 0, CallerSavedRegistersInUse());
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -429,7 +429,7 @@ void Jit64::lmw(UGeckoInstruction inst)
ADD(32, R(ECX), gpr.R(inst.RA)); ADD(32, R(ECX), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++) for (int i = inst.RD; i < 32; i++)
{ {
SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, RegistersInUse(), false); SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse(), false);
gpr.BindToRegister(i, false, true); gpr.BindToRegister(i, false, true);
MOV(32, gpr.R(i), R(EAX)); MOV(32, gpr.R(i), R(EAX));
} }
@ -450,7 +450,7 @@ void Jit64::stmw(UGeckoInstruction inst)
else else
XOR(32, R(EAX), R(EAX)); XOR(32, R(EAX), R(EAX));
MOV(32, R(ECX), gpr.R(i)); MOV(32, R(ECX), gpr.R(i));
SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, RegistersInUse()); SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse());
} }
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View file

@ -25,7 +25,7 @@ void Jit64::lfs(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false); SafeLoadToReg(EAX, gpr.R(a), 32, offset, CallerSavedRegistersInUse(), false);
fpr.Lock(d); fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck); fpr.BindToRegister(d, js.memcheck);
@ -49,7 +49,7 @@ void Jit64::lfd(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
SafeLoadToReg(RAX, gpr.R(a), 64, offset, RegistersInUse(), false); SafeLoadToReg(RAX, gpr.R(a), 64, offset, CallerSavedRegistersInUse(), false);
fpr.Lock(d); fpr.Lock(d);
fpr.BindToRegister(d, true); fpr.BindToRegister(d, true);
@ -81,7 +81,7 @@ void Jit64::stfd(UGeckoInstruction inst)
MOV(64, R(RAX), fpr.R(s)); MOV(64, R(RAX), fpr.R(s));
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, RegistersInUse()); SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse());
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -100,7 +100,7 @@ void Jit64::stfs(UGeckoInstruction inst)
ConvertDoubleToSingle(XMM0, fpr.RX(s)); ConvertDoubleToSingle(XMM0, fpr.RX(s));
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, RegistersInUse()); SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse());
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -119,7 +119,7 @@ void Jit64::stfsx(UGeckoInstruction inst)
fpr.Lock(s); fpr.Lock(s);
fpr.BindToRegister(s, true, false); fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s)); ConvertDoubleToSingle(XMM0, fpr.RX(s));
SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, RegistersInUse()); SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, CallerSavedRegistersInUse());
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -133,7 +133,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false); SafeLoadToReg(EAX, R(EAX), 32, 0, CallerSavedRegistersInUse(), false);
fpr.Lock(inst.RS); fpr.Lock(inst.RS);
fpr.BindToRegister(inst.RS, js.memcheck); fpr.BindToRegister(inst.RS, js.memcheck);

View file

@ -8,7 +8,7 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLEE_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ #define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \
(1 << XMM0) | (1 << XMM1))) (1 << XMM0) | (1 << XMM1)))
using namespace Gen; using namespace Gen;