[AArch64] Implement dirty register tracking.

Using BindToRegister from the register caches causes the register bound to be marked dirty.
Using the regular R() function from the register caches loads the registers without being dirty.

When implementing new instructions make sure to BindToRegister registers that become dirty.
This commit is contained in:
Ryan Houdek 2015-03-15 20:09:05 -05:00
parent 7be1498a43
commit 3e946b1bf7
8 changed files with 126 additions and 53 deletions

View file

@ -22,8 +22,10 @@ void JitArm64::fabsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -38,10 +40,12 @@ void JitArm64::faddsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -53,9 +57,12 @@ void JitArm64::faddx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FADD(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
@ -92,6 +99,7 @@ void JitArm64::fmaddx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -111,8 +119,11 @@ void JitArm64::fmrx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.INS(64, VD, 0, VB, 0);
}
@ -145,6 +156,7 @@ void JitArm64::fmsubx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -164,10 +176,12 @@ void JitArm64::fmulsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -179,9 +193,12 @@ void JitArm64::fmulx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
@ -196,8 +213,11 @@ void JitArm64::fnabsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -213,8 +233,11 @@ void JitArm64::fnegx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FNEG(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -252,6 +275,7 @@ void JitArm64::fnmaddx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -295,6 +319,7 @@ void JitArm64::fnmsubx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -315,11 +340,14 @@ void JitArm64::fselx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VD = fpr.R(d);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg V0 = fpr.GetReg();
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VC = fpr.R(inst.FC);
m_float_emit.FCMPE(EncodeRegToDouble(VA));
m_float_emit.FCSEL(EncodeRegToDouble(V0), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE);
@ -334,10 +362,12 @@ void JitArm64::fsubsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -349,9 +379,12 @@ void JitArm64::fsubx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FSUB(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VB));

View file

@ -284,6 +284,7 @@ void JitArm64::addx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a || d == b);
ADD(gpr.R(d), gpr.R(a), gpr.R(b));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
@ -297,8 +298,6 @@ void JitArm64::extsXx(UGeckoInstruction inst)
int a = inst.RA, s = inst.RS;
int size = inst.SUBOP10 == 922 ? 16 : 8;
gpr.BindToRegister(a, a == s);
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, (u32)(s32)(size == 16 ? (s16)gpr.GetImm(s) : (s8)gpr.GetImm(s)));
@ -307,11 +306,11 @@ void JitArm64::extsXx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(a, a == s);
SBFM(gpr.R(a), gpr.R(s), 0, size - 1);
if (inst.Rc)
ComputeRC(gpr.R(a), 0);
}
}
void JitArm64::cntlzwx(UGeckoInstruction inst)
@ -321,8 +320,6 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
gpr.BindToRegister(a, a == s);
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
@ -331,6 +328,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(a, a == s);
CLZ(gpr.R(a), gpr.R(s));
if (inst.Rc)
ComputeRC(gpr.R(a), 0);
@ -346,8 +344,6 @@ void JitArm64::negx(UGeckoInstruction inst)
FALLBACK_IF(inst.OE);
gpr.BindToRegister(d, d == a);
if (gpr.IsImm(a))
{
gpr.SetImmediate(d, ~((u32)gpr.GetImm(a)) + 1);
@ -356,6 +352,7 @@ void JitArm64::negx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
@ -487,26 +484,27 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
u32 a = inst.RA, s = inst.RS;
u32 mask = Helper_Mask(inst.MB,inst.ME);
u32 mask = Helper_Mask(inst.MB, inst.ME);
if (gpr.IsImm(inst.RS))
{
gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask);
gpr.SetImmediate(a, _rotl(gpr.GetImm(s), inst.SH) & mask);
if (inst.Rc)
ComputeRC(gpr.GetImm(inst.RA), 0);
ComputeRC(gpr.GetImm(a), 0);
return;
}
gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
gpr.BindToRegister(a, a == s);
ARM64Reg WA = gpr.GetReg();
ArithOption Shift(gpr.R(inst.RS), ST_ROR, 32 - inst.SH);
ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
MOVI2R(WA, mask);
AND(gpr.R(inst.RA), WA, gpr.R(inst.RS), Shift);
AND(gpr.R(a), WA, gpr.R(s), Shift);
gpr.Unlock(WA);
if (inst.Rc)
ComputeRC(gpr.R(inst.RA), 0);
ComputeRC(gpr.R(a), 0);
}
void JitArm64::srawix(UGeckoInstruction inst)
@ -685,6 +683,7 @@ void JitArm64::subfx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a || d == b);
SUB(gpr.R(d), gpr.R(b), gpr.R(a));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);

View file

@ -145,7 +145,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(addr, false);
MOV(gpr.R(addr), addr_reg);
}
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -493,6 +496,8 @@ void JitArm64::stX(UGeckoInstruction inst)
if (update)
{
gpr.BindToRegister(a, false);
ARM64Reg WA = gpr.GetReg();
ARM64Reg RB;
ARM64Reg RA = gpr.R(a);

View file

@ -71,6 +71,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
fpr.BindToRegister(inst.FD, false);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg addr_reg = W0;
@ -172,7 +173,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
@ -360,7 +364,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();

View file

@ -57,7 +57,10 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 24, 29); // Scale
if (update)
{
gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg);
}
MOVI2R(X30, (u64)&asm_routines.pairedLoadQuantized[inst.W * 8]);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
@ -116,7 +119,10 @@ void JitArm64::psq_st(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 8, 13); // Scale
if (update)
{
gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg);
}
ARM64Reg VS = fpr.R(inst.RS);
m_float_emit.FCVTN(32, D0, VS);

View file

@ -95,7 +95,8 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
if (reg.GetType() == REG_REG)
{
ARM64Reg host_reg = reg.GetReg();
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
if (reg.IsDirty())
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
if (!maintain_state)
{
@ -169,6 +170,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, reg.GetImm());
reg.LoadToReg(host_reg);
reg.SetDirty(true);
return host_reg;
}
break;
@ -178,6 +180,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
// This can also happen on cases where PPCAnalyst isn't feeing us proper register usage statistics
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
reg.SetDirty(false);
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
return host_reg;
}
@ -202,6 +205,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
reg.SetDirty(true);
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
@ -292,6 +296,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg)
{
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
reg.SetDirty(false);
m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
return host_reg;
}
@ -308,6 +313,7 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
reg.SetDirty(true);
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
@ -355,7 +361,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{
ARM64Reg host_reg = reg.GetReg();
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (reg.IsDirty())
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state)
{
UnlockRegister(host_reg);

View file

@ -85,6 +85,9 @@ public:
void ResetLastUsed() { m_last_used = 0; }
void IncrementLastUsed() { ++m_last_used; }
void SetDirty(bool dirty) { m_dirty = dirty; }
bool IsDirty() { return m_dirty; }
private:
// For REG_REG
RegType m_type; // store type
@ -94,6 +97,8 @@ private:
u32 m_value; // IMM value
u32 m_last_used;
bool m_dirty;
};
class HostReg

View file

@ -51,6 +51,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
// Don't interpret this, if we do we get thrown out
//JITDISABLE(bJITSystemRegistersOff)
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr));
gpr.Flush(FlushMode::FLUSH_ALL);
@ -64,6 +65,7 @@ void JitArm64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(msr));
}
@ -87,6 +89,7 @@ void JitArm64::mfsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(sr[inst.SR]));
}
@ -95,6 +98,7 @@ void JitArm64::mtsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(sr[inst.SR]));
}
@ -103,13 +107,16 @@ void JitArm64::mfsrin(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 b = inst.RB, d = inst.RD;
gpr.BindToRegister(d, d == b);
ARM64Reg index = gpr.GetReg();
ARM64Reg index64 = EncodeRegTo64(index);
ARM64Reg RB = gpr.R(inst.RB);
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), index64, PPCSTATE_OFF(sr[0]));
LDR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
}
@ -119,13 +126,16 @@ void JitArm64::mtsrin(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 b = inst.RB, d = inst.RD;
gpr.BindToRegister(d, d == b);
ARM64Reg index = gpr.GetReg();
ARM64Reg index64 = EncodeRegTo64(index);
ARM64Reg RB = gpr.R(inst.RB);
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
STR(INDEX_UNSIGNED, gpr.R(inst.RD), index64, PPCSTATE_OFF(sr[0]));
STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
}