Merge pull request #474 from Sonicadvance1/conditional-branch

Support conditional register cache flushing on ARMv7.
This commit is contained in:
Ryan Houdek 2014-06-10 07:21:07 -05:00
commit 1db93db474
7 changed files with 113 additions and 31 deletions

View file

@ -149,9 +149,6 @@ void JitArm::bcx(UGeckoInstruction inst)
JITDISABLE(bJITBranchOff)
// USES_CR
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch;
@ -194,6 +191,9 @@ void JitArm::bcx(UGeckoInstruction inst)
destination = SignExt16(inst.BD << 2);
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExit(destination);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
@ -202,16 +202,17 @@ void JitArm::bcx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch );
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
void JitArm::bcctrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITBranchOff)
gpr.Flush();
fpr.Flush();
// bcctrx doesn't decrement and/or test CTR
_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");
@ -220,6 +221,9 @@ void JitArm::bcctrx(UGeckoInstruction inst)
// BO_2 == 1z1zz -> b always
//NPC = CTR & 0xfffffffc;
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg();
if (inst.LK_3)
@ -261,22 +265,26 @@ void JitArm::bcctrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA);
SetJumpTarget(b);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
}
void JitArm::bclrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITBranchOff)
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch;
@ -326,6 +334,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
@ -334,5 +345,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch );
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}

View file

@ -954,9 +954,6 @@ void JitArm::twx(UGeckoInstruction inst)
s32 a = inst.RA;
gpr.Flush();
fpr.Flush();
ARMReg RA = gpr.GetReg();
ARMReg RB = gpr.GetReg();
MOV(RA, inst.TO);
@ -1003,6 +1000,9 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(take4);
SetJumpTarget(take5);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
LDR(RA, R9, PPCSTATE_OFF(Exceptions));
MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized
ORR(RA, RA, RB);
@ -1016,7 +1016,12 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(exit5);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
gpr.Unlock(RA, RB);
}

View file

@ -446,12 +446,14 @@ void JitArm::lXX(UGeckoInstruction inst)
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
ARMReg RD = gpr.R(d);
gpr.Flush();
fpr.Flush();
// if it's still 0, we can wait until the next event
TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
rA = gpr.GetReg();
MOVI2R(rA, (u32)&PowerPC::OnIdle);

View file

@ -32,6 +32,27 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	// A new block must begin with a clean cache. The previous block can leave
	// entries marked as loaded, because conditional branches do not clear the
	// register cache state when they exit.
	for (u8 preg = 0; preg < 32; ++preg)
	{
		// Each guest FPR has two cache entries; slot 0 is handled before slot 1.
		for (u8 slot = 0; slot < 2; ++slot)
		{
			if (_regs[preg][slot].GetType() == REG_NOTLOADED)
				continue;

			// Release the host register entry this slot was using.
			// NOTE(review): 33 looks like the "no guest register" marker - confirm.
			const u32 hostIndex = _regs[preg][slot].GetRegIndex();
			ArmCRegs[hostIndex].PPCReg = 33;
			ArmCRegs[hostIndex].LastLoad = 0;
			_regs[preg][slot].Flush();
		}
	}
}
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
@ -162,7 +183,7 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
return GetPPCReg(preg, true, preLoad);
}
void ArmFPRCache::Flush()
void ArmFPRCache::Flush(FlushMode mode)
{
for (u8 a = 0; a < 32; ++a)
{
@ -172,20 +193,26 @@ void ArmFPRCache::Flush()
u32 regindex = _regs[a][0].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
}
}
if (_regs[a][1].GetType() != REG_NOTLOADED)
{
s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8;
u32 regindex = _regs[a][1].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
}
}
}

View file

@ -43,7 +43,7 @@ public:
ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg V0);
void Flush();
void Flush(FlushMode mode = FLUSH_ALL);
ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register
ARMReg R1(u32 preg, bool preLoad = true);
};

View file

@ -30,6 +30,19 @@ void ArmRegCache::Init(ARMXEmitter *emitter)
}
void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	// A new block must begin with a clean cache. The previous block can leave
	// entries dirty, because conditional branches do not clear the register
	// cache state when they exit.
	for (u8 preg = 0; preg < 32; ++preg)
	{
		if (regs[preg].GetType() == REG_REG)
		{
			// Release the host register entry this guest register was bound to.
			// NOTE(review): 33 looks like the "no guest register" marker - confirm.
			const u32 hostIndex = regs[preg].GetRegIndex();
			ArmCRegs[hostIndex].LastLoad = 0;
			ArmCRegs[hostIndex].PPCReg = 33;
		}

		// Flush() is called on every entry, whatever type it had.
		regs[preg].Flush();
	}
}
ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
@ -192,21 +205,36 @@ void ArmRegCache::SetImmediate(u32 preg, u32 imm)
regs[preg].LoadToImm(imm);
}
void ArmRegCache::Flush()
// Emits stores that write every cached guest GPR (immediates and register-bound
// values) to offset PPCSTATE_OFF(gpr) + a * 4 from R9.
// mode: with FLUSH_ALL the ArmCRegs bookkeeping for each written register is
// also reset; with any other mode immediates are stored through a temporary and
// the ArmCRegs entries are left as they are.
void ArmRegCache::Flush(FlushMode mode)
{
for (u8 a = 0; a < 32; ++a)
{
if (regs[a].GetType() == REG_IMM)
{
if (mode == FLUSH_ALL)
{
// This changes the type over to a REG_REG and gets caught below.
BindToRegister(a);
}
else
{
// Store the immediate via a scratch register; regs[a] itself is not touched here.
ARMReg tmp = GetReg();
emit->MOVI2R(tmp, regs[a].GetImm());
emit->STR(tmp, R9, PPCSTATE_OFF(gpr) + a * 4);
Unlock(tmp);
}
}
if (regs[a].GetType() == REG_REG)
{
u32 regindex = regs[a].GetRegIndex();
// Write the host register's value to the slot for guest register a.
emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4);
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
}
// NOTE(review): as written this runs in every mode, whereas ArmFPRCache::Flush
// only calls Flush() on the entry under FLUSH_ALL - confirm which is intended.
regs[a].Flush();
}
}
}
}

View file

@ -29,6 +29,12 @@ enum RegType
REG_AWAY, // Bound to a register, but not preloaded
};
// Selects how a register cache Flush() treats its bookkeeping after the cached
// values have been stored.
enum FlushMode
{
	// Default for Flush(): store the values and reset the ArmCRegs bookkeeping.
	FLUSH_ALL = 0,
	// Store the values but leave the ArmCRegs bookkeeping in place; used before
	// conditional exits so code emitted afterwards can keep using the cache.
	FLUSH_MAINTAIN_STATE = 1,
};
class OpArg
{
private:
@ -116,9 +122,8 @@ public:
void Start(PPCAnalyst::BlockRegStats &stats);
ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 =
INVALID_REG);
void Flush();
void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = INVALID_REG);
void Flush(FlushMode mode = FLUSH_ALL);
ARMReg R(u32 preg); // Returns a cached register
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
u32 GetImm(u32 preg) { return regs[preg].GetImm(); }