[ARM] Clean up FPR cache. Rapid fire floating point instruction implementations. Adds 13 new instructions.

This commit is contained in:
Ryan Houdek 2013-09-07 00:19:32 +00:00
parent cd7b97f767
commit ef05a14757
8 changed files with 412 additions and 118 deletions

View file

@ -186,6 +186,8 @@ public:
// Floating point
void fabsx(UGeckoInstruction _inst);
void fnabsx(UGeckoInstruction _inst);
void fnegx(UGeckoInstruction _inst);
void faddsx(UGeckoInstruction _inst);
void faddx(UGeckoInstruction _inst);
void fsubsx(UGeckoInstruction _inst);
@ -202,9 +204,20 @@ public:
// Paired Singles
void ps_add(UGeckoInstruction _inst);
void ps_sum0(UGeckoInstruction _inst);
void ps_sum1(UGeckoInstruction _inst);
void ps_madd(UGeckoInstruction _inst);
void ps_sub(UGeckoInstruction _inst);
void ps_mul(UGeckoInstruction _inst);
void ps_muls0(UGeckoInstruction _inst);
void ps_muls1(UGeckoInstruction _inst);
void ps_merge00(UGeckoInstruction _inst);
void ps_merge01(UGeckoInstruction _inst);
void ps_merge10(UGeckoInstruction _inst);
void ps_merge11(UGeckoInstruction _inst);
void ps_mr(UGeckoInstruction _inst);
void ps_neg(UGeckoInstruction _inst);
void ps_abs(UGeckoInstruction _inst);
void ps_nabs(UGeckoInstruction _inst);
};
#endif // _JIT64_H

View file

@ -43,14 +43,46 @@ void JitArm::fabsx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VABS(vD, vB);
if (inst.Rc) Helper_UpdateCR1(vD);
}
// fnabsx: FD.R0 = -|FB.R0|.
// Emitted as VABS followed by VNEG so the result is a pure sign-bit
// operation. The earlier sequence (|b| - |b| - |b|) produced +0 instead of
// -0 for zero inputs and NaN for infinities (inf - inf), and needed a
// scratch register.
// Helper_UpdateCR1 is called on the result when inst.Rc is set.
void JitArm::fnabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	ARMReg vB = fpr.R0(inst.FB);
	ARMReg vD = fpr.R0(inst.FD, false);

	// Clear the sign bit, then set it again: vD = -(|vB|).
	VABS(vD, vB);
	VNEG(vD, vD);

	if (inst.Rc) Helper_UpdateCR1(vD);
}
// fnegx: emits a VNEG from the R0 half of FB into the R0 half of FD.
// The destination is requested without preloading since it is fully
// overwritten. Helper_UpdateCR1 is invoked on the result when inst.Rc is set.
void JitArm::fnegx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	ARMReg source = fpr.R0(inst.FB);
	ARMReg dest = fpr.R0(inst.FD, false);

	VNEG(dest, source);

	if (inst.Rc)
	{
		Helper_UpdateCR1(dest);
	}
}
void JitArm::faddsx(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -58,8 +90,8 @@ void JitArm::faddsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD0 = fpr.R0(inst.FD);
ARMReg vD1 = fpr.R1(inst.FD);
ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD, false);
VADD(vD0, vA, vB);
VMOV(vD1, vD0);
@ -71,9 +103,9 @@ void JitArm::faddx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VADD(vD, vA, vB);
if (inst.Rc) Helper_UpdateCR1(vD);
@ -86,8 +118,8 @@ void JitArm::fsubsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD0 = fpr.R0(inst.FD);
ARMReg vD1 = fpr.R1(inst.FD);
ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD, false);
VSUB(vD0, vA, vB);
VMOV(vD1, vD0);
@ -99,9 +131,9 @@ void JitArm::fsubx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VSUB(vD, vA, vB);
if (inst.Rc) Helper_UpdateCR1(vD);
@ -114,8 +146,8 @@ void JitArm::fmulsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA);
ARMReg vC = fpr.R0(inst.FC);
ARMReg vD0 = fpr.R0(inst.FD);
ARMReg vD1 = fpr.R1(inst.FD);
ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD, false);
VMUL(vD0, vA, vC);
VMOV(vD1, vD0);
@ -127,9 +159,9 @@ void JitArm::fmulx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
ARMReg vD0 = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA);
ARMReg vC = fpr.R0(inst.FC);
ARMReg vD0 = fpr.R0(inst.FD, false);
VMUL(vD0, vA, vC);
if (inst.Rc) Helper_UpdateCR1(vD0);
@ -139,8 +171,8 @@ void JitArm::fmrx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VMOV(vD, vB);

View file

@ -40,12 +40,11 @@ void JitArm::ps_add(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a);
ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d);
ARMReg vD1 = fpr.R1(d);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
VADD(vD0, vA0, vB0);
VADD(vD1, vA1, vB1);
fpr.Flush();
}
// Wrong, THP videos like SMS and Ikaruga show artifacts
@ -67,8 +66,8 @@ void JitArm::ps_madd(UGeckoInstruction inst)
ARMReg vB1 = fpr.R1(b);
ARMReg vC0 = fpr.R0(c);
ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d);
ARMReg vD1 = fpr.R1(d);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg();
@ -99,14 +98,35 @@ void JitArm::ps_sum0(UGeckoInstruction inst)
ARMReg vA0 = fpr.R0(a);
ARMReg vB1 = fpr.R1(b);
ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d);
ARMReg vD1 = fpr.R1(d);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
VADD(vD0, vA0, vB1);
VMOV(vD1, vC1);
fpr.Flush();
}
// ps_sum1: FD.R0 = FC.R0, FD.R1 = FA.R0 + FB.R1.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_sum1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	if (inst.Rc) {
		Default(inst); return;
	}

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// The sum must be emitted first: when d == a the cache returns the same
	// host register for vD0 and vA0, so writing vD0 before the VADD would
	// destroy the addend. Writing vD1 first is safe for every overlap
	// (vD1 can only alias vB1, which the VADD reads before it writes).
	VADD(vD1, vA0, vB1);
	VMOV(vD0, vC0);
}
void JitArm::ps_sub(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -120,12 +140,11 @@ void JitArm::ps_sub(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a);
ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d);
ARMReg vD1 = fpr.R1(d);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
VSUB(vD0, vA0, vB0);
VSUB(vD1, vA1, vB1);
fpr.Flush();
}
void JitArm::ps_mul(UGeckoInstruction inst)
@ -141,11 +160,210 @@ void JitArm::ps_mul(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a);
ARMReg vC0 = fpr.R0(c);
ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d);
ARMReg vD1 = fpr.R1(d);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
VMUL(vD0, vA0, vC0);
VMUL(vD1, vA1, vC1);
fpr.Flush();
}
// ps_muls0: multiplies both halves of FA by the R0 half of FC.
//   FD.R0 = FA.R0 * FC.R0
//   FD.R1 = FA.R1 * FC.R0
// Both products are computed into scratch registers before being copied to
// the destination, so the shared multiplier is still intact for the second
// multiply even if FD overlaps FC.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_muls0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, c = inst.FC, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(a);
	ARMReg srcA1 = fpr.R1(a);
	ARMReg scale = fpr.R0(c);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	ARMReg prod0 = fpr.GetReg();
	ARMReg prod1 = fpr.GetReg();

	VMUL(prod0, srcA0, scale);
	VMUL(prod1, srcA1, scale);
	VMOV(dst0, prod0);
	VMOV(dst1, prod1);

	fpr.Unlock(prod0);
	fpr.Unlock(prod1);
}
// ps_muls1: multiplies both halves of FA by the R1 half of FC.
//   FD.R0 = FA.R0 * FC.R1
//   FD.R1 = FA.R1 * FC.R1
// Both products are computed into scratch registers before being copied to
// the destination, so the shared multiplier is still intact for the second
// multiply even if FD overlaps FC.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_muls1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, c = inst.FC, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(a);
	ARMReg srcA1 = fpr.R1(a);
	ARMReg scale = fpr.R1(c);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	ARMReg prod0 = fpr.GetReg();
	ARMReg prod1 = fpr.GetReg();

	VMUL(prod0, srcA0, scale);
	VMUL(prod1, srcA1, scale);
	VMOV(dst0, prod0);
	VMOV(dst1, prod1);

	fpr.Unlock(prod0);
	fpr.Unlock(prod1);
}
// ps_merge00: FD.R0 = FA.R0, FD.R1 = FB.R0.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_merge00(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;

	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// Write the R1 half first. When d == b, vD0 and vB0 are the same host
	// register, so moving vA0 into vD0 first would overwrite the value that
	// still has to be copied into vD1. vD1 never aliases a source of this
	// instruction (both sources are R0 halves), so this order is always safe.
	VMOV(vD1, vB0);
	VMOV(vD0, vA0);
}
// ps_merge01: FD.R0 = FA.R0, FD.R1 = FB.R1.
// Each destination half is copied from the matching half of its source.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_merge01(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcLow = fpr.R0(a);
	ARMReg srcHigh = fpr.R1(b);
	ARMReg dstLow = fpr.R0(d, false);
	ARMReg dstHigh = fpr.R1(d, false);

	VMOV(dstLow, srcLow);
	VMOV(dstHigh, srcHigh);
}
// ps_merge10: FD.R0 = FA.R1, FD.R1 = FB.R0.
// Falls back to Default() when the Rc bit is set.
//
// The two halves cross over, so either destination can alias the source of
// the other move: d == b makes vD0 the same host register as vB0, and
// d == a makes vD1 the same as vA1 (d == a == b is a full swap). No ordering
// of two plain moves is correct for all of these, so FA.R1 is staged in a
// scratch register first.
void JitArm::ps_merge10(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;

	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	ARMReg V0 = fpr.GetReg();

	VMOV(V0, vA1);   // save FA.R1 before vD1 may overwrite it
	VMOV(vD1, vB0);  // read FB.R0 before vD0 may overwrite it
	VMOV(vD0, V0);

	fpr.Unlock(V0);
}
// ps_merge11: FD.R0 = FA.R1, FD.R1 = FB.R1.
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_merge11(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcForLow = fpr.R1(a);
	ARMReg srcForHigh = fpr.R1(b);
	ARMReg dstLow = fpr.R0(d, false);
	ARMReg dstHigh = fpr.R1(d, false);

	// Order matters: the low half must be copied before the high half is
	// overwritten, because dstHigh can be the same register as srcForLow.
	VMOV(dstLow, srcForLow);
	VMOV(dstHigh, srcForHigh);
}
// ps_mr: copies both halves of FB into FD (FD.R0 = FB.R0, FD.R1 = FB.R1).
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 b = inst.FB, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VMOV(dst0, src0);
	VMOV(dst1, src1);
}
// ps_neg: negates both halves of FB into FD
// (FD.R0 = -FB.R0, FD.R1 = -FB.R1).
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_neg(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 b = inst.FB, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VNEG(dst0, src0);
	VNEG(dst1, src1);
}
// ps_abs: takes the absolute value of both halves of FB into FD
// (FD.R0 = |FB.R0|, FD.R1 = |FB.R1|).
// Falls back to Default() when the Rc bit is set.
void JitArm::ps_abs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 b = inst.FB, d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VABS(dst0, src0);
	VABS(dst1, src1);
}
// ps_nabs: FD.R0 = -|FB.R0|, FD.R1 = -|FB.R1|.
// Falls back to Default() when the Rc bit is set.
//
// Each half is emitted as VABS followed by VNEG so the result is a pure
// sign-bit operation. The earlier sequence (|b| - |b| - |b|) produced +0
// instead of -0 for zero inputs and NaN for infinities (inf - inf), and
// needed a scratch register.
void JitArm::ps_nabs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 b = inst.FB, d = inst.FD;

	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	VABS(vD0, vB0);
	VNEG(vD0, vD0);

	VABS(vD1, vB1);
	VNEG(vD1, vD1);
}

View file

@ -127,16 +127,16 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm::Default}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm::Default}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm::Default}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm::Default}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm::Default}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm::Default}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm::Default}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm::Default}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArm::Default}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
};
@ -144,9 +144,9 @@ static GekkoOPTemplate table4[] =
static GekkoOPTemplate table4_2[] =
{
{10, &JitArm::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArm::Default}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm::Default}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm::Default}, //"ps_muls1", OPTYPE_PS, 0}},
{11, &JitArm::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}},
@ -352,8 +352,8 @@ static GekkoOPTemplate table63[] =
{14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm::Default}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm::Default}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm::Default}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArm::Default}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArm::Default}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},

View file

@ -35,7 +35,6 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
ArmCRegs[a].Reg = PPCRegs[a];
ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].PS1 = false;
ArmCRegs[a].Away = true;
}
for(u8 a = 0; a < NUMARMREG; ++a)
{
@ -43,14 +42,11 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
ArmRegs[a].free = true;
}
}
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
for(u8 a = 0; a < NUMPPCREG; ++a)
{
ArmCRegs[a].PPCReg = 33;
ArmCRegs[a].LastLoad = 0;
}
}
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
{
// This will return us the allocation order of the registers we can use on
@ -101,59 +97,78 @@ void ArmFPRCache::Unlock(ARMReg V0)
}
}
}
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
u32 ArmFPRCache::GetLeastUsedRegister(bool increment)
{
u32 HighestUsed = 0;
u8 Num = 0;
u8 lastRegIndex = 0;
for(u8 a = 0; a < NUMPPCREG; ++a){
++ArmCRegs[a].LastLoad;
if (increment)
++ArmCRegs[a].LastLoad;
if (ArmCRegs[a].LastLoad > HighestUsed)
{
HighestUsed = ArmCRegs[a].LastLoad;
Num = a;
lastRegIndex = a;
}
}
// Check if already Loaded
for(u8 a = 0; a < NUMPPCREG; ++a)
if (ArmCRegs[a].PPCReg == preg && ArmCRegs[a].PS1 == PS1)
{
ArmCRegs[a].LastLoad = 0;
// Check if the value is actually in the reg
if (ArmCRegs[a].Away && preLoad)
{
// Load it now since we want it
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
ArmCRegs[a].Away = false;
}
return ArmCRegs[a].Reg;
}
// Check if we have a free register
return lastRegIndex;
}
bool ArmFPRCache::FindFreeRegister(u32 &regindex)
{
for (u8 a = 0; a < NUMPPCREG; ++a)
if (ArmCRegs[a].PPCReg == 33)
{
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
if (preLoad)
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
ArmCRegs[a].PPCReg = preg;
ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].PS1 = PS1;
ArmCRegs[a].Away = !preLoad;
return ArmCRegs[a].Reg;
regindex = a;
return true;
}
// Alright, we couldn't get a free space, dump that least used register
s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[Num].PPCReg * 16) + (ArmCRegs[Num].PS1 ? 8 : 0);
emit->VSTR(ArmCRegs[Num].Reg, R9, offsetOld);
s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
if (preLoad)
emit->VLDR(ArmCRegs[Num].Reg, R9, offsetNew);
ArmCRegs[Num].PPCReg = preg;
ArmCRegs[Num].LastLoad = 0;
ArmCRegs[Num].PS1 = PS1;
ArmCRegs[Num].Away = !preLoad;
return ArmCRegs[Num].Reg;
return false;
}
// Returns the host register that caches one half of guest FPR `preg`
// (`PS1` false selects the first 8 bytes of the 16-byte ps slot, true the
// second 8 bytes), allocating and loading it if it is not cached yet.
//
// Lookup order:
//   1. Already tracked in _regs: reuse the register. `preLoad` is only
//      consulted here, to load an entry that is in the REG_AWAY state.
//   2. A free cache slot exists: load the value into it.
//   3. Otherwise evict the slot chosen by GetLeastUsedRegister(): store its
//      current value back, then load the requested one.
// Fresh allocations (2 and 3) always load the value, regardless of `preLoad`.
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
{
	// Called up front on every lookup; it also selects the eviction candidate.
	u32 lastRegIndex = GetLeastUsedRegister(true);

	if (_regs[preg][PS1].GetType() != REG_NOTLOADED)
	{
		u8 a = _regs[preg][PS1].GetRegIndex();
		ArmCRegs[a].LastLoad = 0;
		if (_regs[preg][PS1].GetType() == REG_AWAY && preLoad)
		{
			s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
			emit->VLDR(ArmCRegs[a].Reg, R9, offset);
			_regs[preg][PS1].LoadToReg(a);
		}
		return ArmCRegs[a].Reg;
	}

	u32 regindex;
	if (FindFreeRegister(regindex))
	{
		s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
		emit->VLDR(ArmCRegs[regindex].Reg, R9, offset);
		ArmCRegs[regindex].PPCReg = preg;
		ArmCRegs[regindex].LastLoad = 0;
		// Record which half this slot holds; the eviction path below relies
		// on it to compute the write-back offset.
		ArmCRegs[regindex].PS1 = PS1;
		_regs[preg][PS1].LoadToReg(regindex);
		return ArmCRegs[regindex].Reg;
	}

	// Alright, we couldn't get a free space, dump that least used register.
	// Capture the evicted entry's identity before the slot is retargeted.
	u32 oldPreg = ArmCRegs[lastRegIndex].PPCReg;
	bool oldPS1 = ArmCRegs[lastRegIndex].PS1;

	s16 offsetOld = PPCSTATE_OFF(ps) + (oldPreg * 16) + (oldPS1 ? 8 : 0);
	s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);

	emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld);
	emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew);

	// Invalidate the entry that was actually evicted (its own half, not the
	// half of the register being requested).
	_regs[oldPreg][oldPS1].Flush();

	ArmCRegs[lastRegIndex].PPCReg = preg;
	ArmCRegs[lastRegIndex].LastLoad = 0;
	ArmCRegs[lastRegIndex].PS1 = PS1;

	_regs[preg][PS1].LoadToReg(lastRegIndex);

	return ArmCRegs[lastRegIndex].Reg;
}
ARMReg ArmFPRCache::R0(u32 preg, bool preLoad)
@ -168,14 +183,28 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
void ArmFPRCache::Flush()
{
for(u8 a = 0; a < NUMPPCREG; ++a)
if (ArmCRegs[a].PPCReg != 33)
for (u8 a = 0; a < 32; ++a)
{
if (_regs[a][0].GetType() == REG_REG)
{
s16 offset = PPCSTATE_OFF(ps) + (ArmCRegs[a].PPCReg * 16) + (ArmCRegs[a].PS1 ? 8 : 0);
emit->VSTR(ArmCRegs[a].Reg, R9, offset);
ArmCRegs[a].PPCReg = 33;
ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].Away = true;
s16 offset = PPCSTATE_OFF(ps) + (a * 16);
u32 regindex = _regs[a][0].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
}
if (_regs[a][1].GetType() == REG_REG)
{
s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8;
u32 regindex = _regs[a][1].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
}
}

View file

@ -29,6 +29,7 @@ using namespace ArmGen;
class ArmFPRCache
{
private:
OpArg _regs[32][2]; // One for each FPR reg
JRCPPC ArmCRegs[ARMFPUREGS];
JRCReg ArmRegs[ARMFPUREGS];
@ -40,6 +41,8 @@ private:
ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad);
u32 GetLeastUsedRegister(bool increment);
bool FindFreeRegister(u32 &regindex);
protected:
ARMXEmitter *emit;

View file

@ -126,10 +126,8 @@ bool ArmRegCache::FindFreeRegister(u32 &regindex)
ARMReg ArmRegCache::R(u32 preg)
{
if (regs[preg].GetType() == REG_IMM)
{
return BindToRegister(preg);
//asm ("bkpt #1;");
}
u32 lastRegIndex = GetLeastUsedRegister(true);
// Check if already Loaded

View file

@ -38,55 +38,57 @@ using namespace ArmGen;
enum RegType
{
REG_NOTLOADED = 0,
REG_REG,
REG_IMM,
REG_REG, // Reg type is register
REG_IMM, // Reg is really a IMM
REG_AWAY, // Bound to a register, but not preloaded
};
class OpArg
{
private:
class Reg{
public:
RegType m_type;
u8 m_reg; // index to register
u32 m_value;
Reg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
} Reg;
RegType m_type; // store type
u8 m_reg; // index to register
u32 m_value; // IMM value
public:
OpArg(){}
OpArg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
RegType GetType()
{
return Reg.m_type;
return m_type;
}
u8 GetRegIndex()
{
return Reg.m_reg;
return m_reg;
}
u32 GetImm()
{
return Reg.m_value;
return m_value;
}
void LoadToAway(u8 reg)
{
m_type = REG_AWAY;
m_reg = reg;
}
void LoadToReg(u8 reg)
{
Reg.m_type = REG_REG;
Reg.m_reg = reg;
m_type = REG_REG;
m_reg = reg;
}
void LoadToImm(u32 imm)
{
Reg.m_type = REG_IMM;
Reg.m_value = imm;
m_type = REG_IMM;
m_value = imm;
}
void Flush()
{
Reg.m_type = REG_NOTLOADED;
m_type = REG_NOTLOADED;
}
};
@ -96,7 +98,6 @@ struct JRCPPC
bool PS1;
ARMReg Reg; // Tied to which ARM Register
u32 LastLoad;
bool Away; // Only used in FPR cache
};
struct JRCReg
{