[ARM] ps_cmpu0/ps_cmpu1/ps_cmpo0/ps_cmpo1 implementations.

This commit is contained in:
Ryan Houdek 2013-09-24 21:13:33 +00:00
parent 74bc855f20
commit 357a7707a6
3 changed files with 268 additions and 4 deletions

View file

@ -246,6 +246,10 @@ public:
void ps_nabs(UGeckoInstruction _inst);
void ps_rsqrte(UGeckoInstruction _inst);
void ps_sel(UGeckoInstruction _inst);
void ps_cmpu0(UGeckoInstruction _inst);
void ps_cmpu1(UGeckoInstruction _inst);
void ps_cmpo0(UGeckoInstruction _inst);
void ps_cmpo1(UGeckoInstruction _inst);
// LoadStore paired
void psq_l(UGeckoInstruction _inst);

View file

@ -658,3 +658,263 @@ void JitArm::ps_nabs(UGeckoInstruction inst)
VABS(vD1, vB1);
VNEG(vD1, vD1);
}
void JitArm::ps_cmpu0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R0(a);
ARMReg vB = fpr.R0(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::ps_cmpu1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R1(a);
ARMReg vB = fpr.R1(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::ps_cmpo0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R0(a);
ARMReg vB = fpr.R0(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
TST(fpscrReg, VEMask);
FixupBranch noVXVC = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
SetJumpTarget(noVXVC);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::ps_cmpo1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R1(a);
ARMReg vB = fpr.R1(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
TST(fpscrReg, VEMask);
FixupBranch noVXVC = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
SetJumpTarget(noVXVC);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}

View file

@ -125,14 +125,14 @@ static GekkoOPTemplate primarytable[] =
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{0, &JitArm::ps_cmpu0}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::ps_cmpo0}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::ps_cmpu1}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::ps_cmpo1}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},