Merge pull request #527 from delroth/flags-opt

[RFC] PowerPC flags emulation optimization
This commit is contained in:
Lioncash 2014-07-31 02:51:48 -04:00
commit 5bb9a74759
40 changed files with 929 additions and 3300 deletions

View file

@ -137,7 +137,6 @@
<string name="jit64_recompiler">JIT64 Recompiler</string>
<string name="jitil_recompiler">JITIL Recompiler</string>
<string name="jit_arm_recompiler">JIT ARM Recompiler</string>
<string name="jitil_arm_recompiler">JITIL ARM Recompiler</string>
<string name="cpu_settings">CPU</string>
<string name="cpu_core">CPUコア</string>
<string name="cpu_core_desc">%s</string>

View file

@ -19,12 +19,10 @@
<string-array name="emuCoreEntriesARM" translatable="false">
<item>@string/interpreter</item>
<item>@string/jit_arm_recompiler</item>
<item>@string/jitil_arm_recompiler</item>
</string-array>
<string-array name="emuCoreValuesARM" translatable="false">
<item>0</item>
<item>3</item>
<item>4</item>
</string-array>
<!-- CPU core selection - Other -->

View file

@ -138,7 +138,6 @@
<string name="jit64_recompiler">JIT64 Recompiler</string>
<string name="jitil_recompiler">JITIL Recompiler</string>
<string name="jit_arm_recompiler">JIT ARM Recompiler</string>
<string name="jitil_arm_recompiler">JITIL ARM Recompiler</string>
<string name="cpu_settings">CPU</string>
<string name="cpu_core">CPU Core</string>
<string name="cpu_core_desc">%s</string>

View file

@ -721,7 +721,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest)
{
if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument");
dest.operandReg = 0;
dest.WriteRex(this, 0, 0);
dest.WriteRex(this, 0, 8);
Write8(0x0F);
Write8(0x90 + (u8)flag);
dest.WriteRest(this);

View file

@ -218,12 +218,6 @@ if(_M_ARM_32)
PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
PowerPC/JitArm32/JitArm_SystemRegisters.cpp
PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp
#JitArmIL
PowerPC/JitArmIL/JitIL.cpp
PowerPC/JitArmIL/JitILAsm.cpp
PowerPC/JitArmIL/JitIL_Tables.cpp
PowerPC/JitArmIL/JitIL_Branch.cpp
PowerPC/JitArmIL/IR_Arm.cpp
)
endif()

View file

@ -84,7 +84,7 @@ static void Trace(UGeckoInstruction& instCode)
char ppcInst[256];
DisassembleGekko(instCode.hex, PC, ppcInst, 256);
DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str(), instCode.hex, ppcInst);
DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx FPSCR: %08x MSR: %08x LR: %08x %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), instCode.hex, ppcInst);
}
int Interpreter::SingleStepInner(void)

View file

@ -4,32 +4,22 @@
#include "Core/PowerPC/Interpreter/Interpreter.h"
void Interpreter::Helper_UpdateCR0(u32 _uValue)
void Interpreter::Helper_UpdateCR0(u32 value)
{
u32 new_cr0;
int sValue = (int)_uValue;
if (sValue > 0)
new_cr0 = 0x4;
else if (sValue < 0)
new_cr0 = 0x8;
else
new_cr0 = 0x2;
new_cr0 |= GetXER_SO();
SetCRField(0, new_cr0);
s64 sign_extended = (s64)(s32)value;
u64 cr_val = (u64)sign_extended;
cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61);
PowerPC::ppcState.cr_val[0] = cr_val;
}
void Interpreter::Helper_UpdateCRx(int _x, u32 _uValue)
void Interpreter::Helper_UpdateCRx(int idx, u32 value)
{
u32 new_crX;
int sValue = (int)_uValue;
if (sValue > 0)
new_crX = 0x4;
else if (sValue < 0)
new_crX = 0x8;
else
new_crX = 0x2;
new_crX |= GetXER_SO();
SetCRField(_x, new_crX);
s64 sign_extended = (s64)(s32)value;
u64 cr_val = (u64)sign_extended;
cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61);
PowerPC::ppcState.cr_val[idx] = cr_val;
}
u32 Interpreter::Helper_Carry(u32 _uValue1, u32 _uValue2)

View file

@ -377,10 +377,8 @@ void Jit64::Trace()
}
#endif
DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3],
PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr,
PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
}
void STACKALIGN Jit64::Jit(u32 em_address)

View file

@ -106,6 +106,16 @@ public:
void GenerateRC();
void ComputeRC(const Gen::OpArg & arg);
// Emits code that reads one bit (`bit`) of CR field `field` and writes the
// result into `out` with SETcc. SETcc only writes the low byte, so the upper
// bits of `out` keep whatever they held before. May clobber ABI_PARAM1, so
// don't forget to xlock it beforehand.
void GetCRFieldBit(int field, int bit, Gen::X64Reg out);
// Emits code that sets bit `bit` of CR field `field` from the lowest bit of
// `in`. Clobbers ABI_PARAM1 and ABI_PARAM2; xlock them beforehand. `in` must
// not be ABI_PARAM2, which is overwritten before `in` is examined.
void SetCRFieldBit(int field, int bit, Gen::X64Reg in);
// Generates a branch that is taken when the given bit of a CR field is set
// (or, with jump_if_set == false, when it is clear). May clobber RAX. The
// returned FixupBranch still has to be given a target by the caller.
FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);

View file

@ -117,11 +117,8 @@ void Jit64::bcx(UGeckoInstruction inst)
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = J_CC(CC_Z, true);
else
pConditionDontBranch = J_CC(CC_NZ, true);
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
if (inst.LK)
@ -179,14 +176,8 @@ void Jit64::bcctrx(UGeckoInstruction inst)
// BO_2 == 001zy -> b if false
// BO_2 == 011zy -> b if true
// Ripped from bclrx
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
Gen::CCFlags branch;
if (inst.BO_2 & BO_BRANCH_IF_TRUE)
branch = CC_Z;
else
branch = CC_NZ;
FixupBranch b = J_CC(branch, true);
FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
MOV(32, R(EAX), M(&CTR));
AND(32, R(EAX), Imm32(0xFFFFFFFC));
//MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX()
@ -222,11 +213,8 @@ void Jit64::bclrx(UGeckoInstruction inst)
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = J_CC(CC_Z, true);
else
pConditionDontBranch = J_CC(CC_NZ, true);
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
// This below line can be used to prove that blr "eats flags" in practice.

View file

@ -237,26 +237,22 @@ void Jit64::fcmpx(UGeckoInstruction inst)
pGreater = J_CC(CC_B);
}
// Equal
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
continue1 = J();
// NAN
SetJumpTarget(pNaN);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1));
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
if (a != b)
{
continue2 = J();
// Greater Than
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
continue3 = J();
// Less Than
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
}
SetJumpTarget(continue1);
@ -266,6 +262,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
SetJumpTarget(continue3);
}
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
fpr.UnlockAll();
}

View file

@ -116,57 +116,17 @@ void Jit64::GenerateCarry()
SetJumpTarget(pContinue);
}
// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers.
void Jit64::GenerateRC()
{
FixupBranch pZero = J_CC(CC_Z);
FixupBranch pNegative = J_CC(CC_S);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // Result > 0
FixupBranch continue1 = J();
SetJumpTarget(pNegative);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // Result < 0
FixupBranch continue2 = J();
SetJumpTarget(pZero);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // Result == 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
}
void Jit64::ComputeRC(const Gen::OpArg & arg)
{
if (arg.IsImm())
{
s32 value = (s32)arg.offset;
if (value < 0)
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8));
else if (value > 0)
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4));
else
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2));
MOV(32, R(EAX), Imm32((s32)arg.offset));
MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX));
}
else
{
if (arg.IsSimpleReg())
TEST(32, arg, arg);
else
CMP(32, arg, Imm8(0));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0
FixupBranch continue1 = J();
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0
FixupBranch continue2 = J();
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
MOVSX(64, 32, RAX, arg);
MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX));
}
}
@ -192,26 +152,20 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
{
gpr.KillImmediate(d, true, true);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (Rc)
{
// All of the possible passed operators affect Sign/Zero flags
GenerateRC();
}
if (carry)
GenerateCarry();
if (Rc)
ComputeRC(gpr.R(d));
}
else
{
gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), gpr.R(a));
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (Rc)
{
// All of the possible passed operators affect Sign/Zero flags
GenerateRC();
}
if (carry)
GenerateCarry();
if (Rc)
ComputeRC(gpr.R(d));
}
}
else if (doop == Add)
@ -219,9 +173,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
// a == 0, which for these instructions imply value = 0
gpr.SetImmediate32(d, value);
if (Rc)
{
ComputeRC(gpr.R(d));
}
}
else
{
@ -363,22 +315,23 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (signedCompare)
{
if ((s32)gpr.R(a).offset == (s32)comparand.offset)
compareResult = 0x2;
compareResult = CR_EQ;
else if ((s32)gpr.R(a).offset > (s32)comparand.offset)
compareResult = 0x4;
compareResult = CR_GT;
else
compareResult = 0x8;
compareResult = CR_LT;
}
else
{
if ((u32)gpr.R(a).offset == (u32)comparand.offset)
compareResult = 0x2;
compareResult = CR_EQ;
else if ((u32)gpr.R(a).offset > (u32)comparand.offset)
compareResult = 0x4;
compareResult = CR_GT;
else
compareResult = 0x8;
compareResult = CR_LT;
}
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult));
MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult)));
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
gpr.UnlockAll();
if (merge_branch)
@ -436,71 +389,58 @@ void Jit64::cmpXX(UGeckoInstruction inst)
}
else
{
Gen::CCFlags less_than, greater_than;
if (signedCompare)
{
less_than = CC_L;
greater_than = CC_G;
if (gpr.R(a).IsImm())
MOV(64, R(RAX), gpr.R(a));
else
MOVSX(64, 32, RAX, gpr.R(a));
if (!comparand.IsImm())
{
MOVSX(64, 32, ABI_PARAM1, comparand);
comparand = R(ABI_PARAM1);
}
}
else
{
less_than = CC_B;
greater_than = CC_A;
}
if (gpr.R(a).IsImm())
MOV(32, R(RAX), gpr.R(a));
else
MOVZX(64, 32, RAX, gpr.R(a));
if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg()))
{
// Syntax for CMP is invalid with such arguments. We must load RA in a register.
gpr.BindToRegister(a, true, false);
if (comparand.IsImm())
MOV(32, R(ABI_PARAM1), comparand);
else
MOVZX(64, 32, ABI_PARAM1, comparand);
comparand = R(ABI_PARAM1);
}
CMP(32, gpr.R(a), comparand);
gpr.UnlockAll();
SUB(64, R(RAX), comparand);
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
if (!merge_branch)
{
// Keep the normal code separate for clarity.
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
FixupBranch continue1 = J();
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
FixupBranch continue2 = J();
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
// TODO: If we ever care about SO, borrow a trick from
// http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc
}
else
if (merge_branch)
{
js.downcountAmount++;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? false : true;
bool condition = js.next_inst.BO & BO_BRANCH_IF_TRUE;
// Test swapping (in the future, will be used to inline across branches the right way)
// if (rand() & 1)
// std::swap(destination1, destination2), condition = !condition;
gpr.UnlockAll();
gpr.Flush();
fpr.Flush();
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0
FixupBranch continue1 = J();
FixupBranch pDontBranch;
if (test_bit & 8)
pDontBranch = J_CC(condition ? CC_GE : CC_L); // Test < 0, so jump over if >= 0.
else if (test_bit & 4)
pDontBranch = J_CC(condition ? CC_LE : CC_G); // Test > 0, so jump over if <= 0.
else if (test_bit & 2)
pDontBranch = J_CC(condition ? CC_NE : CC_E); // Test = 0, so jump over if != 0.
else // SO bit, do not branch (we don't emulate SO for cmp).
pDontBranch = J();
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0
FixupBranch continue2 = J();
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0
FixupBranch continue3;
if (!!(8 & test_bit) == condition) continue3 = J();
if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
// Code that handles successful PPC branching.
if (js.next_inst.OPCD == 16) // bcx
{
if (js.next_inst.LK)
@ -534,9 +474,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
PanicAlert("WTF invalid branch");
}
if (!!(8 & test_bit) == condition) SetJumpTarget(continue3);
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
SetJumpTarget(pDontBranch);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
@ -619,9 +557,7 @@ void Jit64::boolX(UGeckoInstruction inst)
PanicAlert("WTF!");
}
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
else if ((a == s) || (a == b))
{
@ -632,19 +568,11 @@ void Jit64::boolX(UGeckoInstruction inst)
if (inst.SUBOP10 == 28) /* andx */
{
AND(32, gpr.R(a), operand);
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 476) /* nandx */
{
AND(32, gpr.R(a), operand);
NOT(32, gpr.R(a));
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
else if (inst.SUBOP10 == 60) /* andcx */
{
@ -659,27 +587,15 @@ void Jit64::boolX(UGeckoInstruction inst)
NOT(32, R(EAX));
AND(32, gpr.R(a), R(EAX));
}
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 444) /* orx */
{
OR(32, gpr.R(a), operand);
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 124) /* norx */
{
OR(32, gpr.R(a), operand);
NOT(32, gpr.R(a));
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
else if (inst.SUBOP10 == 412) /* orcx */
{
@ -694,32 +610,22 @@ void Jit64::boolX(UGeckoInstruction inst)
NOT(32, R(EAX));
OR(32, gpr.R(a), R(EAX));
}
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 316) /* xorx */
{
XOR(32, gpr.R(a), operand);
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 284) /* eqvx */
{
NOT(32, gpr.R(a));
XOR(32, gpr.R(a), operand);
if (inst.Rc)
{
GenerateRC();
}
}
else
{
PanicAlert("WTF");
}
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll();
}
else
@ -731,83 +637,53 @@ void Jit64::boolX(UGeckoInstruction inst)
{
MOV(32, gpr.R(a), gpr.R(s));
AND(32, gpr.R(a), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 476) /* nandx */
{
MOV(32, gpr.R(a), gpr.R(s));
AND(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
else if (inst.SUBOP10 == 60) /* andcx */
{
MOV(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
AND(32, gpr.R(a), gpr.R(s));
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 444) /* orx */
{
MOV(32, gpr.R(a), gpr.R(s));
OR(32, gpr.R(a), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 124) /* norx */
{
MOV(32, gpr.R(a), gpr.R(s));
OR(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
else if (inst.SUBOP10 == 412) /* orcx */
{
MOV(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
OR(32, gpr.R(a), gpr.R(s));
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 316) /* xorx */
{
MOV(32, gpr.R(a), gpr.R(s));
XOR(32, gpr.R(a), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
}
else if (inst.SUBOP10 == 284) /* eqvx */
{
MOV(32, gpr.R(a), gpr.R(s));
NOT(32, gpr.R(a));
XOR(32, gpr.R(a), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
}
else
{
PanicAlert("WTF!");
}
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll();
}
}
@ -943,9 +819,8 @@ void Jit64::subfcx(UGeckoInstruction inst)
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), gpr.R(a));
}
if (inst.Rc) {
GenerateRC();
}
if (inst.Rc)
ComputeRC(gpr.R(d));
FinalizeCarryOverflow(inst.OE, true);
gpr.UnlockAll();
@ -980,10 +855,9 @@ void Jit64::subfex(UGeckoInstruction inst)
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b));
}
if (inst.Rc) {
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
@ -1004,11 +878,9 @@ void Jit64::subfmex(UGeckoInstruction inst)
}
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
@ -1029,11 +901,9 @@ void Jit64::subfzex(UGeckoInstruction inst)
}
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
@ -1076,14 +946,10 @@ void Jit64::subfx(UGeckoInstruction inst)
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), gpr.R(a));
}
if (inst.Rc)
{
GenerateRC();
}
if (inst.OE)
{
GenerateOverflow();
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1505,14 +1371,10 @@ void Jit64::addx(UGeckoInstruction inst)
gpr.Lock(a, b, d);
gpr.BindToRegister(d, true);
ADD(32, gpr.R(d), gpr.R(operand));
if (inst.Rc)
{
GenerateRC();
}
if (inst.OE)
{
GenerateOverflow();
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
@ -1521,14 +1383,10 @@ void Jit64::addx(UGeckoInstruction inst)
gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
if (inst.OE)
{
GenerateOverflow();
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1547,11 +1405,9 @@ void Jit64::addex(UGeckoInstruction inst)
GetCarryEAXAndClear();
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
@ -1562,11 +1418,9 @@ void Jit64::addex(UGeckoInstruction inst)
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1584,11 +1438,9 @@ void Jit64::addcx(UGeckoInstruction inst)
gpr.BindToRegister(d, true);
JitClearCAOV(inst.OE);
ADD(32, gpr.R(d), gpr.R(operand));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
@ -1598,11 +1450,9 @@ void Jit64::addcx(UGeckoInstruction inst)
JitClearCAOV(inst.OE);
MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1621,11 +1471,9 @@ void Jit64::addmex(UGeckoInstruction inst)
GetCarryEAXAndClear();
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
@ -1636,11 +1484,9 @@ void Jit64::addmex(UGeckoInstruction inst)
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1659,11 +1505,9 @@ void Jit64::addzex(UGeckoInstruction inst)
GetCarryEAXAndClear();
ADC(32, gpr.R(d), Imm8(0));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
@ -1674,11 +1518,9 @@ void Jit64::addzex(UGeckoInstruction inst)
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0));
if (inst.Rc)
{
GenerateRC();
}
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1714,17 +1556,13 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
{
SHL(32, gpr.R(a), Imm8(inst.SH));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
}
else if (inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH)
{
SHR(32, gpr.R(a), Imm8(inst.MB));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
}
else
{
@ -1736,9 +1574,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
{
AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME)));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
}
else if (inst.Rc)
{
@ -1818,9 +1654,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
XOR(32, gpr.R(a), R(EAX));
}
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
}
else
{
@ -1828,9 +1662,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
AND(32, gpr.R(a), Imm32(~mask));
XOR(32, gpr.R(a), gpr.R(s));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
}
gpr.UnlockAll();
}
@ -1864,9 +1696,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
ROL(32, gpr.R(a), R(ECX));
AND(32, gpr.R(a), Imm32(mask));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -1898,14 +1728,10 @@ void Jit64::negx(UGeckoInstruction inst)
if (a != d)
MOV(32, gpr.R(d), gpr.R(a));
NEG(32, gpr.R(d));
if (inst.Rc)
{
GenerateRC();
}
if (inst.OE)
{
GenerateOverflow();
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
}
@ -1994,7 +1820,7 @@ void Jit64::slwx(UGeckoInstruction inst)
if (inst.Rc)
{
AND(32, gpr.R(a), gpr.R(a));
GenerateRC();
ComputeRC(gpr.R(a));
}
else
{
@ -2104,9 +1930,7 @@ void Jit64::srawix(UGeckoInstruction inst)
}
SAR(32, gpr.R(a), Imm8(amount));
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(gpr.R(a));
SHL(32, R(EAX), Imm8(32-amount));
TEST(32, R(EAX), gpr.R(a));
FixupBranch nocarry = J_CC(CC_Z);

View file

@ -10,6 +10,130 @@
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
// Emits x86-64 code that evaluates a single condition bit of CR field
// `field` from its 64-bit internal representation (ppcState.cr_val[field])
// and stores the boolean result into `out` through SETcc.
//
// Internal encoding, as tested below:
//   SO - bit 61 of the value is set
//   EQ - the low 32 bits of the value are zero
//   GT - the value, taken as a signed 64-bit integer, is > 0
//   LT - bit 62 of the value is set
//
// ABI_PARAM1 is used as a scratch register for every bit except EQ.
void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out)
{
	if (bit == CR_SO_BIT || bit == CR_LT_BIT)
	{
		// SO and LT are plain flag bits: build the mask in a scratch
		// register (there is no 64-bit immediate form of TEST) and test it.
		const int flag_pos = (bit == CR_SO_BIT) ? 61 : 62;
		MOV(64, R(ABI_PARAM1), Imm64(1ull << flag_pos));
		TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1));
		SETcc(CC_NZ, R(out));
	}
	else if (bit == CR_EQ_BIT)
	{
		// EQ: bits 31-0 of the stored value must all be zero.
		CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0));
		SETcc(CC_Z, R(out));
	}
	else if (bit == CR_GT_BIT)
	{
		// GT: signed "greater than zero" check on the full 64-bit value.
		MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field]));
		TEST(64, R(ABI_PARAM1), R(ABI_PARAM1));
		SETcc(CC_G, R(out));
	}
	else
	{
		_assert_msg_(DYNA_REC, false, "Invalid CR bit");
	}
}
void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in)
{
MOV(64, R(ABI_PARAM2), M(&PowerPC::ppcState.cr_val[field]));
TEST(8, R(in), Imm8(1));
FixupBranch input_is_set = J_CC(CC_NZ, false);
// New value is 0.
switch (bit)
{
case CR_SO_BIT: // unset bit 61
MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 61)));
AND(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
case CR_EQ_BIT: // set bit 0 to 1
OR(8, R(ABI_PARAM2), Imm8(1));
break;
case CR_GT_BIT: // !GT, set bit 63
MOV(64, R(ABI_PARAM1), Imm64(1ull << 63));
OR(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
case CR_LT_BIT: // !LT, unset bit 62
MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 62)));
AND(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
}
FixupBranch end = J();
SetJumpTarget(input_is_set);
switch (bit)
{
case CR_SO_BIT: // set bit 61
MOV(64, R(ABI_PARAM1), Imm64(1ull << 61));
OR(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
case CR_EQ_BIT: // set bits 31-0 to 0
MOV(64, R(ABI_PARAM1), Imm64(0xFFFFFFFF00000000));
AND(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
case CR_GT_BIT: // unset bit 63
MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 63)));
AND(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
case CR_LT_BIT: // set bit 62
MOV(64, R(ABI_PARAM1), Imm64(1ull << 62));
OR(64, R(ABI_PARAM2), R(ABI_PARAM1));
break;
}
SetJumpTarget(end);
MOV(64, R(ABI_PARAM1), Imm64(1ull << 32));
OR(64, R(ABI_PARAM2), R(ABI_PARAM1));
MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM2));
}
// Emits a test of one condition bit of CR field `field` followed by a
// conditional jump, and returns the still-unresolved branch so the caller
// can bind its target later. The jump is taken when the bit is set if
// `jump_if_set` is true, and when it is clear otherwise.
//
// Uses the same internal encoding tests as the code below shows: SO is
// bit 61, LT is bit 62, EQ is "low 32 bits are zero", GT is "signed value
// > 0". RAX is used as scratch for every bit except EQ.
FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
	if (bit == CR_SO_BIT || bit == CR_LT_BIT)
	{
		// Single flag bit: 61 for SO, 62 for LT.
		const int flag_pos = (bit == CR_SO_BIT) ? 61 : 62;
		MOV(64, R(RAX), Imm64(1ull << flag_pos));
		TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX));
		if (jump_if_set)
			return J_CC(CC_NZ, true);
		return J_CC(CC_Z, true);
	}

	if (bit == CR_EQ_BIT)
	{
		// EQ: bits 31-0 of the stored value are all zero.
		CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0));
		if (jump_if_set)
			return J_CC(CC_Z, true);
		return J_CC(CC_NZ, true);
	}

	if (bit == CR_GT_BIT)
	{
		// GT: full 64-bit value is strictly positive.
		MOV(64, R(RAX), M(&PowerPC::ppcState.cr_val[field]));
		TEST(64, R(RAX), R(RAX));
		if (jump_if_set)
			return J_CC(CC_G, true);
		return J_CC(CC_LE, true);
	}

	_assert_msg_(DYNA_REC, false, "Invalid CR bit");

	// Should never happen.
	return FixupBranch();
}
void Jit64::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -154,16 +278,47 @@ void Jit64::mfcr(UGeckoInstruction inst)
int d = inst.RD;
gpr.Lock(d);
gpr.KillImmediate(d, false, true);
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
XOR(32, R(EAX), R(EAX));
for (int i = 1; i < 8; i++)
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
X64Reg cr_val = ABI_PARAM1;
X64Reg tmp = ABI_PARAM2;
for (int i = 0; i < 8; i++)
{
SHL(32, R(EAX), Imm8(4));
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
if (i != 0)
SHL(32, R(EAX), Imm8(4));
MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i]));
// SO: Bit 61 set.
MOV(64, R(tmp), R(cr_val));
SHR(64, R(tmp), Imm8(61));
AND(32, R(tmp), Imm8(1));
OR(32, R(EAX), R(tmp));
// EQ: Bits 31-0 == 0.
XOR(32, R(tmp), R(tmp));
TEST(32, R(cr_val), R(cr_val));
SETcc(CC_Z, R(tmp));
SHL(32, R(tmp), Imm8(1));
OR(32, R(EAX), R(tmp));
// GT: Value > 0.
TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(tmp));
SHL(32, R(tmp), Imm8(2));
OR(32, R(EAX), R(tmp));
// LT: Bit 62 set.
MOV(64, R(tmp), R(cr_val));
SHR(64, R(tmp), Imm8(62 - 3));
AND(32, R(tmp), Imm8(0x8));
OR(32, R(EAX), R(tmp));
}
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::mtcrf(UGeckoInstruction inst)
@ -182,7 +337,8 @@ void Jit64::mtcrf(UGeckoInstruction inst)
if ((crm & (0x80 >> i)) != 0)
{
u8 newcr = (gpr.R(inst.RS).offset >> (28 - (i * 4))) & 0xF;
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), Imm8(newcr));
MOV(64, R(RAX), Imm64(PPCCRToInternal(newcr)));
MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX));
}
}
}
@ -190,17 +346,50 @@ void Jit64::mtcrf(UGeckoInstruction inst)
{
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
MOV(32, R(EAX), gpr.R(inst.RS));
SHR(32, R(EAX), Imm8(28 - (i * 4)));
AND(32, R(EAX), Imm32(0xF));
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX));
MOVZX(64, 32, EAX, gpr.R(inst.RS));
SHR(64, R(EAX), Imm8(28 - (i * 4)));
AND(64, R(EAX), Imm32(0xF));
X64Reg cr_val = ABI_PARAM1;
X64Reg tmp = ABI_PARAM2;
MOV(64, R(cr_val), Imm64(1ull << 32));
// SO
MOV(64, R(tmp), R(EAX));
SHL(64, R(tmp), Imm8(63));
SHR(64, R(tmp), Imm8(63 - 61));
OR(64, R(cr_val), R(tmp));
// EQ
MOV(64, R(tmp), R(EAX));
NOT(64, R(tmp));
AND(64, R(tmp), Imm8(CR_EQ));
OR(64, R(cr_val), R(tmp));
// GT
MOV(64, R(tmp), R(EAX));
NOT(64, R(tmp));
AND(64, R(tmp), Imm8(CR_GT));
SHL(64, R(tmp), Imm8(63 - 2));
OR(64, R(cr_val), R(tmp));
// LT
MOV(64, R(tmp), R(EAX));
AND(64, R(tmp), Imm8(CR_LT));
SHL(64, R(tmp), Imm8(62 - 3));
OR(64, R(cr_val), R(tmp));
MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(cr_val));
}
}
gpr.UnlockAll();
gpr.UnlockAllX();
}
}
}
@ -213,8 +402,8 @@ void Jit64::mcrf(UGeckoInstruction inst)
// USES_CR
if (inst.CRFS != inst.CRFD)
{
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRFS]));
MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX));
MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS]));
MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX));
}
}
@ -226,9 +415,41 @@ void Jit64::mcrxr(UGeckoInstruction inst)
// USES_CR
// Copy XER[0-3] into CR[inst.CRFD]
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(28));
MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX));
MOVZX(64, 32, EAX, M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(64, R(EAX), Imm8(28));
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
X64Reg cr_val = ABI_PARAM1;
X64Reg tmp = ABI_PARAM2;
MOV(64, R(cr_val), Imm64(1ull << 32));
// SO
MOV(64, R(tmp), R(EAX));
SHL(64, R(tmp), Imm8(63));
SHR(64, R(tmp), Imm8(63 - 61));
OR(64, R(cr_val), R(tmp));
// EQ
MOV(64, R(tmp), R(EAX));
AND(64, R(tmp), Imm8(0x2));
OR(64, R(cr_val), R(tmp));
// GT
MOV(64, R(tmp), R(EAX));
NOT(64, R(tmp));
AND(64, R(tmp), Imm8(0x4));
SHL(64, R(tmp), Imm8(63 - 2));
OR(64, R(cr_val), R(tmp));
// LT
MOV(64, R(tmp), R(EAX));
AND(64, R(tmp), Imm8(0x8));
SHL(64, R(tmp), Imm8(62 - 3));
OR(64, R(cr_val), R(tmp));
MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(cr_val));
gpr.UnlockAllX();
// Clear XER[0-3]
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF));
@ -240,70 +461,59 @@ void Jit64::crXXX(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
_dbg_assert_msg_(DYNA_REC, inst.OPCD == 19, "Invalid crXXX");
// TODO(delroth): Potential optimizations could be applied here. For
// instance, if the two CR bits being loaded are the same, two loads are
// not required.
// USES_CR
// Get bit CRBA in EAX aligned with bit CRBD
int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3);
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2]));
if (shiftA < 0)
SHL(8, R(EAX), Imm8(-shiftA));
else if (shiftA > 0)
SHR(8, R(EAX), Imm8(shiftA));
// Get bit CRBB in ECX aligned with bit CRBD
gpr.FlushLockX(ECX);
int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3);
MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2]));
if (shiftB < 0)
SHL(8, R(ECX), Imm8(-shiftB));
else if (shiftB > 0)
SHR(8, R(ECX), Imm8(shiftB));
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM2);
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX);
// Compute combined bit
switch (inst.SUBOP10)
{
case 33: // crnor
OR(8, R(EAX), R(ECX));
OR(8, R(EAX), R(ABI_PARAM2));
NOT(8, R(EAX));
break;
case 129: // crandc
NOT(8, R(ECX));
AND(8, R(EAX), R(ECX));
// crandc computes A & ~B. Bit A (CRBA) was fetched into ABI_PARAM2 and
// bit B (CRBB) into EAX, so it is EAX that must be complemented. AND is
// commutative, so the result still ends up in EAX for the store below.
NOT(8, R(EAX));
AND(8, R(EAX), R(ABI_PARAM2));
break;
case 193: // crxor
XOR(8, R(EAX), R(ECX));
XOR(8, R(EAX), R(ABI_PARAM2));
break;
case 225: // crnand
AND(8, R(EAX), R(ECX));
AND(8, R(EAX), R(ABI_PARAM2));
NOT(8, R(EAX));
break;
case 257: // crand
AND(8, R(EAX), R(ECX));
AND(8, R(EAX), R(ABI_PARAM2));
break;
case 289: // creqv
XOR(8, R(EAX), R(ECX));
XOR(8, R(EAX), R(ABI_PARAM2));
NOT(8, R(EAX));
break;
case 417: // crorc
NOT(8, R(ECX));
OR(8, R(EAX), R(ECX));
// crorc computes A | ~B. Bit A (CRBA) was fetched into ABI_PARAM2 and
// bit B (CRBB) into EAX, so it is EAX that must be complemented. OR is
// commutative, so the result still ends up in EAX for the store below.
NOT(8, R(EAX));
OR(8, R(EAX), R(ABI_PARAM2));
break;
case 449: // cror
OR(8, R(EAX), R(ECX));
OR(8, R(EAX), R(ABI_PARAM2));
break;
}
// Store result bit in CRBD
AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3)));
AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3))));
OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX));
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX);
gpr.UnlockAllX();
}

View file

@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) {
return R.IInfo[I - R.FirstI] & 3;
}
static unsigned SlotSet[1000];
static u64 SlotSet[1000];
static u8 GC_ALIGNED16(FSlotSet[16*1000]);
static OpArg regLocForSlot(RegInfo& RI, unsigned slot) {
@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) {
unsigned slot = regGetSpill(RI, RI.regs[reg]);
if (!slot) {
slot = regCreateSpill(RI, RI.regs[reg]);
RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg));
RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg));
}
RI.regs[reg] = nullptr;
}
@ -550,6 +550,48 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) {
regNormalRegClear(RI, I);
}
// Emits the code for an ICmpCRSigned / ICmpCRUnsigned IR instruction.
// The result is the 64-bit difference (op1 - op2), left in a host register
// that is then recorded as holding the value of I.
//   RI - register allocator state; RI.Jit is the emitter being written to.
//   I  - the compare instruction; its opcode selects signed vs. unsigned.
// EAX/RAX is used as a scratch register for the right-hand operand.
static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) {
bool signed_compare = getOpcode(*I) == ICmpCRSigned;
X64Reg reg;
// NOTE(review): bit 4 of IInfo presumably marks the last use of operand 1,
// which would make it safe to overwrite its register in place - confirm
// against regMarkUse.
if (RI.IInfo[I - RI.FirstI] & 4)
{
reg = regEnsureInReg(RI, getOp1(I));
// Signed compares widen the 32-bit operand to 64 bits with sign extension.
// NOTE(review): nothing is emitted on the unsigned path here, so the upper
// 32 bits of reg are assumed to be zero already - confirm.
if (signed_compare)
RI.Jit->MOVSX(64, 32, reg, R(reg));
}
else
{
// Operand 1 must be preserved: load a widened copy into a fresh register.
reg = regFindFreeReg(RI);
if (signed_compare)
RI.Jit->MOVSX(64, 32, reg, regLocForInst(RI, getOp1(I)));
else
// A 32-bit MOV clears the upper half of the destination on x86-64.
RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I)));
}
if (isImm(*getOp2(I)))
{
unsigned RHS = RI.Build->GetImmValue(getOp2(I));
if (!signed_compare && (RHS & 0x80000000U))
{
// A 32-bit immediate is sign-extended by a 64-bit SUB, which would be
// wrong for an unsigned value with the top bit set; go through EAX so
// the value is zero-extended instead.
RI.Jit->MOV(32, R(EAX), Imm32(RHS));
RI.Jit->SUB(64, R(reg), R(RAX));
}
else if (RHS)
{
// Subtracting zero is skipped entirely.
RI.Jit->SUB(64, R(reg), Imm32(RHS));
}
}
else
{
// Non-immediate right-hand side: widen it into RAX the same way as
// operand 1, then subtract.
if (signed_compare)
RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I)));
else
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I)));
RI.Jit->SUB(64, R(reg), R(RAX));
}
// reg now holds the result of I.
RI.regs[reg] = I;
regNormalRegClear(RI, I);
}
static void regWriteExit(RegInfo& RI, InstLoc dest) {
if (isImm(*dest)) {
RI.exitNumber++;
@ -621,6 +663,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
case FPDup1:
case FSNeg:
case FDNeg:
case ConvertFromFastCR:
case ConvertToFastCR:
case FastCRSOSet:
case FastCREQSet:
case FastCRGTSet:
case FastCRLTSet:
if (thisUsed)
regMarkUse(RI, I, getOp1(I), 1);
break;
@ -715,9 +763,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
break;
case BranchCond: {
if (isICmp(*getOp1(I)) &&
isImm(*getOp2(getOp1(I)))) {
if (isICmp(*getOp1(I))) {
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
if (!isImm(*getOp2(getOp1(I))))
regMarkUse(RI, I, getOp2(getOp1(I)), 2);
} else {
regMarkUse(RI, I, getOp1(I), 1);
}
@ -763,7 +812,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
if (!thisUsed) break;
X64Reg reg = regFindFreeReg(RI);
unsigned ppcreg = *I >> 8;
Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg]));
Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg]));
RI.regs[reg] = I;
break;
}
@ -813,10 +862,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
break;
}
case StoreCR: {
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
X64Reg reg = regEnsureInReg(RI, getOp1(I));
unsigned ppcreg = *I >> 16;
// CAUTION: uses 8-bit reg!
Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX));
Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg));
regNormalRegClear(RI, I);
break;
}
@ -1076,40 +1124,138 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
regEmitICmpInst(RI, I, CC_LE);
break;
}
case ICmpCRUnsigned: {
case ICmpCRUnsigned:
{
if (!thisUsed) break;
regEmitCmp(RI, I);
X64Reg reg = regBinReg(RI, I);
FixupBranch pLesser = Jit->J_CC(CC_B);
FixupBranch pGreater = Jit->J_CC(CC_A);
Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0
FixupBranch continue1 = Jit->J();
Jit->SetJumpTarget(pGreater);
Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0
FixupBranch continue2 = Jit->J();
Jit->SetJumpTarget(pLesser);
Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0
Jit->SetJumpTarget(continue1);
Jit->SetJumpTarget(continue2);
regEmitICmpCRInst(RI, I);
break;
}
case ICmpCRSigned:
{
if (!thisUsed) break;
regEmitICmpCRInst(RI, I);
break;
}
case ConvertFromFastCR:
{
if (!thisUsed) break;
X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
Jit->XOR(32, R(EAX), R(EAX));
// SO: Bit 61 set.
Jit->MOV(64, R(RCX), R(cr_val));
Jit->SHR(64, R(RCX), Imm8(61));
Jit->AND(32, R(ECX), Imm8(1));
Jit->OR(32, R(EAX), R(ECX));
// EQ: Bits 31-0 == 0.
Jit->XOR(32, R(ECX), R(ECX));
Jit->TEST(32, R(cr_val), R(cr_val));
Jit->SETcc(CC_Z, R(ECX));
Jit->SHL(32, R(ECX), Imm8(1));
Jit->OR(32, R(EAX), R(ECX));
// GT: Value > 0.
Jit->XOR(32, R(ECX), R(ECX));
Jit->TEST(64, R(cr_val), R(cr_val));
Jit->SETcc(CC_G, R(ECX));
Jit->SHL(32, R(ECX), Imm8(2));
Jit->OR(32, R(EAX), R(ECX));
// LT: Bit 62 set.
Jit->MOV(64, R(ECX), R(cr_val));
Jit->SHR(64, R(ECX), Imm8(62 - 3));
Jit->AND(32, R(ECX), Imm8(0x8));
Jit->OR(32, R(EAX), R(ECX));
Jit->MOV(32, R(cr_val), R(EAX));
RI.regs[cr_val] = I;
regNormalRegClear(RI, I);
break;
}
case ConvertToFastCR:
{
if (!thisUsed) break;
X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
Jit->MOV(64, R(RCX), Imm64(1ull << 32));
// SO
Jit->MOV(64, R(RAX), R(cr_val));
Jit->SHL(64, R(RAX), Imm8(63));
Jit->SHR(64, R(RAX), Imm8(63 - 61));
Jit->OR(64, R(RCX), R(RAX));
// EQ
Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_EQ));
Jit->OR(64, R(RCX), R(RAX));
// GT
Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_GT));
Jit->SHL(64, R(RAX), Imm8(63 - 2));
Jit->OR(64, R(RCX), R(RAX));
// LT
Jit->MOV(64, R(RAX), R(cr_val));
Jit->AND(64, R(RAX), Imm8(CR_LT));
Jit->SHL(64, R(RAX), Imm8(62 - 3));
Jit->OR(64, R(RCX), R(RAX));
Jit->MOV(64, R(cr_val), R(RCX));
RI.regs[cr_val] = I;
regNormalRegClear(RI, I);
break;
}
case FastCRSOSet:
{
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
Jit->MOV(64, R(RAX), Imm64(1ull << 61));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
Jit->SETcc(CC_NZ, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
RI.regs[reg] = I;
regNormalRegClear(RI, I);
break;
}
case ICmpCRSigned: {
case FastCREQSet:
{
if (!thisUsed) break;
regEmitCmp(RI, I);
X64Reg reg = regBinReg(RI, I);
FixupBranch pLesser = Jit->J_CC(CC_L);
FixupBranch pGreater = Jit->J_CC(CC_G);
Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0
FixupBranch continue1 = Jit->J();
Jit->SetJumpTarget(pGreater);
Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0
FixupBranch continue2 = Jit->J();
Jit->SetJumpTarget(pLesser);
Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0
Jit->SetJumpTarget(continue1);
Jit->SetJumpTarget(continue2);
X64Reg reg = regUReg(RI, I);
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0));
Jit->SETcc(CC_Z, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
RI.regs[reg] = I;
regNormalRegClear(RI, I);
break;
}
case FastCRGTSet:
{
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0));
Jit->SETcc(CC_G, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
RI.regs[reg] = I;
regNormalRegClear(RI, I);
break;
}
case FastCRLTSet:
{
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
Jit->MOV(64, R(RAX), Imm64(1ull << 62));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
Jit->SETcc(CC_NZ, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
RI.regs[reg] = I;
regNormalRegClear(RI, I);
break;
@ -1538,7 +1684,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
case CInt16: {
if (!thisUsed) break;
X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), Imm32(ibuild->GetImmValue(I)));
u64 val = ibuild->GetImmValue64(I);
if ((u32)val == val)
Jit->MOV(32, R(reg), Imm32(val));
else if ((s32)val == val)
Jit->MOV(64, R(reg), Imm32(val));
else
Jit->MOV(64, R(reg), Imm64(val));
RI.regs[reg] = I;
break;
}
@ -1566,17 +1718,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
}
case BranchCond: {
if (isICmp(*getOp1(I)) &&
isImm(*getOp2(getOp1(I)))) {
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
if (isICmp(*getOp1(I))) {
regEmitCmp(RI, getOp1(I));
CCFlags flag;
switch (getOpcode(*getOp1(I))) {
case ICmpEq: flag = CC_NE; break;
case ICmpNe: flag = CC_E; break;
case ICmpUgt: flag = CC_BE; break;
case ICmpUlt: flag = CC_AE; break;
case ICmpUge: flag = CC_L; break;
case ICmpUge: flag = CC_B; break;
case ICmpUle: flag = CC_A; break;
case ICmpSgt: flag = CC_LE; break;
case ICmpSlt: flag = CC_GE; break;
@ -1589,7 +1739,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
Jit->SetJumpTarget(cont);
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(getOp1(I)));
} else {
if (RI.IInfo[I - RI.FirstI] & 8)
regClearInst(RI, getOp2(getOp1(I)));
}
else {
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0));
FixupBranch cont = Jit->J_CC(CC_Z);
regWriteExit(RI, getOp2(I));

View file

@ -477,9 +477,9 @@ void JitIL::Trace()
}
#endif
DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3],
PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr,
DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.cr_val[1], PowerPC::ppcState.cr_val[2], PowerPC::ppcState.cr_val[3],
PowerPC::ppcState.cr_val[4], PowerPC::ppcState.cr_val[5], PowerPC::ppcState.cr_val[6], PowerPC::ppcState.cr_val[7], PowerPC::ppcState.fpscr,
PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
}

View file

@ -233,31 +233,25 @@ void JitArm::SingleStep()
void JitArm::Trace()
{
char regs[500] = "";
char fregs[750] = "";
std::string regs;
std::string fregs;
#ifdef JIT_LOG_GPR
for (int i = 0; i < 32; i++)
{
char reg[50];
sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]);
strncat(regs, reg, sizeof(regs) - 1);
regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]);
}
#endif
#ifdef JIT_LOG_FPR
for (int i = 0; i < 32; i++)
{
char reg[50];
sprintf(reg, "f%02d: %016x ", i, riPS0(i));
strncat(fregs, reg, sizeof(fregs) - 1);
fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i));
}
#endif
DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3],
PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr,
PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs);
// Tag the line with this backend's name so ARM JIT traces are not mistaken
// for x86-64 JIT output in the log.
DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s",
PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
}
void JitArm::PrintDebug(UGeckoInstruction inst, u32 level)

View file

@ -50,6 +50,8 @@ private:
void Helper_UpdateCR1(ARMReg fpscr, ARMReg temp);
void SetFPException(ARMReg Reg, u32 Exception);
FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
public:
JitArm() : code_buffer(32000) {}
~JitArm() {}
@ -96,8 +98,7 @@ public:
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
void GenerateRC(int cr = 0);
void ComputeRC(int cr = 0);
void ComputeRC(ARMReg value, int cr = 0);
void ComputeRC(s32 value, int cr);
void ComputeCarry();
@ -143,8 +144,6 @@ public:
void cntlzwx(UGeckoInstruction _inst);
void cmp (UGeckoInstruction _inst);
void cmpi(UGeckoInstruction _inst);
void cmpl(UGeckoInstruction _inst);
void cmpli(UGeckoInstruction _inst);
void negx(UGeckoInstruction _inst);
void mulhwux(UGeckoInstruction _inst);
void rlwimix(UGeckoInstruction _inst);
@ -160,13 +159,9 @@ public:
void mtspr(UGeckoInstruction _inst);
void mfspr(UGeckoInstruction _inst);
void mftb(UGeckoInstruction _inst);
void crXXX(UGeckoInstruction _inst);
void mcrf(UGeckoInstruction _inst);
void mfcr(UGeckoInstruction _inst);
void mtcrf(UGeckoInstruction _inst);
void mtsr(UGeckoInstruction _inst);
void mfsr(UGeckoInstruction _inst);
void mcrxr(UGeckoInstruction _inst);
void twx(UGeckoInstruction _inst);
// LoadStore
@ -193,8 +188,6 @@ public:
void fmaddx(UGeckoInstruction _inst);
void fctiwx(UGeckoInstruction _inst);
void fctiwzx(UGeckoInstruction _inst);
void fcmpo(UGeckoInstruction _inst);
void fcmpu(UGeckoInstruction _inst);
void fnmaddx(UGeckoInstruction _inst);
void fnmaddsx(UGeckoInstruction _inst);
void fresx(UGeckoInstruction _inst);
@ -232,10 +225,6 @@ public:
void ps_nabs(UGeckoInstruction _inst);
void ps_rsqrte(UGeckoInstruction _inst);
void ps_sel(UGeckoInstruction _inst);
void ps_cmpu0(UGeckoInstruction _inst);
void ps_cmpu1(UGeckoInstruction _inst);
void ps_cmpo0(UGeckoInstruction _inst);
void ps_cmpo1(UGeckoInstruction _inst);
// LoadStore paired
void psq_l(UGeckoInstruction _inst);

View file

@ -16,15 +16,6 @@
// The branches are known good, or at least reasonably good.
// No need for a disable-mechanism.
// If defined, clears CR0 at blr and bl-s. If the assumption that
// flags never carry over between functions holds, then the task for
// an optimizer becomes much easier.
// #define ACID_TEST
// Zelda and many more games seem to pass the Acid Test.
using namespace ArmGen;
void JitArm::sc(UGeckoInstruction inst)
{
@ -121,13 +112,7 @@ void JitArm::bx(UGeckoInstruction inst)
destination = SignExt26(inst.LI << 2);
else
destination = js.compilerPC + SignExt26(inst.LI << 2);
#ifdef ACID_TEST
if (inst.LK)
{
MOV(R14, 0);
STRB(R14, R9, PPCSTATE_OFF(cr_fast[0]));
}
#endif
if (destination == js.compilerPC)
{
//PanicAlert("Idle loop detected at %08x", destination);
@ -168,15 +153,10 @@ void JitArm::bcx(UGeckoInstruction inst)
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2));
TST(rA, 8 >> (inst.BI & 3));
//TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = B_CC(CC_EQ); // Zero
else
pConditionDontBranch = B_CC(CC_NEQ); // Not Zero
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
if (inst.LK)
{
u32 Jumpto = js.compilerPC + 4;
@ -240,20 +220,13 @@ void JitArm::bcctrx(UGeckoInstruction inst)
else
{
// Rare condition seen in (just some versions of?) Nintendo's NES Emulator
// BO_2 == 001zy -> b if false
// BO_2 == 011zy -> b if true
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2));
TST(rA, 8 >> (inst.BI & 3));
CCFlags branch;
if (inst.BO_2 & BO_BRANCH_IF_TRUE)
branch = CC_EQ;
else
branch = CC_NEQ;
FixupBranch b = B_CC(branch);
FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR]));
BIC(rA, rA, 0x3);
@ -304,25 +277,10 @@ void JitArm::bclrx(UGeckoInstruction inst)
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2));
TST(rA, 8 >> (inst.BI & 3));
//TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = B_CC(CC_EQ); // Zero
else
pConditionDontBranch = B_CC(CC_NEQ); // Not Zero
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
// This below line can be used to prove that blr "eats flags" in practice.
// This observation will let us do a lot of fun observations.
#ifdef ACID_TEST
if (inst.LK)
{
MOV(R14, 0);
STRB(R14, R9, PPCSTATE_OFF(cr_fast[0]));
}
#endif
//MOV(32, R(EAX), M(&LR));
//AND(32, R(EAX), Imm32(0xFFFFFFFC));
LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR]));

View file

@ -19,8 +19,6 @@
void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp)
{
UBFX(temp, fpscr, 28, 4);
STRB(temp, R9, PPCSTATE_OFF(cr_fast[1]));
}
void JitArm::fctiwx(UGeckoInstruction inst)
@ -129,7 +127,6 @@ void JitArm::fctiwx(UGeckoInstruction inst)
fpr.Unlock(V2);
}
void JitArm::fctiwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -210,136 +207,6 @@ void JitArm::fctiwzx(UGeckoInstruction inst)
fpr.Unlock(V2);
}
void JitArm::fcmpo(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R0(a);
ARMReg vB = fpr.R0(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
TST(fpscrReg, VEMask);
FixupBranch noVXVC = B_CC(CC_NEQ);
SetFPException(fpscrReg, FPSCR_VXVC);
SetJumpTarget(noVXVC);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::fcmpu(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R0(a);
ARMReg vB = fpr.R0(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::fabsx(UGeckoInstruction inst)
{
INSTRUCTION_START

View file

@ -14,42 +14,27 @@
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers.
// Jit64 ComputerRC is signed
// JIT64 GenerateRC is unsigned
void JitArm::GenerateRC(int cr) {
void JitArm::ComputeRC(ARMReg value, int cr) {
ARMReg rB = gpr.GetReg();
MOV(rB, 0x4); // Result > 0
SetCC(CC_EQ); MOV(rB, 0x2); // Result == 0
SetCC(CC_MI); MOV(rB, 0x8); // Result < 0
SetCC();
Operand2 ASRReg(value, ST_ASR, 31);
STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr);
gpr.Unlock(rB);
}
void JitArm::ComputeRC(int cr) {
ARMReg rB = gpr.GetReg();
STR(value, R9, PPCSTATE_OFF(cr_val[cr]));
MOV(rB, ASRReg);
STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32));
MOV(rB, 0x2); // Result == 0
SetCC(CC_LT); MOV(rB, 0x8); // Result < 0
SetCC(CC_GT); MOV(rB, 0x4); // Result > 0
SetCC();
STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr);
gpr.Unlock(rB);
}
void JitArm::ComputeRC(s32 value, int cr) {
ARMReg rB = gpr.GetReg();
if (value < 0)
MOV(rB, 0x8);
else if (value > 0)
MOV(rB, 0x4);
else
MOV(rB, 0x2);
Operand2 ASRReg(rB, ST_ASR, 31);
MOVI2R(rB, value);
STR(rB, R9, PPCSTATE_OFF(cr_val[cr]));
MOV(rB, ASRReg);
STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32));
STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr);
gpr.Unlock(rB);
}
@ -195,7 +180,6 @@ void JitArm::arith(UGeckoInstruction inst)
u32 Imm[2] = {0, 0};
bool Rc = false;
bool carry = false;
bool isUnsigned = false;
bool shiftedImm = false;
switch (inst.OPCD)
@ -306,7 +290,6 @@ void JitArm::arith(UGeckoInstruction inst)
case 522: // addcox
carry = true;
case 40: // subfx
isUnsigned = true;
case 235: // mullwx
case 266:
case 747: // mullwox
@ -431,6 +414,8 @@ void JitArm::arith(UGeckoInstruction inst)
if (Rc) ComputeRC(gpr.GetImm(dest), 0);
return;
}
u32 dest = d;
// One or the other isn't a IMM
switch (inst.OPCD)
{
@ -472,6 +457,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 24:
case 25:
{
dest = a;
ARMReg rA = gpr.GetReg();
RS = gpr.R(s);
RA = gpr.R(a);
@ -483,6 +469,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 26:
case 27:
{
dest = a;
ARMReg rA = gpr.GetReg();
RS = gpr.R(s);
RA = gpr.R(a);
@ -495,6 +482,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 28:
case 29:
{
dest = a;
ARMReg rA = gpr.GetReg();
RS = gpr.R(s);
RA = gpr.R(a);
@ -507,12 +495,14 @@ void JitArm::arith(UGeckoInstruction inst)
switch (inst.SUBOP10)
{
case 24:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
LSLS(RA, RS, RB);
break;
case 28:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -525,12 +515,14 @@ void JitArm::arith(UGeckoInstruction inst)
SUBS(RD, RB, RA);
break;
case 60:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
BICS(RA, RS, RB);
break;
case 124:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -545,6 +537,7 @@ void JitArm::arith(UGeckoInstruction inst)
MULS(RD, RA, RB);
break;
case 284:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -552,6 +545,7 @@ void JitArm::arith(UGeckoInstruction inst)
MVNS(RA, RA);
break;
case 316:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -559,6 +553,7 @@ void JitArm::arith(UGeckoInstruction inst)
break;
case 412:
{
dest = a;
ARMReg rA = gpr.GetReg();
RA = gpr.R(a);
RS = gpr.R(s);
@ -569,12 +564,14 @@ void JitArm::arith(UGeckoInstruction inst)
}
break;
case 444:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
ORRS(RA, RS, RB);
break;
case 476:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -582,12 +579,14 @@ void JitArm::arith(UGeckoInstruction inst)
MVNS(RA, RA);
break;
case 536:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
LSRS(RA, RS, RB);
break;
case 792:
dest = a;
RA = gpr.R(a);
RS = gpr.R(s);
RB = gpr.R(b);
@ -605,7 +604,7 @@ void JitArm::arith(UGeckoInstruction inst)
break;
}
if (carry) ComputeCarry();
if (Rc) isUnsigned ? GenerateRC() : ComputeRC();
if (Rc) ComputeRC(gpr.R(dest));
}
void JitArm::addex(UGeckoInstruction inst)
@ -624,7 +623,7 @@ void JitArm::addex(UGeckoInstruction inst)
GetCarryAndClear(rA);
ADDS(RD, RA, RB);
FinalizeCarry(rA);
if (inst.Rc) ComputeRC();
if (inst.Rc) ComputeRC(RD);
gpr.Unlock(rA);
}
@ -638,10 +637,7 @@ void JitArm::cntlzwx(UGeckoInstruction inst)
ARMReg RS = gpr.R(s);
CLZ(RA, RS);
if (inst.Rc)
{
CMP(RA, 0);
ComputeRC();
}
ComputeRC(RA);
}
void JitArm::mulhwux(UGeckoInstruction inst)
@ -655,8 +651,8 @@ void JitArm::mulhwux(UGeckoInstruction inst)
ARMReg RB = gpr.R(b);
ARMReg RD = gpr.R(d);
ARMReg rA = gpr.GetReg(false);
UMULLS(rA, RD, RA, RB);
if (inst.Rc) ComputeRC();
UMULL(rA, RD, RA, RB);
if (inst.Rc) ComputeRC(RD);
}
void JitArm::extshx(UGeckoInstruction inst)
@ -674,10 +670,8 @@ void JitArm::extshx(UGeckoInstruction inst)
ARMReg rA = gpr.R(a);
ARMReg rS = gpr.R(s);
SXTH(rA, rS);
if (inst.Rc){
CMP(rA, 0);
ComputeRC();
}
if (inst.Rc)
ComputeRC(rA);
}
void JitArm::extsbx(UGeckoInstruction inst)
{
@ -694,10 +688,8 @@ void JitArm::extsbx(UGeckoInstruction inst)
ARMReg rA = gpr.R(a);
ARMReg rS = gpr.R(s);
SXTB(rA, rS);
if (inst.Rc){
CMP(rA, 0);
ComputeRC();
}
if (inst.Rc)
ComputeRC(rA);
}
void JitArm::cmp (UGeckoInstruction inst)
{
@ -713,11 +705,7 @@ void JitArm::cmp (UGeckoInstruction inst)
return;
}
ARMReg RA = gpr.R(a);
ARMReg RB = gpr.R(b);
CMP(RA, RB);
ComputeRC(crf);
FALLBACK_IF(true);
}
void JitArm::cmpi(UGeckoInstruction inst)
{
@ -726,71 +714,12 @@ void JitArm::cmpi(UGeckoInstruction inst)
u32 a = inst.RA;
int crf = inst.CRFD;
if (gpr.IsImm(a))
{
ComputeRC((s32)gpr.GetImm(a) - inst.SIMM_16, crf);
else
{
ARMReg RA = gpr.R(a);
if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 256)
CMP(RA, inst.SIMM_16);
else
{
ARMReg rA = gpr.GetReg();
MOVI2R(rA, inst.SIMM_16);
CMP(RA, rA);
gpr.Unlock(rA);
}
ComputeRC(crf);
return;
}
}
void JitArm::cmpl(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
ARMReg RA = gpr.R(inst.RA);
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
int crf = inst.CRFD;
CMP(RA, RB);
// Unsigned GenerateRC()
MOV(rA, 0x2); // Result == 0
SetCC(CC_LO); MOV(rA, 0x8); // Result < 0
SetCC(CC_HI); MOV(rA, 0x4); // Result > 0
SetCC();
STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf);
gpr.Unlock(rA);
}
void JitArm::cmpli(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
ARMReg RA = gpr.R(inst.RA);
ARMReg rA = gpr.GetReg();
int crf = inst.CRFD;
u32 uimm = (u32)inst.UIMM;
if (uimm < 256)
{
CMP(RA, uimm);
}
else
{
MOVI2R(rA, (u32)inst.UIMM);
CMP(RA, rA);
}
// Unsigned GenerateRC()
MOV(rA, 0x2); // Result == 0
SetCC(CC_LO); MOV(rA, 0x8); // Result < 0
SetCC(CC_HI); MOV(rA, 0x4); // Result > 0
SetCC();
STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf);
gpr.Unlock(rA);
FALLBACK_IF(true);
}
void JitArm::negx(UGeckoInstruction inst)
@ -801,11 +730,10 @@ void JitArm::negx(UGeckoInstruction inst)
ARMReg RA = gpr.R(inst.RA);
ARMReg RD = gpr.R(inst.RD);
RSBS(RD, RA, 0);
RSB(RD, RA, 0);
if (inst.Rc)
{
GenerateRC();
}
ComputeRC(RD);
if (inst.OE)
{
BKPT(0x333);
@ -825,19 +753,12 @@ void JitArm::rlwimix(UGeckoInstruction inst)
MOVI2R(rA, mask);
Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
BIC (rB, RA, rA); // RA & ~mask
AND (rA, rA, Shift);
ORR(RA, rB, rA);
if (inst.Rc)
{
BIC (rB, RA, rA); // RA & ~mask
AND (rA, rA, Shift);
ORRS(RA, rB, rA);
GenerateRC();
}
else
{
BIC (rB, RA, rA); // RA & ~mask
AND (rA, rA, Shift);
ORR(RA, rB, rA);
}
ComputeRC(RA);
gpr.Unlock(rA, rB);
}
@ -853,13 +774,10 @@ void JitArm::rlwinmx(UGeckoInstruction inst)
MOVI2R(rA, mask);
Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
AND(RA, rA, Shift);
if (inst.Rc)
{
ANDS(RA, rA, Shift);
GenerateRC();
}
else
AND (RA, rA, Shift);
ComputeRC(RA);
gpr.Unlock(rA);
//m_GPR[inst.RA] = _rotl(m_GPR[inst.RS],inst.SH) & mask;
@ -882,13 +800,10 @@ void JitArm::rlwnmx(UGeckoInstruction inst)
SUB(rB, rB, RB);
Operand2 Shift(RS, ST_ROR, rB); // Register shifted register
AND(RA, rA, Shift);
if (inst.Rc)
{
ANDS(RA, rA, Shift);
GenerateRC();
}
else
AND (RA, rA, Shift);
ComputeRC(RA);
gpr.Unlock(rA, rB);
}
@ -908,9 +823,9 @@ void JitArm::srawix(UGeckoInstruction inst)
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOV(tmp, RS);
ASRS(RA, RS, amount);
ASR(RA, RS, amount);
if (inst.Rc)
GenerateRC();
ComputeRC(RA);
LSL(tmp, tmp, 32 - amount);
TST(tmp, RA);

View file

@ -611,263 +611,4 @@ void JitArm::ps_nabs(UGeckoInstruction inst)
VABS(vD1, vB1);
VNEG(vD1, vD1);
}
void JitArm::ps_cmpu0(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R0(a);
ARMReg vB = fpr.R0(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
void JitArm::ps_cmpu1(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
u32 a = inst.FA, b = inst.FB;
int cr = inst.CRFD;
ARMReg vA = fpr.R1(a);
ARMReg vB = fpr.R1(b);
ARMReg fpscrReg = gpr.GetReg();
ARMReg crReg = gpr.GetReg();
Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
Operand2 LessThan(0x8, 0xA); // 0x8000
Operand2 GreaterThan(0x4, 0xA); // 0x4000
Operand2 EqualTo(0x2, 0xA); // 0x2000
Operand2 NANRes(0x1, 0xA); // 0x1000
FixupBranch Done1, Done2, Done3;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
BIC(fpscrReg, fpscrReg, FPRFMask);
VCMPE(vA, vB);
VMRS(_PC);
SetCC(CC_LT);
ORR(fpscrReg, fpscrReg, LessThan);
MOV(crReg, 8);
Done1 = B();
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, GreaterThan);
MOV(crReg, 4);
Done2 = B();
SetCC(CC_EQ);
ORR(fpscrReg, fpscrReg, EqualTo);
MOV(crReg, 2);
Done3 = B();
SetCC();
ORR(fpscrReg, fpscrReg, NANRes);
MOV(crReg, 1);
VCMPE(vA, vA);
VMRS(_PC);
FixupBranch NanA = B_CC(CC_NEQ);
VCMPE(vB, vB);
VMRS(_PC);
FixupBranch NanB = B_CC(CC_NEQ);
FixupBranch Done4 = B();
SetJumpTarget(NanA);
SetJumpTarget(NanB);
SetFPException(fpscrReg, FPSCR_VXSNAN);
SetJumpTarget(Done1);
SetJumpTarget(Done2);
SetJumpTarget(Done3);
SetJumpTarget(Done4);
STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, crReg);
}
// Emits a paired-single ordered compare of the fpr.R0() halves of FA and FB.
// The generated code clears the FPRF field of FPSCR, ORs in the bit matching
// the comparison outcome, and writes the matching 4-bit value (8/4/2/1) into
// cr_fast[CRFD]. The unordered path also raises VXVC / VXSNAN as coded below.
void JitArm::ps_cmpo0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);

	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;

	ARMReg lhs = fpr.R0(a);
	ARMReg rhs = fpr.R0(b);
	ARMReg rFPSCR = gpr.GetReg();
	ARMReg rCR = gpr.GetReg();

	// FPRF field and its individual result bits, in FPSCR position.
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000

	LDR(rFPSCR, R9, PPCSTATE_OFF(fpscr));
	BIC(rFPSCR, rFPSCR, FPRFMask);

	VCMPE(lhs, rhs);
	VMRS(_PC);

	// Each SetCC(cond) group below is presumably predicated on cond
	// (including the trailing branch) -- TODO confirm against the emitter.
	SetCC(CC_LT);
	ORR(rFPSCR, rFPSCR, LessThan);
	MOV(rCR, 8);
	FixupBranch doneLess = B();

	SetCC(CC_GT);
	ORR(rFPSCR, rFPSCR, GreaterThan);
	MOV(rCR, 4);
	FixupBranch doneGreater = B();

	SetCC(CC_EQ);
	ORR(rFPSCR, rFPSCR, EqualTo);
	MOV(rCR, 2);
	FixupBranch doneEqual = B();

	// None of the above matched: unordered result.
	SetCC();
	ORR(rFPSCR, rFPSCR, NANRes);
	MOV(rCR, 1);

	// An operand that is not equal to itself is a NaN.
	VCMPE(lhs, lhs);
	VMRS(_PC);
	FixupBranch nanLhs = B_CC(CC_NEQ);
	VCMPE(rhs, rhs);
	VMRS(_PC);
	FixupBranch nanRhs = B_CC(CC_NEQ);

	// Neither self-compare flagged a NaN: only VXVC is raised.
	SetFPException(rFPSCR, FPSCR_VXVC);
	FixupBranch doneUnordered = B();

	// A self-compare flagged a NaN: raise VXSNAN, then VXVC unless the
	// VEMask bits are set in FPSCR.
	SetJumpTarget(nanLhs);
	SetJumpTarget(nanRhs);
	SetFPException(rFPSCR, FPSCR_VXSNAN);
	TST(rFPSCR, VEMask);
	FixupBranch skipVXVC = B_CC(CC_NEQ);
	SetFPException(rFPSCR, FPSCR_VXVC);
	SetJumpTarget(skipVXVC);

	SetJumpTarget(doneLess);
	SetJumpTarget(doneGreater);
	SetJumpTarget(doneEqual);
	SetJumpTarget(doneUnordered);

	// Commit the CR nibble and the updated FPSCR.
	STRB(rCR, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(rFPSCR, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rFPSCR, rCR);
}
// Emits a paired-single ordered compare of the fpr.R1() halves of FA and FB.
// The generated code clears the FPRF field of FPSCR, ORs in the bit matching
// the comparison outcome, and writes the matching 4-bit value (8/4/2/1) into
// cr_fast[CRFD]. The unordered path also raises VXVC / VXSNAN as coded below.
void JitArm::ps_cmpo1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);

	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;

	ARMReg lhs = fpr.R1(a);
	ARMReg rhs = fpr.R1(b);
	ARMReg rFPSCR = gpr.GetReg();
	ARMReg rCR = gpr.GetReg();

	// FPRF field and its individual result bits, in FPSCR position.
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000

	LDR(rFPSCR, R9, PPCSTATE_OFF(fpscr));
	BIC(rFPSCR, rFPSCR, FPRFMask);

	VCMPE(lhs, rhs);
	VMRS(_PC);

	// Each SetCC(cond) group below is presumably predicated on cond
	// (including the trailing branch) -- TODO confirm against the emitter.
	SetCC(CC_LT);
	ORR(rFPSCR, rFPSCR, LessThan);
	MOV(rCR, 8);
	FixupBranch doneLess = B();

	SetCC(CC_GT);
	ORR(rFPSCR, rFPSCR, GreaterThan);
	MOV(rCR, 4);
	FixupBranch doneGreater = B();

	SetCC(CC_EQ);
	ORR(rFPSCR, rFPSCR, EqualTo);
	MOV(rCR, 2);
	FixupBranch doneEqual = B();

	// None of the above matched: unordered result.
	SetCC();
	ORR(rFPSCR, rFPSCR, NANRes);
	MOV(rCR, 1);

	// An operand that is not equal to itself is a NaN.
	VCMPE(lhs, lhs);
	VMRS(_PC);
	FixupBranch nanLhs = B_CC(CC_NEQ);
	VCMPE(rhs, rhs);
	VMRS(_PC);
	FixupBranch nanRhs = B_CC(CC_NEQ);

	// Neither self-compare flagged a NaN: only VXVC is raised.
	SetFPException(rFPSCR, FPSCR_VXVC);
	FixupBranch doneUnordered = B();

	// A self-compare flagged a NaN: raise VXSNAN, then VXVC unless the
	// VEMask bits are set in FPSCR.
	SetJumpTarget(nanLhs);
	SetJumpTarget(nanRhs);
	SetFPException(rFPSCR, FPSCR_VXSNAN);
	TST(rFPSCR, VEMask);
	FixupBranch skipVXVC = B_CC(CC_NEQ);
	SetFPException(rFPSCR, FPSCR_VXVC);
	SetJumpTarget(skipVXVC);

	SetJumpTarget(doneLess);
	SetJumpTarget(doneGreater);
	SetJumpTarget(doneEqual);
	SetJumpTarget(doneUnordered);

	// Commit the CR nibble and the updated FPSCR.
	STRB(rCR, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(rFPSCR, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rFPSCR, rCR);
}

View file

@ -14,6 +14,46 @@
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// Emits a test of one bit of CR field `field` as held in the 64-bit
// cr_val[field] slot, followed by a conditional branch.
//
// field       - CR field index used to address cr_val[].
// bit         - one of CR_SO_BIT / CR_EQ_BIT / CR_GT_BIT / CR_LT_BIT.
// jump_if_set - true: branch is taken when the bit is set; false: when clear.
//
// Returns the FixupBranch for the emitted conditional branch; the caller is
// expected to resolve it. For an invalid `bit` only the assertion fires and
// the returned FixupBranch is left unassigned.
FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
	ARMReg scratch = gpr.GetReg();

	// Masks applied to the upper 32-bit word of cr_val[field].
	// NOTE(review): assuming Operand2(imm, rot) encodes "imm rotated right by
	// 2*rot", these are 0x20000000 (bit 61 of the 64-bit value) and
	// 0x40000000 (bit 62), matching the case labels below -- confirm.
	Operand2 SOBit(2, 2);
	Operand2 LTBit(1, 1);

	// Condition used by the two single-bit tests: TST leaves Z clear when
	// the masked bit is set.
	const CCFlags ccBitTest = jump_if_set ? CC_NEQ : CC_EQ;

	FixupBranch branch;
	switch (bit)
	{
	case CR_SO_BIT: // bit 61 set
		LDR(scratch, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
		TST(scratch, SOBit);
		branch = B_CC(ccBitTest);
		break;

	case CR_EQ_BIT: // low 32 bits == 0
		LDR(scratch, R9, PPCSTATE_OFF(cr_val[field]));
		CMP(scratch, 0);
		branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ);
		break;

	case CR_GT_BIT: // 64-bit value > 0
		// 64-bit subtract of 1 (CMP on the low word, SBCS on the high word):
		// the result is >= 0 exactly when the value is > 0.
		LDR(scratch, R9, PPCSTATE_OFF(cr_val[field]));
		CMP(scratch, 1);
		LDR(scratch, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
		SBCS(scratch, scratch, 0);
		branch = B_CC(jump_if_set ? CC_GE : CC_LT);
		break;

	case CR_LT_BIT: // bit 62 set
		LDR(scratch, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
		TST(scratch, LTBit);
		branch = B_CC(ccBitTest);
		break;

	default:
		_assert_msg_(DYNA_REC, false, "Invalid CR bit");
	}

	gpr.Unlock(scratch);
	return branch;
}
void JitArm::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -84,67 +124,6 @@ void JitArm::mfspr(UGeckoInstruction inst)
}
}
// Emits code that packs the eight cr_fast[] bytes into one 32-bit value
// (cr_fast[0] ends up in the top nibble) and moves it into GPR RD.
void JitArm::mfcr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);
	// USES_CR
	ARMReg packed = gpr.GetReg();
	ARMReg nibble = gpr.GetReg();
	int d = inst.RD;

	// Seed with field 0, then shift in the remaining seven fields.
	LDRB(packed, R9, PPCSTATE_OFF(cr_fast[0]));
	for (int field = 1; field != 8; ++field)
	{
		LDRB(nibble, R9, PPCSTATE_OFF(cr_fast[field]));
		LSL(packed, packed, 4);
		ORR(packed, packed, nibble);
	}

	MOV(gpr.R(d), packed);
	gpr.Unlock(packed, nibble);
}
// Emits code that copies selected 4-bit fields of GPR RS into cr_fast[].
// Bit (0x80 >> i) of the CRM mask selects CR field i. When RS is a known
// immediate the nibbles are computed at compile time.
void JitArm::mtcrf(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	ARMReg scratch = gpr.GetReg();
	// USES_CR
	u32 fieldMask = inst.CRM;

	// Nothing selected: no code to emit.
	if (fieldMask == 0)
	{
		gpr.Unlock(scratch);
		return;
	}

	if (gpr.IsImm(inst.RS))
	{
		for (int field = 0; field < 8; ++field)
		{
			if ((fieldMask & (0x80 >> field)) == 0)
				continue;

			u8 newcr = (gpr.GetImm(inst.RS) >> (28 - (field * 4))) & 0xF;
			MOV(scratch, newcr);
			STRB(scratch, R9, PPCSTATE_OFF(cr_fast[field]));
		}
	}
	else
	{
		ARMReg nibble = gpr.GetReg();
		MOV(scratch, gpr.R(inst.RS));
		for (int field = 0; field < 8; ++field)
		{
			if ((fieldMask & (0x80 >> field)) == 0)
				continue;

			// Extract the 4-bit field for this CR index from the source value.
			UBFX(nibble, scratch, 28 - (field * 4), 4);
			STRB(nibble, R9, PPCSTATE_OFF(cr_fast[field]));
		}
		gpr.Unlock(nibble);
	}

	gpr.Unlock(scratch);
}
void JitArm::mtsr(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -160,25 +139,6 @@ void JitArm::mfsr(UGeckoInstruction inst)
LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR]));
}
// Emits code that moves the top four bits of XER into cr_fast[CRFD] and then
// clears those four bits in XER.
void JitArm::mcrxr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	ARMReg topBits = gpr.GetReg();
	ARMReg xer = gpr.GetReg();

	// Copy XER[0-3] into CR[inst.CRFD]
	LDR(topBits, R9, PPCSTATE_OFF(spr[SPR_XER]));
	MOV(xer, topBits);
	LSR(topBits, topBits, 28);
	STRB(topBits, R9, PPCSTATE_OFF(cr_fast[inst.CRFD]));

	// Clear XER[0-3]
	Operand2 Top4(0xF, 2);
	BIC(xer, xer, Top4);
	STR(xer, R9, PPCSTATE_OFF(spr[SPR_XER]));

	gpr.Unlock(topBits, xer);
}
void JitArm::mtmsr(UGeckoInstruction inst)
{
@ -206,84 +166,16 @@ void JitArm::mcrf(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
ARMReg rA = gpr.GetReg();
if (inst.CRFS != inst.CRFD)
{
LDRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFS]));
STRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFD]));
LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]));
STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]));
LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32));
STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32));
}
gpr.Unlock(rA);
}
// Emits code for the CR logical instructions (crand, cror, crxor, ...).
// Both source bits are loaded from their cr_fast[] bytes and shifted so they
// line up with the destination bit position, combined according to SUBOP10,
// masked to the single destination bit, and merged into cr_fast[CRBD >> 2].
void JitArm::crXXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	ARMReg srcA = gpr.GetReg();
	ARMReg srcB = gpr.GetReg();

	// Get bit CRBA aligned with bit CRBD
	LDRB(srcA, R9, PPCSTATE_OFF(cr_fast[inst.CRBA >> 2]));
	int alignA = (inst.CRBD & 3) - (inst.CRBA & 3);
	if (alignA > 0)
		LSR(srcA, srcA, alignA);
	else if (alignA < 0)
		LSL(srcA, srcA, -alignA);

	// Get bit CRBB aligned with bit CRBD
	int alignB = (inst.CRBD & 3) - (inst.CRBB & 3);
	LDRB(srcB, R9, PPCSTATE_OFF(cr_fast[inst.CRBB >> 2]));
	if (alignB > 0)
		LSR(srcB, srcB, alignB);
	else if (alignB < 0)
		LSL(srcB, srcB, -alignB);

	// Compute combined bit; the result is left in srcA. Bits other than the
	// destination bit are garbage here and are masked off afterwards.
	switch (inst.SUBOP10)
	{
	case 33: // crnor
		ORR(srcA, srcA, srcB);
		MVN(srcA, srcA);
		break;
	case 129: // crandc
		MVN(srcB, srcB);
		AND(srcA, srcA, srcB);
		break;
	case 193: // crxor
		EOR(srcA, srcA, srcB);
		break;
	case 225: // crnand
		AND(srcA, srcA, srcB);
		MVN(srcA, srcA);
		break;
	case 257: // crand
		AND(srcA, srcA, srcB);
		break;
	case 289: // creqv
		EOR(srcA, srcA, srcB);
		MVN(srcA, srcA);
		break;
	case 417: // crorc
		MVN(srcA, srcA);
		ORR(srcA, srcA, srcB);
		break;
	case 449: // cror
		ORR(srcA, srcA, srcB);
		break;
	}

	// Store result bit in CRBD: keep the other three bits of the destination
	// field and replace only the selected one.
	AND(srcA, srcA, 0x8 >> (inst.CRBD & 3));
	LDRB(srcB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2]));
	BIC(srcB, srcB, 0x8 >> (inst.CRBD & 3));
	ORR(srcB, srcB, srcA);
	STRB(srcB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2]));

	gpr.Unlock(srcA, srcB);
}

View file

@ -47,7 +47,7 @@ static GekkoOPTemplate primarytable[] =
{7, &JitArm::arith}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, &JitArm::subfic}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, &JitArm::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{10, &JitArm::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &JitArm::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &JitArm::arith}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &JitArm::arith}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
@ -112,14 +112,14 @@ static GekkoOPTemplate primarytable[] =
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm::ps_cmpu0}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::ps_cmpo0}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{0, &JitArm::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::ps_cmpu1}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::ps_cmpo1}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
@ -162,14 +162,14 @@ static GekkoOPTemplate table19[] =
{
{528, &JitArm::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}},
{16, &JitArm::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}},
{257, &JitArm::crXXX}, //"crand", OPTYPE_CR, FL_EVIL}},
{129, &JitArm::crXXX}, //"crandc", OPTYPE_CR, FL_EVIL}},
{289, &JitArm::crXXX}, //"creqv", OPTYPE_CR, FL_EVIL}},
{225, &JitArm::crXXX}, //"crnand", OPTYPE_CR, FL_EVIL}},
{33, &JitArm::crXXX}, //"crnor", OPTYPE_CR, FL_EVIL}},
{449, &JitArm::crXXX}, //"cror", OPTYPE_CR, FL_EVIL}},
{417, &JitArm::crXXX}, //"crorc", OPTYPE_CR, FL_EVIL}},
{193, &JitArm::crXXX}, //"crxor", OPTYPE_CR, FL_EVIL}},
{257, &JitArm::FallBackToInterpreter}, //"crand", OPTYPE_CR, FL_EVIL}},
{129, &JitArm::FallBackToInterpreter}, //"crandc", OPTYPE_CR, FL_EVIL}},
{289, &JitArm::FallBackToInterpreter}, //"creqv", OPTYPE_CR, FL_EVIL}},
{225, &JitArm::FallBackToInterpreter}, //"crnand", OPTYPE_CR, FL_EVIL}},
{33, &JitArm::FallBackToInterpreter}, //"crnor", OPTYPE_CR, FL_EVIL}},
{449, &JitArm::FallBackToInterpreter}, //"cror", OPTYPE_CR, FL_EVIL}},
{417, &JitArm::FallBackToInterpreter}, //"crorc", OPTYPE_CR, FL_EVIL}},
{193, &JitArm::FallBackToInterpreter}, //"crxor", OPTYPE_CR, FL_EVIL}},
{150, &JitArm::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}},
{0, &JitArm::mcrf}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}},
@ -190,7 +190,7 @@ static GekkoOPTemplate table31[] =
{476, &JitArm::arith}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, &JitArm::arith}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{0, &JitArm::cmp}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, &JitArm::cmpl}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, &JitArm::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{26, &JitArm::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{922, &JitArm::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, &JitArm::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
@ -266,16 +266,16 @@ static GekkoOPTemplate table31[] =
{759, &JitArm::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArm::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArm::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
{19, &JitArm::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
{83, &JitArm::mfmsr}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}},
{144, &JitArm::mtcrf}, //"mtcrf", OPTYPE_SYSTEM, 0}},
{144, &JitArm::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}},
{146, &JitArm::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{210, &JitArm::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}},
{242, &JitArm::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}},
{339, &JitArm::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &JitArm::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &JitArm::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &JitArm::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{512, &JitArm::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{595, &JitArm::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &JitArm::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
@ -337,8 +337,8 @@ static GekkoOPTemplate table59[] =
static GekkoOPTemplate table63[] =
{
{264, &JitArm::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArm::fcmpo}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArm::fcmpu}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArm::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArm::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, &JitArm::fctiwx}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm::fctiwzx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},

View file

@ -1,744 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <algorithm>
#include "Common/ArmEmitter.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
#include "Core/PowerPC/JitArmIL/JitILAsm.h"
#include "Core/PowerPC/JitILCommon/IR.h"
using namespace IREmitter;
using namespace ArmGen;
// Size of the per-register tracking arrays (regs / fregs) in RegInfo.
static const unsigned int MAX_NUMBER_OF_REGS = 32;
// Book-keeping shared by the liveness pass and the code-emission pass of
// the IR writer.
struct RegInfo {
	JitArmIL *Jit;      // emitter the generated code is written through
	IRBuilder* Build;   // IR being lowered; not set by the constructor
	InstLoc FirstI;     // first IR instruction; used as the base for indexing
	// Per-instruction info word, indexed by (I - FirstI):
	//   bits 0-1 : use count, saturating at 2
	//   bit  2/3 : operand 1 / operand 2 has its last use at this instruction
	//   bits 16+ : spill slot number (0 = none)
	std::vector<unsigned> IInfo;
	// Per-instruction: latest instruction that uses the value.
	std::vector<InstLoc> lastUsed;
	// Which IR value each integer / float register currently holds (0 = free).
	InstLoc regs[MAX_NUMBER_OF_REGS];
	InstLoc fregs[MAX_NUMBER_OF_REGS];
	unsigned numSpills;
	unsigned numFSpills;
	unsigned exitNumber;

	RegInfo(JitArmIL* j, InstLoc f, unsigned insts)
		: Jit(j), FirstI(f), IInfo(insts), lastUsed(insts),
		  numSpills(0), numFSpills(0), exitNumber(0)
	{
		for (unsigned r = 0; r != MAX_NUMBER_OF_REGS; ++r)
		{
			regs[r] = 0;
			fregs[r] = 0;
		}
	}

private:
	RegInfo(RegInfo&); // DO NOT IMPLEMENT
};
// Registers the IR register allocator may hand out, in preference order.
static const ARMReg RegAllocOrder[] = {R0, R1, R2, R3, R4, R5, R6, R7, R8};
// Number of entries in RegAllocOrder.
static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(ARMReg);
// Backing storage for spilled values; spill slot N lives at SlotSet[N - 1].
static unsigned SlotSet[1000];
// Liveness pass helper: records that instruction I reads Op as operand OpNum.
// The first recorded use of Op sets the "operand OpNum dies here" bit on I
// (the caller feeds instructions back to front), Op's use count is bumped
// (saturating at 2), and Op's last-use position is raised to at least I.
static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
	unsigned& opInfo = R.IInfo[Op - R.FirstI];
	if (opInfo == 0)
		R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
	if (opInfo < 2)
		++opInfo;

	InstLoc& lastUse = R.lastUsed[Op - R.FirstI];
	if (lastUse < I)
		lastUse = I;
}
// Frees every allocatable register that currently holds the value of I.
static void regClearInst(RegInfo& RI, InstLoc I) {
	const ARMReg* const end = RegAllocOrder + RegAllocSize;
	for (const ARMReg* r = RegAllocOrder; r != end; ++r)
	{
		InstLoc& holder = RI.regs[*r];
		if (holder == I)
			holder = 0;
	}
}
// Releases the registers of operand 1 and/or operand 2 of I when the info
// word of I marks that operand (bit 2 / bit 3) as having its last use here.
static void regNormalRegClear(RegInfo& RI, InstLoc I) {
	const unsigned info = RI.IInfo[I - RI.FirstI];
	if (info & 4)
		regClearInst(RI, getOp1(I));
	if (info & 8)
		regClearInst(RI, getOp2(I));
}
// Returns the use-count bits (low two bits of the info word) of I;
// non-zero means some instruction reads the value of I.
static unsigned regReadUse(RegInfo& R, InstLoc I) {
	const unsigned info = R.IInfo[I - R.FirstI];
	return info & 3;
}
// Returns the address of spill slot `slot` (1-based) inside SlotSet, as a
// 32-bit integer suitable for loading into a register.
static u32 regLocForSlot(RegInfo& RI, unsigned slot) {
	unsigned* const slotAddr = SlotSet + (slot - 1);
	return (u32)slotAddr;
}
// Allocates the next spill slot number, records it in the upper 16 bits of
// the info word of I, and returns it.
// NOTE(review): the slot number is not checked against the size of SlotSet.
static unsigned regCreateSpill(RegInfo& RI, InstLoc I) {
	RI.numSpills += 1;
	const unsigned slot = RI.numSpills;
	RI.IInfo[I - RI.FirstI] |= slot << 16;
	return slot;
}
// Returns the spill slot number stored in the upper 16 bits of the info word
// of I, or 0 when I has never been spilled.
static unsigned regGetSpill(RegInfo& RI, InstLoc I) {
	const unsigned info = RI.IInfo[I - RI.FirstI];
	return info >> 16;
}
// Evicts whatever value `reg` holds. If that value has no spill slot yet, one
// is created and a store of `reg` to the slot is emitted, using R14 as the
// address register. A value that already has a slot is simply dropped.
static void regSpill(RegInfo& RI, ARMReg reg) {
	InstLoc& held = RI.regs[reg];
	if (held != 0)
	{
		if (regGetSpill(RI, held) == 0)
		{
			const unsigned slot = regCreateSpill(RI, held);
			RI.Jit->MOVI2R(R14, regLocForSlot(RI, slot));
			RI.Jit->STR(reg, R14, 0);
		}
		held = 0;
	}
}
// Returns a register that may be overwritten.
// Prefers an unoccupied register from RegAllocOrder. When all are occupied,
// the register whose value has the furthest last use is spilled and returned.
static ARMReg regFindFreeReg(RegInfo& RI) {
	for (int i = 0; i < RegAllocSize; i++)
		if (RI.regs[RegAllocOrder[i]] == 0)
			return RegAllocOrder[i];

	// Default to the first register so that RegAllocOrder is never indexed
	// with -1 when no occupant has a recorded last use.
	int bestIndex = 0;
	InstLoc bestEnd = 0;
	for (int i = 0; i < RegAllocSize; ++i) {
		const InstLoc start = RI.regs[RegAllocOrder[i]];
		const InstLoc end = RI.lastUsed[start - RI.FirstI];
		if (bestEnd < end) {
			bestEnd = end;
			bestIndex = i;
		}
	}

	ARMReg reg = RegAllocOrder[bestIndex];
	regSpill(RI, reg);
	return reg;
}
// Returns a register holding the value of I.
// If I already lives in an allocatable register, that register is returned.
// Otherwise a reload from the spill slot of I into a free register is
// emitted (R14 is used as the address register). The returned register is
// not recorded in RI.regs; callers do that themselves where needed.
static ARMReg regLocForInst(RegInfo& RI, InstLoc I) {
	for (int i = 0; i < RegAllocSize; i++)
		if (RI.regs[RegAllocOrder[i]] == I)
			return RegAllocOrder[i];

	if (regGetSpill(RI, I) == 0)
		PanicAlert("Retrieving unknown spill slot?!");

	// Choose the destination before loading the slot address into R14:
	// regFindFreeReg() may evict a value, and the spill store it emits uses
	// R14 as its own address register, which would otherwise make the reload
	// below read the wrong slot.
	ARMReg reg = regFindFreeReg(RI);
	RI.Jit->MOVI2R(R14, regLocForSlot(RI, regGetSpill(RI, I)));
	RI.Jit->LDR(reg, R14, 0);
	return reg;
}
// Emits a copy of operand 1 of I into a free register and returns that
// register, so the caller can modify it in place.
static ARMReg regBinLHSReg(RegInfo& RI, InstLoc I) {
	const ARMReg dst = regFindFreeReg(RI);
	const ARMReg src = regLocForInst(RI, getOp1(I));
	RI.Jit->MOV(dst, src);
	return dst;
}
// Picks the result register for I: when operand 1 (checked first) or
// operand 2 has its last use at I, the register of that operand is reused;
// otherwise a free register is returned.
static ARMReg regBinReg(RegInfo& RI, InstLoc I) {
	// FIXME: When regLocForInst() is extracted as a local variable,
	// "Retrieving unknown spill slot?!" is shown.
	const unsigned info = RI.IInfo[I - RI.FirstI];
	if (info & 4)
		return regLocForInst(RI, getOp1(I));
	if (info & 8)
		return regLocForInst(RI, getOp2(I));
	return regFindFreeReg(RI);
}
// Spills R0-R3, in that order, so that a following call may overwrite them.
static void regSpillCallerSaved(RegInfo& RI) {
	static const ARMReg callerSaved[] = {R0, R1, R2, R3};
	const unsigned count = sizeof(callerSaved) / sizeof(callerSaved[0]);
	for (unsigned i = 0; i < count; ++i)
		regSpill(RI, callerSaved[i]);
}
// Returns a register holding the value of I; forwards to regLocForInst().
static ARMReg regEnsureInReg(RegInfo& RI, InstLoc I) {
return regLocForInst(RI, I);
}
// Emits a block exit to `dest`. An immediate destination produces a direct
// exit (and bumps RI.exitNumber); any other destination is passed to the
// exit routine in a register.
static void regWriteExit(RegInfo& RI, InstLoc dest) {
	if (!isImm(*dest)) {
		RI.Jit->WriteExitDestInReg(regLocForInst(RI, dest));
		return;
	}

	RI.exitNumber++;
	RI.Jit->WriteExit(RI.Build->GetImmValue(dest));
}
// Emits a store of the value of I to R9 + offset.
// width  - 32 (STR) or 8 (STRB); any other width raises a PanicAlert and
//          emits nothing.
// An immediate value is first materialized in R12.
static void regStoreInstToPPCState(RegInfo& RI, unsigned width, InstLoc I, s32 offset) {
	void (JitArmIL::*store)(ARMReg, ARMReg, Operand2, bool);
	if (width == 32) {
		store = &JitArmIL::STR;
	} else if (width == 8) {
		store = &JitArmIL::STRB;
	} else {
		PanicAlert("Not implemented!");
		return;
	}

	ARMReg src;
	if (isImm(*I)) {
		RI.Jit->MOVI2R(R12, RI.Build->GetImmValue(I));
		src = R12;
	} else {
		src = regEnsureInReg(RI, I);
	}
	(RI.Jit->*store)(src, R9, offset, true);
}
//
// Mark and calculation routines for profiled load/store addresses
// Could be extended to unprofiled addresses.
//
// Marks the address operand AI of load/store I as used:
//  - an immediate address inside RAM needs no register and is not marked;
//  - for "base + immediate" only the base operand is marked;
//  - anything else is marked as-is.
static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) {
	if (isImm(*AI) && Memory::IsRAMAddress(RI.Build->GetImmValue(AI)))
		return;

	const bool baseAndImm = getOpcode(*AI) == Add && isImm(*getOp2(AI));
	InstLoc used = baseAndImm ? getOp1(AI) : AI;
	regMarkUse(RI, I, used, OpNum);
}
// Binary ops
// Two-operand wrappers (reg = reg OP op2) so the emitter's three-operand
// instructions can be passed around as a uniform member-function pointer.
// reg ^= op2
void JitArmIL::BIN_XOR(ARMReg reg, Operand2 op2)
{
EOR(reg, reg, op2);
}
// Two-operand wrapper: reg |= op2
void JitArmIL::BIN_OR(ARMReg reg, Operand2 op2)
{
ORR(reg, reg, op2);
}
// Two-operand wrapper: reg &= op2
void JitArmIL::BIN_AND(ARMReg reg, Operand2 op2)
{
AND(reg, reg, op2);
}
// Two-operand wrapper: reg += op2
void JitArmIL::BIN_ADD(ARMReg reg, Operand2 op2)
{
ADD(reg, reg, op2);
}
// Emits a shift instruction for I.
// op - emitter member (LSL / LSR / ASR) taking (dest, src, amount).
// The left-hand operand is copied into a fresh register, shifted in place by
// either the immediate amount or the register holding operand 2, and that
// register is recorded as holding I.
static void regEmitShiftInst(RegInfo& RI, InstLoc I, void (JitArmIL::*op)(ARMReg, ARMReg, Operand2))
{
	ARMReg reg = regBinLHSReg(RI, I);
	if (isImm(*getOp2(I))) {
		unsigned RHS = RI.Build->GetImmValue(getOp2(I));
		(RI.Jit->*op)(reg, reg, RHS);
	} else {
		(RI.Jit->*op)(reg, reg, regLocForInst(RI, getOp2(I)));
	}
	RI.regs[reg] = I;
	// Release operands whose last use is here. This must also run for the
	// immediate form: regBinLHSReg() always copies the LHS into a new
	// register, so the register of a dying LHS would otherwise stay occupied.
	regNormalRegClear(RI, I);
}
// Emits a two-operand ALU instruction (dest = dest OP rhs) for I.
// op          - emitter member taking (dest, rhs).
// commutable  - when true, the register of operand 2 may be used as the
//               destination if operand 2 dies here.
// The destination is the register of a dying operand when possible,
// otherwise a fresh register that operand 1 is copied into. Immediates that
// do not fit an Operand2 encoding go through R12.
static void regEmitBinInst(RegInfo& RI, InstLoc I,
                           void (JitArmIL::*op)(ARMReg, Operand2),
                           bool commutable = false) {
	const unsigned info = RI.IInfo[I - RI.FirstI];
	bool swapped = false;
	ARMReg dst;

	if (info & 4) {
		// Operand 1 dies here: overwrite its register.
		dst = regEnsureInReg(RI, getOp1(I));
	} else if (commutable && (info & 8)) {
		// Operand 2 dies here and order does not matter: overwrite it.
		dst = regEnsureInReg(RI, getOp2(I));
		swapped = true;
	} else {
		dst = regFindFreeReg(RI);
		RI.Jit->MOV(dst, regLocForInst(RI, getOp1(I)));
	}

	if (isImm(*getOp2(I))) {
		const unsigned imm = RI.Build->GetImmValue(getOp2(I));
		Operand2 encoded;
		if (TryMakeOperand2(imm, encoded)) {
			(RI.Jit->*op)(dst, encoded);
		} else {
			RI.Jit->MOVI2R(R12, imm);
			(RI.Jit->*op)(dst, R12);
		}
	} else {
		// The remaining source is whichever operand is not already in dst.
		InstLoc other = swapped ? getOp1(I) : getOp2(I);
		(RI.Jit->*op)(dst, regLocForInst(RI, other));
	}

	RI.regs[dst] = I;
	regNormalRegClear(RI, I);
}
// Emits a CMP of operand 1 of I against operand 2 of I. Immediates that do
// not fit an Operand2 encoding are first loaded into R12.
static void regEmitCmp(RegInfo& RI, InstLoc I) {
	if (!isImm(*getOp2(I))) {
		const ARMReg lhs = regEnsureInReg(RI, getOp1(I));
		const ARMReg rhs = regLocForInst(RI, getOp2(I));
		RI.Jit->CMP(lhs, rhs);
		return;
	}

	const unsigned imm = RI.Build->GetImmValue(getOp2(I));
	Operand2 encoded;
	if (TryMakeOperand2(imm, encoded)) {
		RI.Jit->CMP(regLocForInst(RI, getOp1(I)), encoded);
		return;
	}

	RI.Jit->MOVI2R(R12, imm);
	RI.Jit->CMP(regLocForInst(RI, getOp1(I)), R12);
}
// Lowers the IR held by `ibuild` to ARM code emitted through `Jit`.
//
// Two passes are made over the IR:
//  1. backward, to compute liveness (use counts, last-use bits, last-use
//     positions) via regMarkUse / regMarkMemAddress;
//  2. forward, to emit code for every instruction whose result is used or
//     which has side effects, allocating registers on the fly.
// After the forward pass every register must be free again; otherwise the
// process is aborted. Finally an exit to `exitAddress` is written.
static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) {
	RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
	RI.Build = ibuild;

	// Pass to compute liveness
	ibuild->StartBackPass();
	for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) {
		InstLoc I = ibuild->ReadBackward();
		unsigned int op = getOpcode(*I);
		bool thisUsed = regReadUse(RI, I) ? true : false;
		switch (op) {
		default:
			PanicAlert("Unexpected inst!");
			// fall through: handled as having no liveness effects
		case Nop:
		case CInt16:
		case CInt32:
		case LoadGReg:
		case LoadLink:
		case LoadCR:
		case LoadCarry:
		case LoadCTR:
		case LoadMSR:
		case LoadFReg:
		case LoadFRegDENToZero:
		case LoadGQR:
		case BlockEnd:
		case BlockStart:
		case FallBackToInterpreter:
		case SystemCall:
		case RFIExit:
		case InterpreterBranch:
		case ShortIdleLoop:
		case FPExceptionCheck:
		case DSIExceptionCheck:
		case ISIException:
		case ExtExceptionCheck:
		case BreakPointCheck:
		case Int3:
		case Tramp:
			// No liveness effects
			break;
		case SExt8:
		case SExt16:
		case BSwap32:
		case BSwap16:
		case Cntlzw:
		case Not:
		case DupSingleToMReg:
		case DoubleToSingle:
		case ExpandPackedToMReg:
		case CompactMRegToPacked:
		case FPNeg:
		case FPDup0:
		case FPDup1:
		case FSNeg:
		case FDNeg:
			// Unary ops only keep their operand alive if their own result is used.
			if (thisUsed)
				regMarkUse(RI, I, getOp1(I), 1);
			break;
		case Load8:
		case Load16:
		case Load32:
			regMarkMemAddress(RI, I, getOp1(I), 1);
			break;
		case LoadDouble:
		case LoadSingle:
		case LoadPaired:
			if (thisUsed)
				regMarkUse(RI, I, getOp1(I), 1);
			break;
		case StoreCR:
		case StoreCarry:
		case StoreFPRF:
			regMarkUse(RI, I, getOp1(I), 1);
			break;
		case StoreGReg:
		case StoreLink:
		case StoreCTR:
		case StoreMSR:
		case StoreGQR:
		case StoreSRR:
		case StoreFReg:
			// Immediates are materialized at the store; they need no register.
			if (!isImm(*getOp1(I)))
				regMarkUse(RI, I, getOp1(I), 1);
			break;
		case Add:
		case Sub:
		case And:
		case Or:
		case Xor:
		case Mul:
		case MulHighUnsigned:
		case Rol:
		case Shl:
		case Shrl:
		case Sarl:
		case ICmpCRUnsigned:
		case ICmpCRSigned:
		case ICmpEq:
		case ICmpNe:
		case ICmpUgt:
		case ICmpUlt:
		case ICmpUge:
		case ICmpUle:
		case ICmpSgt:
		case ICmpSlt:
		case ICmpSge:
		case ICmpSle:
		case FSMul:
		case FSAdd:
		case FSSub:
		case FDMul:
		case FDAdd:
		case FDSub:
		case FPAdd:
		case FPMul:
		case FPSub:
		case FPMerge00:
		case FPMerge01:
		case FPMerge10:
		case FPMerge11:
		case FDCmpCR:
		case InsertDoubleInMReg:
			if (thisUsed) {
				regMarkUse(RI, I, getOp1(I), 1);
				if (!isImm(*getOp2(I)))
					regMarkUse(RI, I, getOp2(I), 2);
			}
			break;
		case Store8:
		case Store16:
		case Store32:
			if (!isImm(*getOp1(I)))
				regMarkUse(RI, I, getOp1(I), 1);
			regMarkMemAddress(RI, I, getOp2(I), 2);
			break;
		case StoreSingle:
		case StoreDouble:
		case StorePaired:
			regMarkUse(RI, I, getOp1(I), 1);
			regMarkUse(RI, I, getOp2(I), 2);
			break;
		case BranchUncond:
			if (!isImm(*getOp1(I)))
				regMarkUse(RI, I, getOp1(I), 1);
			break;
		case IdleBranch:
			regMarkUse(RI, I, getOp1(getOp1(I)), 1);
			break;
		case BranchCond: {
			// A compare against an immediate is fused into the branch, so
			// only the compare's left operand needs to stay live.
			if (isICmp(*getOp1(I)) &&
			    isImm(*getOp2(getOp1(I)))) {
				regMarkUse(RI, I, getOp1(getOp1(I)), 1);
			} else {
				regMarkUse(RI, I, getOp1(I), 1);
			}
			if (!isImm(*getOp2(I)))
				regMarkUse(RI, I, getOp2(I), 2);
			break;
		}
		}
	}

	// Pass to emit code
	ibuild->StartForwardPass();
	for (unsigned i = 0; i != RI.IInfo.size(); i++) {
		InstLoc I = ibuild->ReadForward();
		bool thisUsed = regReadUse(RI, I) ? true : false;
		if (thisUsed) {
			// Needed for IR Writer
			ibuild->SetMarkUsed(I);
		}

		switch (getOpcode(*I)) {
		case CInt32:
		case CInt16: {
			if (!thisUsed) break;
			ARMReg reg = regFindFreeReg(RI);
			Jit->MOVI2R(reg, ibuild->GetImmValue(I));
			RI.regs[reg] = I;
			break;
		}
		case BranchUncond: {
			regWriteExit(RI, getOp1(I));
			regNormalRegClear(RI, I);
			break;
		}
		case BranchCond: {
			if (isICmp(*getOp1(I)) &&
			    isImm(*getOp2(getOp1(I)))) {
				unsigned imm = RI.Build->GetImmValue(getOp2(getOp1(I)));
				// Resolve the left operand before touching R14: a reload
				// from a spill slot uses R14 as its address register and
				// would overwrite an immediate parked there.
				ARMReg lhs = regLocForInst(RI, getOp1(getOp1(I)));
				if (imm > 255)
				{
					Jit->MOVI2R(R14, imm);
					Jit->CMP(lhs, R14);
				}
				else
					Jit->CMP(lhs, imm);

				// The branch emitted below skips the exit, so it must use
				// the logical negation of the IR comparison.
				CCFlags flag;
				switch (getOpcode(*getOp1(I))) {
					case ICmpEq: flag = CC_NEQ; break;
					case ICmpNe: flag = CC_EQ; break;
					case ICmpUgt: flag = CC_LS; break;
					case ICmpUlt: flag = CC_HS; break;
					case ICmpUge: flag = CC_LO; break;
					case ICmpUle: flag = CC_HI; break;
					case ICmpSgt: flag = CC_LE; break;
					case ICmpSlt: flag = CC_GE; break;
					case ICmpSge: flag = CC_LT; break;
					case ICmpSle: flag = CC_GT; break;
					default: PanicAlert("cmpXX"); flag = CC_AL; break;
				}
				FixupBranch cont = Jit->B_CC(flag);
				regWriteExit(RI, getOp2(I));
				Jit->SetJumpTarget(cont);
				if (RI.IInfo[I - RI.FirstI] & 4)
					regClearInst(RI, getOp1(getOp1(I)));
			} else {
				// Generic condition: exit when the value is non-zero.
				Jit->CMP(regLocForInst(RI, getOp1(I)), 0);
				FixupBranch cont = Jit->B_CC(CC_EQ);
				regWriteExit(RI, getOp2(I));
				Jit->SetJumpTarget(cont);
				if (RI.IInfo[I - RI.FirstI] & 4)
					regClearInst(RI, getOp1(I));
			}
			if (RI.IInfo[I - RI.FirstI] & 8)
				regClearInst(RI, getOp2(I));
			break;
		}
		case StoreGReg: {
			unsigned ppcreg = *I >> 16;
			regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(gpr[ppcreg]));
			regNormalRegClear(RI, I);
			break;
		}
		case StoreCR: {
			unsigned ppcreg = *I >> 16;
			regStoreInstToPPCState(RI, 8, getOp1(I), PPCSTATE_OFF(cr_fast[ppcreg]));
			regNormalRegClear(RI, I);
			break;
		}
		case StoreLink: {
			regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_LR]));
			regNormalRegClear(RI, I);
			break;
		}
		case StoreCTR: {
			regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_CTR]));
			regNormalRegClear(RI, I);
			break;
		}
		case StoreMSR: {
			regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(msr));
			regNormalRegClear(RI, I);
			break;
		}
		case LoadGReg: {
			if (!thisUsed) break;
			ARMReg reg = regFindFreeReg(RI);
			unsigned ppcreg = *I >> 8;
			Jit->LDR(reg, R9, PPCSTATE_OFF(gpr[ppcreg]));
			RI.regs[reg] = I;
			break;
		}
		case LoadCR: {
			if (!thisUsed) break;
			ARMReg reg = regFindFreeReg(RI);
			unsigned ppcreg = *I >> 8;
			Jit->LDRB(reg, R9, PPCSTATE_OFF(cr_fast[ppcreg]));
			RI.regs[reg] = I;
			break;
		}
		case LoadCTR: {
			if (!thisUsed) break;
			ARMReg reg = regFindFreeReg(RI);
			Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_CTR]));
			RI.regs[reg] = I;
			break;
		}
		case LoadLink: {
			if (!thisUsed) break;
			ARMReg reg = regFindFreeReg(RI);
			Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_LR]));
			RI.regs[reg] = I;
			break;
		}
		case FallBackToInterpreter: {
			unsigned InstCode = ibuild->GetImmValue(getOp1(I));
			unsigned instAddr = ibuild->GetImmValue(getOp2(I));
			// There really shouldn't be anything live across an
			// interpreter call at the moment, but optimizing interpreter
			// calls isn't completely out of the question...
			regSpillCallerSaved(RI);
			Jit->MOVI2R(R14, instAddr);
			Jit->STR(R14, R9, PPCSTATE_OFF(pc));
			Jit->MOVI2R(R14, instAddr + 4);
			Jit->STR(R14, R9, PPCSTATE_OFF(npc));

			// Call the interpreter handler with the raw opcode in R0.
			Jit->MOVI2R(R0, InstCode);
			Jit->MOVI2R(R14, (u32)GetInterpreterOp(InstCode));
			Jit->BL(R14);
			break;
		}
		case SystemCall: {
			unsigned instAddr = ibuild->GetImmValue(getOp1(I));
			Jit->MOVI2R(R14, instAddr + 4);
			Jit->STR(R14, R9, PPCSTATE_OFF(pc));
			Jit->LDR(R14, R9, PPCSTATE_OFF(Exceptions));
			Jit->ORR(R14, R14, EXCEPTION_SYSCALL);
			Jit->STR(R14, R9, PPCSTATE_OFF(Exceptions));
			Jit->WriteExceptionExit();
			break;
		}
		case ShortIdleLoop: {
			unsigned instAddr = ibuild->GetImmValue(getOp1(I));
			Jit->MOVI2R(R14, (u32)&CoreTiming::Idle);
			Jit->BL(R14);
			Jit->MOVI2R(R14, instAddr);
			Jit->STR(R14, R9, PPCSTATE_OFF(pc));
			Jit->WriteExceptionExit();
			break;
		}
		case InterpreterBranch: {
			Jit->LDR(R14, R9, PPCSTATE_OFF(npc));
			Jit->WriteExitDestInReg(R14);
			break;
		}
		case RFIExit: {
			const u32 mask = 0x87C0FFFF;
			const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
			// MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
			// rA (R14) = SRR0, the exit destination
			// rB (R12) = mask for MSR, then SRR1, then the merged MSR
			// rC (R11) = mask for SRR1
			// rD (R10) = MSR contents
			ARMReg rA = R14;
			ARMReg rB = R12;
			ARMReg rC = R11;
			ARMReg rD = R10;
			Jit->MOVI2R(rB, (~mask) & clearMSR13);
			Jit->MOVI2R(rC, mask & clearMSR13);

			Jit->LDR(rD, R9, PPCSTATE_OFF(msr));

			Jit->AND(rD, rD, rB); // rD = Masked MSR

			Jit->LDR(rB, R9, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here

			Jit->AND(rB, rB, rC); // rB contains masked SRR1 here
			Jit->ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1

			Jit->STR(rB, R9, PPCSTATE_OFF(msr)); // write the merged value back to MSR

			Jit->LDR(rA, R9, PPCSTATE_OFF(spr[SPR_SRR0]));

			Jit->WriteRfiExitDestInR(rA);
			break;
		}
		case Shl: {
			if (!thisUsed) break;
			regEmitShiftInst(RI, I, &JitArmIL::LSL);
			break;
		}
		case Shrl: {
			if (!thisUsed) break;
			regEmitShiftInst(RI, I, &JitArmIL::LSR);
			break;
		}
		case Sarl: {
			if (!thisUsed) break;
			regEmitShiftInst(RI, I, &JitArmIL::ASR);
			break;
		}
		case And: {
			if (!thisUsed) break;
			regEmitBinInst(RI, I, &JitArmIL::BIN_AND, true);
			break;
		}
		case Not: {
			if (!thisUsed) break;
			ARMReg reg = regBinLHSReg(RI, I);
			Jit->MVN(reg, reg);
			RI.regs[reg] = I;
			regNormalRegClear(RI, I);
			break;
		}
		case Or: {
			if (!thisUsed) break;
			regEmitBinInst(RI, I, &JitArmIL::BIN_OR, true);
			break;
		}
		case Xor: {
			if (!thisUsed) break;
			regEmitBinInst(RI, I, &JitArmIL::BIN_XOR, true);
			break;
		}
		case Add: {
			if (!thisUsed) break;
			regEmitBinInst(RI, I, &JitArmIL::BIN_ADD, true);
			break;
		}
		case ICmpCRUnsigned: {
			if (!thisUsed) break;
			regEmitCmp(RI, I);
			ARMReg reg = regBinReg(RI, I);
			Jit->MOV(reg, 0x2); // Result == 0
			Jit->SetCC(CC_LO); Jit->MOV(reg, 0x8); // Result < 0
			Jit->SetCC(CC_HI); Jit->MOV(reg, 0x4); // Result > 0
			Jit->SetCC();
			RI.regs[reg] = I;
			regNormalRegClear(RI, I);
			break;
		}
		case ICmpCRSigned: {
			if (!thisUsed) break;
			regEmitCmp(RI, I);
			ARMReg reg = regBinReg(RI, I);
			Jit->MOV(reg, 0x2); // Result == 0
			Jit->SetCC(CC_LT); Jit->MOV(reg, 0x8); // Result < 0
			Jit->SetCC(CC_GT); Jit->MOV(reg, 0x4); // Result > 0
			Jit->SetCC();
			RI.regs[reg] = I;
			regNormalRegClear(RI, I);
			break;
		}
		case Int3:
			Jit->BKPT(0x321);
			break;
		case Tramp: break;
		case Nop: break;
		default:
			PanicAlert("Unknown JIT instruction; aborting!");
			ibuild->WriteToFile(0);
			exit(1);
		}
	}

	// Every value must have been released by its last use.
	for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) {
		if (RI.regs[i]) {
			// Start a game in Burnout 2 to get this. Or animal crossing.
			PanicAlert("Incomplete cleanup! (regs)");
			exit(1);
		}
		if (RI.fregs[i]) {
			PanicAlert("Incomplete cleanup! (fregs)");
			exit(1);
		}
	}

	Jit->WriteExit(exitAddress);
	Jit->BKPT(0x111);
}
// Lowers this JIT's IR builder contents to ARM code, ending with an exit to
// exitAddress; forwards to DoWriteCode().
void JitArmIL::WriteCode(u32 exitAddress) {
DoWriteCode(&ibuild, this, exitAddress);
}

View file

@ -1 +0,0 @@

View file

@ -1,345 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <map>
#include "Common/ArmEmitter.h"
#include "Common/Common.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PatchEngine.h"
#include "Core/HLE/HLE.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
#include "Core/PowerPC/JitArmIL/JitIL_Tables.h"
using namespace ArmGen;
using namespace PowerPC;
// Size in bytes (32 MiB) of the code space requested in JitArmIL::Init().
static int CODE_SIZE = 1024*1024*32;
// Allocates the code space, initializes the block cache and the assembly
// routines, and points the code block at this JIT's stats / GPR / FPR
// analysis storage in `js`.
void JitArmIL::Init()
{
AllocCodeSpace(CODE_SIZE);
blocks.Init();
asm_routines.Init();
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
}
// Clears the code space and the block cache.
void JitArmIL::ClearCache()
{
ClearCodeSpace();
blocks.Clear();
}
// Releases what Init() set up: the code space, the block cache and the
// assembly routines.
void JitArmIL::Shutdown()
{
FreeCodeSpace();
blocks.Shutdown();
asm_routines.Shutdown();
}
// Handler for opcodes without an implementation: reports the raw
// instruction word through PanicAlert.
void JitArmIL::unknown_instruction(UGeckoInstruction inst)
{
// CCPU::Break();
PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex);
}
// Adds an IR FallBackToInterpreter node carrying the raw instruction word
// and the address currently being compiled (js.compilerPC).
void JitArmIL::FallBackToInterpreter(UGeckoInstruction _inst)
{
ibuild.EmitFallBackToInterpreter(
ibuild.EmitIntConst(_inst.hex),
ibuild.EmitIntConst(js.compilerPC));
}
// Placeholder: HLE function handling is not implemented; emits nothing.
void JitArmIL::HLEFunction(UGeckoInstruction _inst)
{
// XXX
}
// Handler for instructions that need no generated code.
void JitArmIL::DoNothing(UGeckoInstruction _inst)
{
// Yup, just don't do anything.
}
// Adds an IR INT3 (breakpoint) node.
void JitArmIL::Break(UGeckoInstruction _inst)
{
ibuild.EmitINT3();
}
void JitArmIL::DoDownCount()
{
ARMReg rA = R12;
LDR(rA, R9, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
{
SUBS(rA, rA, js.downcountAmount);
}
else
{
ARMReg rB = R11;
MOVI2R(rB, js.downcountAmount);
SUBS(rA, rA, rB);
}
STR(rA, R9, PPCSTATE_OFF(downcount));
}
// Emits a block exit whose destination PC is held in Reg: stores Reg to
// ppcState.pc, updates the downcount, then branches to the dispatcher.
// Reg is reused to hold the dispatcher address.
void JitArmIL::WriteExitDestInReg(ARMReg Reg)
{
	STR(Reg, R9, PPCSTATE_OFF(pc));

	DoDownCount();

	MOVI2R(Reg, (u32)asm_routines.dispatcher);
	B(Reg);
}
// Emits an rfi-style exit: stores Reg to ppcState.pc, then emits the same
// sequence as WriteExceptionExit() (downcount update, pc -> npc,
// PowerPC::CheckExceptions, npc -> pc, branch to the dispatcher).
void JitArmIL::WriteRfiExitDestInR(ARMReg Reg)
{
	STR(Reg, R9, PPCSTATE_OFF(pc));

	// Everything after the PC store is identical to the exception exit.
	WriteExceptionExit();
}
// Emits a block exit that checks exceptions before returning to the
// dispatcher.
void JitArmIL::WriteExceptionExit()
{
	DoDownCount();

	// npc = pc before the call.
	LDR(R0, R9, PPCSTATE_OFF(pc));
	STR(R0, R9, PPCSTATE_OFF(npc));

	QuickCallFunction(R0, (void*)&PowerPC::CheckExceptions);

	// pc = npc after the call.
	LDR(R0, R9, PPCSTATE_OFF(npc));
	STR(R0, R9, PPCSTATE_OFF(pc));

	MOVI2R(R0, (u32)asm_routines.dispatcher);
	B(R0);
}
// Emits a block exit to a known destination address and records it in the
// current block's link data. With block linking enabled and the destination
// already compiled, the exit branches straight to that block's checked
// entry; otherwise it stores the destination to ppcState.pc and branches to
// the dispatcher.
void JitArmIL::WriteExit(u32 destination)
{
	DoDownCount();

	// Record this exit (this can be removed once all branches are done).
	JitBlock::LinkData link;
	link.exitAddress = destination;
	link.exitPtrs = GetWritableCodePtr();
	link.linkStatus = false;

	// Only look the destination up when linking is enabled.
	int targetBlock = -1;
	if (jo.enableBlocklink)
		targetBlock = blocks.GetBlockNumberFromStartAddress(destination);

	if (targetBlock >= 0)
	{
		// Link opportunity: the destination block already exists.
		B(blocks.GetBlock(targetBlock)->checkedEntry);
		link.linkStatus = true;
	}
	else
	{
		MOVI2R(R14, destination);
		STR(R14, R9, PPCSTATE_OFF(pc));
		MOVI2R(R14, (u32)asm_routines.dispatcher);
		B(R14);
	}

	js.curBlock->linkData.push_back(link);
}
// Prints debugging information about an instruction to stdout.
//   level 0: prints nothing.
//   level 1: prints the current code pointer and the instruction name.
//   level 2 and up: additionally prints the output and input register fields
//                   selected by the instruction's FL_OUT_* / FL_IN_* flags.
void JitArmIL::PrintDebug(UGeckoInstruction inst, u32 level)
{
	if (level == 0)
		return;

	printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst));

	if (level == 1)
		return;

	const GekkoOPInfo* const info = GetOpInfo(inst.hex);
	const auto flags = info->flags;

	printf("\tOuts\n");
	if (flags & FL_OUT_A)
		printf("\t-OUT_A: %x\n", inst.RA);
	if (flags & FL_OUT_D)
		printf("\t-OUT_D: %x\n", inst.RD);

	// Inputs are listed in the order A, A0, B, C, S.
	printf("\tIns\n");
	if (flags & FL_IN_A)
		printf("\t-IN_A: %x\n", inst.RA);
	if (flags & FL_IN_A0)
		printf("\t-IN_A0: %x\n", inst.RA);
	if (flags & FL_IN_B)
		printf("\t-IN_B: %x\n", inst.RB);
	if (flags & FL_IN_C)
		printf("\t-IN_C: %x\n", inst.RC);
	if (flags & FL_IN_S)
		printf("\t-IN_S: %x\n", inst.RS);
}
// Calls the generated entry routine (asm_routines.enterCode).
void STACKALIGN JitArmIL::Run()
{
	((CompiledCode)asm_routines.enterCode)();
}
void JitArmIL::SingleStep()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
}
// Compiles the block at the current ppcState.pc and registers it in the
// block cache. The whole cache is cleared first when code space is running
// low, the block cache is full, or block caching is disabled.
// NOTE(review): em_address is not used; the block is always compiled at
// PowerPC::ppcState.pc - confirm callers always pass that value.
void STACKALIGN JitArmIL::Jit(u32 em_address)
{
	const bool mustClear = GetSpaceLeft() < 0x10000 ||
	                       blocks.IsFull() ||
	                       Core::g_CoreStartupParameter.bJITNoBlockCache;
	if (mustClear)
		ClearCache();

	const int blockNumber = blocks.AllocateBlock(PowerPC::ppcState.pc);
	JitBlock *newBlock = blocks.GetBlock(blockNumber);

	const u8 *entry = DoJit(PowerPC::ppcState.pc, &code_buffer, newBlock);
	blocks.FinalizeBlock(blockNumber, jo.enableBlocklink, entry);
}
// Compiles the PPC block starting at em_address and returns a pointer to the
// start of the emitted code (also stored as the block's checked entry).
//
//   em_address: guest address of the first instruction to compile.
//   code_buf:   buffer the analyzer fills with the block's instructions.
//   b:          block entry that receives the entry points, run count and
//               sizes.
//
// Changes from the previous version: the block hash that was computed but
// never read has been removed, nextPC is initialized once, and a stray
// double semicolon is gone.
const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b)
{
	int blockSize = code_buf->GetSize();

	if (Core::g_CoreStartupParameter.bEnableDebugging)
	{
		// Comment out the following to disable breakpoints (speed-up)
		blockSize = 1;
	}

	if (em_address == 0)
	{
		Core::SetState(Core::CORE_PAUSE);
		PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR);
	}

	js.isLastInstruction = false;
	js.blockStart = em_address;
	js.fifoBytesThisBlock = 0;
	js.curBlock = b;

	// Analyze the block, collect all instructions it is made of (including inlining,
	// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
	const u32 nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
	PPCAnalyst::CodeOp *ops = code_buf->codebuffer;

	const u8 *start = GetCodePtr();
	b->checkedEntry = start;
	b->runCount = 0;

	// Downcount flag check, Only valid for linked blocks
	{
		// XXX
	}

	const u8 *normalEntry = GetCodePtr();
	b->normalEntry = normalEntry;

	if (js.fpa.any)
	{
		// XXX
		// This block uses FPU - needs to add FP exception bailout
	}
	js.rewriteStart = (u8*)GetCodePtr();

	// Conditionally add profiling code.
	if (Profiler::g_ProfileBlocks) {
		// XXX
	}

	// Start up IR builder (structure that collects the
	// instruction processed by the JIT routines)
	ibuild.Reset();

	js.downcountAmount = 0;
	if (!Core::g_CoreStartupParameter.bEnableDebugging)
		js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);

	js.skipnext = false;
	js.compilerPC = nextPC;

	// Translate instructions
	for (u32 i = 0; i < code_block.m_num_instructions; i++)
	{
		js.compilerPC = ops[i].address;
		js.op = &ops[i];
		js.instructionNumber = i;
		const GekkoOPInfo *opinfo = ops[i].opinfo;
		js.downcountAmount += opinfo->numCycles;

		if (i == (code_block.m_num_instructions - 1))
		{
			// WARNING - cmp->branch merging will screw this up.
			js.isLastInstruction = true;
			js.next_inst = 0;
			if (Profiler::g_ProfileBlocks) {
				// CAUTION!!! push on stack regs you use, do your stuff, then pop
				PROFILER_VPUSH;
				// get end tic
				PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
				// tic counter += (end tic - start tic)
				PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart);
				PROFILER_VPOP;
			}
		}
		else
		{
			// help peephole optimizations
			js.next_inst = ops[i + 1].inst;
			js.next_compilerPC = ops[i + 1].address;
		}

		if (!ops[i].skip)
		{
			// Level 0 prints nothing; raise it to trace compilation.
			PrintDebug(ops[i].inst, 0);
			if (js.memcheck && (opinfo->flags & FL_USE_FPU))
			{
				// Don't do this yet
				BKPT(0x7777);
			}
			JitArmILTables::CompileInstruction(ops[i]);
			if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
			{
				// Don't do this yet
				BKPT(0x666);
			}
		}
	}

	if (code_block.m_memory_exception)
		BKPT(0x500);

	// NOTE(review): this exit is emitted before WriteCode() below - confirm
	// that ordering is intended for broken blocks.
	if (code_block.m_broken)
	{
		printf("Broken Block going to 0x%08x\n", nextPC);
		WriteExit(nextPC);
	}

	// Perform actual code generation
	WriteCode(nextPC);

	b->codeSize = (u32)(GetCodePtr() - normalEntry);
	b->originalSize = code_block.m_num_instructions;

	FlushIcache();
	return start;
}

View file

@ -1,91 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm32/JitArmCache.h"
#include "Core/PowerPC/JitArmIL/JitILAsm.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitILCommon/IR.h"
#include "Core/PowerPC/JitILCommon/JitILBase.h"
// Offset of PowerPC::ppcState.<elem> measured from ppcState.spr[0] rather than
// from the start of the struct. The generated code addresses ppcState fields
// as R9 + PPCSTATE_OFF(elem); the asm routines load R9 with &ppcState.spr[0].
// The result is signed, so fields placed before spr[0] yield negative offsets.
#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0]))
// IL-based JIT backend that emits ARM code. It combines the shared JitILBase
// front end with an ARM code block used as the emission target.
class JitArmIL : public JitILBase, public ArmGen::ARMCodeBlock
{
private:
	JitArmBlockCache blocks;
	JitArmILAsmRoutineManager asm_routines;

	// Debug printing; level 0 prints nothing.
	void PrintDebug(UGeckoInstruction inst, u32 level);
	// Emits the ppcState.downcount update for the current block.
	void DoDownCount();

public:
	// Construction and lifetime
	JitArmIL() {}
	~JitArmIL() {}

	void Init();
	void Shutdown();

	// Compilation
	void Jit(u32 em_address);
	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);

	JitBaseBlockCache *GetBlockCache()
	{
		return &blocks;
	}

	// Backpatching is not implemented here; always returns nullptr.
	const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx)
	{
		return nullptr;
	}

	bool IsInCodeSpace(u8 *ptr)
	{
		return IsInSpace(ptr);
	}

	void ClearCache();

	const u8 *GetDispatcher()
	{
		return asm_routines.dispatcher;
	}

	const CommonAsmRoutinesBase *GetAsmRoutines()
	{
		return &asm_routines;
	}

	const char *GetName()
	{
		return "JITARMIL";
	}

	// Execution
	void Run();
	void SingleStep();

	// Code generation and block exits
	void WriteCode(u32 exitAddress);
	void WriteExit(u32 destination);
	void WriteExitDestInReg(ArmGen::ARMReg Reg);
	void WriteRfiExitDestInR(ArmGen::ARMReg Reg);
	void WriteExceptionExit();

	// Opcode handlers
	void unknown_instruction(UGeckoInstruction inst);
	void FallBackToInterpreter(UGeckoInstruction inst);
	void DoNothing(UGeckoInstruction inst);
	void HLEFunction(UGeckoInstruction inst);
	void Break(UGeckoInstruction inst);

	// Sub-table dispatchers
	void DynaRunTable4(UGeckoInstruction inst);
	void DynaRunTable19(UGeckoInstruction inst);
	void DynaRunTable31(UGeckoInstruction inst);
	void DynaRunTable59(UGeckoInstruction inst);
	void DynaRunTable63(UGeckoInstruction inst);

	// Binary operations
	void BIN_AND(ArmGen::ARMReg reg, ArmGen::Operand2 op2);
	void BIN_XOR(ArmGen::ARMReg reg, ArmGen::Operand2 op2);
	void BIN_OR(ArmGen::ARMReg reg, ArmGen::Operand2 op2);
	void BIN_ADD(ArmGen::ARMReg reg, ArmGen::Operand2 op2);

	// Branch instructions
	void bx(UGeckoInstruction inst);
	void bcx(UGeckoInstruction inst);
	void bclrx(UGeckoInstruction inst);
	void bcctrx(UGeckoInstruction inst);
};

View file

@ -1,106 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/MemoryUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
#include "Core/PowerPC/JitArmIL/JitILAsm.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
using namespace ArmGen;
void JitArmILAsmRoutineManager::Generate()
{
enterCode = GetCodePtr();
PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR);
// Take care to 8-byte align stack for function calls.
// We are misaligned here because of an odd number of args for PUSH.
// It's not like x86 where you need to account for an extra 4 bytes
// consumed by CALL.
SUB(_SP, _SP, 4);
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
FixupBranch skipToRealDispatcher = B();
dispatcher = GetCodePtr();
printf("ILDispatcher is %p\n", dispatcher);
// Downcount Check
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = B_CC(CC_MI);
SetJumpTarget(skipToRealDispatcher);
dispatcherNoCheck = GetCodePtr();
// This block of code gets the address of the compiled block of code
// It runs though to the compiling portion if it isn't found
LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12
Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK
BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here.
MOVI2R(R14, (u32)jit->GetBlockCache()->iCache);
LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here
// R12 Confirmed this is the correct iCache Location loaded.
TST(R12, 0x80); // Test to see if it is a JIT block.
SetCC(CC_EQ);
// Success, it is our Jitblock.
MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
// LDR R14 right here to get CodePointers()[0] pointer.
LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size
LDR(R14, R14, R12); // Load the block address in to R14
B(R14);
// No need to jump anywhere after here, the block will go back to dispatcher start
SetCC();
// If we get to this point, that means that we don't have the block cached to execute
// So call ArmJit to compile the block and then execute it.
MOVI2R(R14, (u32)&Jit);
BL(R14);
B(dispatcherNoCheck);
SetJumpTarget(bail);
doTiming = GetCodePtr();
// XXX: In JIT64, Advance() gets called /after/ the exception checking
// once it jumps back to the start of outerLoop
QuickCallFunction(R14, (void*)&CoreTiming::Advance);
// Does exception checking
LDR(R0, R9, PPCSTATE_OFF(pc));
STR(R0, R9, PPCSTATE_OFF(npc));
QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
LDR(R0, R9, PPCSTATE_OFF(npc));
STR(R0, R9, PPCSTATE_OFF(pc));
// Check the state pointer to see if we are exiting
// Gets checked on every exception check
MOVI2R(R0, (u32)PowerPC::GetStatePtr());
MVN(R1, 0);
LDR(R0, R0);
TST(R0, R1);
FixupBranch Exit = B_CC(CC_NEQ);
B(dispatcher);
SetJumpTarget(Exit);
ADD(_SP, _SP, 4);
POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns
GenerateCommon();
FlushIcache();
}

View file

@ -1,27 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
// Owns the code space that holds the fixed assembly routines used by
// JitArmIL.
class JitArmILAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock
{
private:
	// Emits all routines into the allocated code space.
	void Generate();

	// No common routines are generated for this backend.
	void GenerateCommon() {}

public:
	// Allocates the code space, emits the routines, then write-protects it.
	void Init()
	{
		AllocCodeSpace(8192);
		Generate();
		WriteProtect();
	}

	// Frees the code space allocated by Init().
	void Shutdown()
	{
		FreeCodeSpace();
	}
};

View file

@ -1,168 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Common.h"
#include "Core/ConfigManager.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
// FIXME
// While defined this way, every handler that starts with NORMALBRANCH_START
// emits an interpreter fallback plus an interpreter branch and returns at
// once, so the rest of that handler's body is never reached. Switching to
// the empty definition below re-enables the IL implementations.
#define NORMALBRANCH_START FallBackToInterpreter(inst); ibuild.EmitInterpreterBranch(); return;
//#define NORMALBRANCH_START
// Unconditional branch (bx). Stores the link value when LK is set, and emits
// the branch itself only when this is the last instruction of the block.
void JitArmIL::bx(UGeckoInstruction inst)
{
	//NORMALBRANCH_START
	INSTRUCTION_START;

	// The link store must always be emitted, even when
	// PPCAnalyst::Flatten() has merged this branch into a larger block.
	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	// Not the last instruction: PPCAnalyst::Flatten() merged the blocks,
	// so there is nothing left to emit for the branch.
	if (!js.isLastInstruction)
		return;

	// AA selects an absolute target; otherwise the offset is PC-relative.
	const u32 offset = SignExt26(inst.LI << 2);
	const u32 destination = inst.AA ? offset : js.compilerPC + offset;

	// A branch to itself is emitted as a short idle loop.
	if (destination == js.compilerPC)
	{
		ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC));
		return;
	}

	ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
}
// Builds the IR value for a conditional branch's test from the BO/BI fields.
//   - (BO & 16) == 0: tests the CR bit selected by BI, XORed with the bit
//     mask when (BO & 8) is clear.
//   - (BO & 4) == 0: decrements CTR and stores it back; the test is
//     "CTR == 0" when (BO & 2) is set, otherwise the decremented CTR itself.
// If both tests exist they are ANDed together; if neither exists the
// constant 1 is returned.
static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst)
{
	IREmitter::InstLoc crCondition = 0;
	IREmitter::InstLoc ctrCondition = 0;

	if ((inst.BO & 16) == 0) // Test a CR bit
	{
		IREmitter::InstLoc crField = ibuild.EmitLoadCR(inst.BI >> 2);
		IREmitter::InstLoc bitMask = ibuild.EmitIntConst(8 >> (inst.BI & 3));
		crCondition = ibuild.EmitAnd(crField, bitMask);
		if (!(inst.BO & 8))
			crCondition = ibuild.EmitXor(bitMask, crCondition);
	}

	if ((inst.BO & 4) == 0)
	{
		IREmitter::InstLoc ctr = ibuild.EmitLoadCTR();
		ctr = ibuild.EmitSub(ctr, ibuild.EmitIntConst(1));
		ibuild.EmitStoreCTR(ctr);
		ctrCondition = (inst.BO & 2) ? ibuild.EmitICmpEq(ctr, ibuild.EmitIntConst(0)) : ctr;
	}

	if (crCondition && ctrCondition)
		return ibuild.EmitAnd(crCondition, ctrCondition);
	if (ctrCondition)
		return ctrCondition;
	if (crCondition)
		return crCondition;

	// No condition at all.
	return ibuild.EmitIntConst(1);
}
// Branch conditional to link register (bclrx).
// NORMALBRANCH_START is expanded first; as long as that macro returns, the
// IL implementation below it is not reached.
void JitArmIL::bclrx(UGeckoInstruction inst)
{
	NORMALBRANCH_START

	// Both the "no CR test" (bit 4) and "no CTR decrement" (bit 2) bits set.
	const bool noConditions = (inst.BO & (1 << 4)) && (inst.BO & (1 << 2));
	if (!js.isLastInstruction && noConditions)
	{
		if (inst.LK)
			ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
		return;
	}

	// This exact encoding is special-cased: branch straight to LR.
	if (inst.hex == 0x4e800020)
	{
		ibuild.EmitBranchUncond(ibuild.EmitLoadLink());
		return;
	}

	// Condition false: continue with the next instruction.
	IREmitter::InstLoc notTaken = TestBranch(ibuild, inst);
	notTaken = ibuild.EmitICmpEq(notTaken, ibuild.EmitIntConst(0));
	ibuild.EmitBranchCond(notTaken, ibuild.EmitIntConst(js.compilerPC + 4));

	// Condition true: branch to LR with the low two bits cleared. LR is
	// read before the optional link store below overwrites it.
	IREmitter::InstLoc target = ibuild.EmitLoadLink();
	target = ibuild.EmitAnd(target, ibuild.EmitIntConst(-4));

	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	ibuild.EmitBranchUncond(target);
}
// Branch conditional (bcx).
// NORMALBRANCH_START is expanded first; as long as that macro returns, the
// IL implementation below it is not reached.
void JitArmIL::bcx(UGeckoInstruction inst)
{
	NORMALBRANCH_START

	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	IREmitter::InstLoc Test = TestBranch(ibuild, inst);

	// AA selects an absolute target; otherwise the offset is PC-relative.
	const u32 offset = SignExt16(inst.BD << 2);
	const u32 destination = inst.AA ? offset : js.compilerPC + offset;

	// With idle skipping enabled, a specific branch encoding preceded by two
	// specific instruction words is emitted as an idle branch. The second
	// accepted form of the preceding word only counts on Wii.
	const bool isIdleLoop =
		SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
		inst.hex == 0x4182fff8 &&
		(Memory::ReadUnchecked_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 &&
		(Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x28000000 ||
		 (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x2C000000));

	if (isIdleLoop)
		ibuild.EmitIdleBranch(Test, ibuild.EmitIntConst(destination));
	else
		ibuild.EmitBranchCond(Test, ibuild.EmitIntConst(destination));

	// Not taken: continue with the next instruction.
	ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}
// Branch conditional to count register (bcctrx).
// NORMALBRANCH_START is expanded first; as long as that macro returns, the
// IL implementation below it is not reached.
void JitArmIL::bcctrx(UGeckoInstruction inst)
{
	NORMALBRANCH_START

	// Decrement CTR unless BO bit 4 (value) is set.
	if ((inst.BO & 4) == 0)
	{
		IREmitter::InstLoc ctr = ibuild.EmitLoadCTR();
		ctr = ibuild.EmitSub(ctr, ibuild.EmitIntConst(1));
		ibuild.EmitStoreCTR(ctr);
	}

	IREmitter::InstLoc test;
	if (inst.BO & 16)
	{
		// No CR test requested.
		test = ibuild.EmitIntConst(1);
	}
	else
	{
		// Test a CR bit
		IREmitter::InstLoc crField = ibuild.EmitLoadCR(inst.BI >> 2);
		IREmitter::InstLoc bitMask = ibuild.EmitIntConst(8 >> (inst.BI & 3));
		test = ibuild.EmitAnd(crField, bitMask);
		if (!(inst.BO & 8))
			test = ibuild.EmitXor(test, bitMask);
	}

	// Condition false: continue with the next instruction.
	test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0));
	ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4));

	// Condition true: branch to CTR with the low two bits cleared.
	IREmitter::InstLoc target = ibuild.EmitLoadCTR();
	target = ibuild.EmitAnd(target, ibuild.EmitIntConst(-4));

	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	ibuild.EmitBranchUncond(target);
}

View file

@ -1,492 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
#include "Core/PowerPC/JitArmIL/JitIL_Tables.h"
// Should be moved in to the Jit class
// Pointer to a JitArmIL member function that compiles one instruction.
typedef void (JitArmIL::*_Instruction) (UGeckoInstruction instCode);
// Handler lookup tables. dynaOpTable has 64 entries (presumably one per
// 6-bit primary opcode - confirm against the table initialization). The
// numbered tables are indexed by the DynaRunTableNN dispatchers: tables 4,
// 19, 31 and 63 by SUBOP10 (1024 entries), table 59 by SUBOP5 (32 entries).
static _Instruction dynaOpTable[64];
static _Instruction dynaOpTable4[1024];
static _Instruction dynaOpTable19[1024];
static _Instruction dynaOpTable31[1024];
static _Instruction dynaOpTable59[32];
static _Instruction dynaOpTable63[1024];
// Forwards the instruction to the dynaOpTable4 handler selected by SUBOP10.
void JitArmIL::DynaRunTable4(UGeckoInstruction _inst)
{
	const _Instruction handler = dynaOpTable4[_inst.SUBOP10];
	(this->*handler)(_inst);
}
// Forwards the instruction to the dynaOpTable19 handler selected by SUBOP10.
void JitArmIL::DynaRunTable19(UGeckoInstruction _inst)
{
	const _Instruction handler = dynaOpTable19[_inst.SUBOP10];
	(this->*handler)(_inst);
}
// Forwards the instruction to the dynaOpTable31 handler selected by SUBOP10.
void JitArmIL::DynaRunTable31(UGeckoInstruction _inst)
{
	const _Instruction handler = dynaOpTable31[_inst.SUBOP10];
	(this->*handler)(_inst);
}
// Forwards the instruction to the dynaOpTable59 handler selected by SUBOP5.
void JitArmIL::DynaRunTable59(UGeckoInstruction _inst)
{
	const _Instruction handler = dynaOpTable59[_inst.SUBOP5];
	(this->*handler)(_inst);
}
// Forwards the instruction to the dynaOpTable63 handler selected by SUBOP10.
void JitArmIL::DynaRunTable63(UGeckoInstruction _inst)
{
	const _Instruction handler = dynaOpTable63[_inst.SUBOP10];
	(this->*handler)(_inst);
}
// One row of an opcode table: the opcode value and the JitArmIL member
// function that handles it.
struct GekkoOPTemplate
{
	int opcode;
	_Instruction Inst;

	// No opinfo is stored here; the Interpreter fills it out.
	//GekkoOPInfo opinfo;
};
// Primary opcode table: maps an opcode number to its JitArmIL handler.
// Opcodes 4, 19, 31, 59 and 63 go to the DynaRunTableNN sub-table
// dispatchers. The trailing comment on each row appears to carry the
// instruction name, type and flags from the interpreter's table; the
// commented-out struct field says opinfo itself is filled out there.
static GekkoOPTemplate primarytable[] =
{
{4, &JitArmIL::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}},
{19, &JitArmIL::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}},
{31, &JitArmIL::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}},
{59, &JitArmIL::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}},
{63, &JitArmIL::DynaRunTable63}, //"RunTable63", OPTYPE_SUBTABLE | (63<<24), 0}},
{16, &JitArmIL::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{18, &JitArmIL::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{1, &JitArmIL::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{2, &JitArmIL::FallBackToInterpreter}, //"DynaBlock", OPTYPE_SYSTEM, 0}},
{3, &JitArmIL::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{17, &JitArmIL::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}},
{7, &JitArmIL::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, &JitArmIL::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, &JitArmIL::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &JitArmIL::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &JitArmIL::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &JitArmIL::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{14, &JitArmIL::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{15, &JitArmIL::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{20, &JitArmIL::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}},
{21, &JitArmIL::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{23, &JitArmIL::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}},
{24, &JitArmIL::reg_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
{25, &JitArmIL::reg_imm}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
{26, &JitArmIL::reg_imm}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
{27, &JitArmIL::reg_imm}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
{28, &JitArmIL::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{29, &JitArmIL::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{32, &JitArmIL::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &JitArmIL::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &JitArmIL::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &JitArmIL::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &JitArmIL::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &JitArmIL::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &JitArmIL::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &JitArmIL::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{44, &JitArmIL::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &JitArmIL::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{36, &JitArmIL::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{37, &JitArmIL::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{38, &JitArmIL::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{39, &JitArmIL::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{46, &JitArmIL::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArmIL::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{48, &JitArmIL::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArmIL::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArmIL::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArmIL::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArmIL::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArmIL::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArmIL::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArmIL::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &JitArmIL::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &JitArmIL::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &JitArmIL::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &JitArmIL::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
//missing: 0, 5, 6, 9, 22, 30, 62, 58
{0, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{5, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{6, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{9, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{22, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{30, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{62, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
{58, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
};
// Handlers for primary opcode 4, keyed by SUBOP10. Every row currently
// falls back to the interpreter.
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArmIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArmIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArmIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArmIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArmIL::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArmIL::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArmIL::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArmIL::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArmIL::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
};
// Second group of primary-opcode-4 handlers. The opcode values here fit in
// five bits, so presumably they key a shorter sub-opcode field than SUBOP10
// - TODO confirm against the table initialization code. Every row currently
// falls back to the interpreter.
static GekkoOPTemplate table4_2[] =
{
{10, &JitArmIL::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArmIL::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArmIL::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArmIL::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArmIL::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArmIL::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArmIL::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}},
{20, &JitArmIL::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}},
{21, &JitArmIL::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}},
{23, &JitArmIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}},
{24, &JitArmIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
{25, &JitArmIL::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}},
{26, &JitArmIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &JitArmIL::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}},
{29, &JitArmIL::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}},
{30, &JitArmIL::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}},
{31, &JitArmIL::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}},
};
// Third group of primary-opcode-4 handlers: the indexed paired-single
// quantized loads and stores. Presumably keyed by a sub-opcode field shorter
// than SUBOP10 - TODO confirm against the table initialization code. Every
// row currently falls back to the interpreter.
static GekkoOPTemplate table4_3[] =
{
{6, &JitArmIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
{7, &JitArmIL::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}},
{38, &JitArmIL::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}},
{39, &JitArmIL::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}},
};
// Handlers for primary opcode 19 (DynaRunTable19 indexes them by SUBOP10):
// the branch-to-register instructions, the CR logical operations, and a few
// system instructions.
static GekkoOPTemplate table19[] =
{
{528, &JitArmIL::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}},
{16, &JitArmIL::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}},
{257, &JitArmIL::crXX}, //"crand", OPTYPE_CR, FL_EVIL}},
{129, &JitArmIL::crXX}, //"crandc", OPTYPE_CR, FL_EVIL}},
{289, &JitArmIL::crXX}, //"creqv", OPTYPE_CR, FL_EVIL}},
{225, &JitArmIL::crXX}, //"crnand", OPTYPE_CR, FL_EVIL}},
{33, &JitArmIL::crXX}, //"crnor", OPTYPE_CR, FL_EVIL}},
{449, &JitArmIL::crXX}, //"cror", OPTYPE_CR, FL_EVIL}},
{417, &JitArmIL::crXX}, //"crorc", OPTYPE_CR, FL_EVIL}},
{193, &JitArmIL::crXX}, //"crxor", OPTYPE_CR, FL_EVIL}},
{150, &JitArmIL::FallBackToInterpreter}, //"isync", OPTYPE_ICACHE, FL_EVIL}},
{0, &JitArmIL::FallBackToInterpreter}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}},
{50, &JitArmIL::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}},
{18, &JitArmIL::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}}
};
static GekkoOPTemplate table31[] =
{
{28, &JitArmIL::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{60, &JitArmIL::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{444, &JitArmIL::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{124, &JitArmIL::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{316, &JitArmIL::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{412, &JitArmIL::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{476, &JitArmIL::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, &JitArmIL::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{0, &JitArmIL::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, &JitArmIL::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{26, &JitArmIL::FallBackToInterpreter}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{922, &JitArmIL::FallBackToInterpreter}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, &JitArmIL::FallBackToInterpreter}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &JitArmIL::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &JitArmIL::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{824, &JitArmIL::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{24, &JitArmIL::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &JitArmIL::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
{86, &JitArmIL::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}},
{246, &JitArmIL::FallBackToInterpreter}, //"dcbtst", OPTYPE_DCACHE, 0, 1}},
{278, &JitArmIL::FallBackToInterpreter}, //"dcbt", OPTYPE_DCACHE, 0, 1}},
{470, &JitArmIL::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}},
{758, &JitArmIL::FallBackToInterpreter}, //"dcba", OPTYPE_DCACHE, 0, 4}},
{1014, &JitArmIL::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
//load word
{23, &JitArmIL::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &JitArmIL::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword
{279, &JitArmIL::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &JitArmIL::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword signextend
{343, &JitArmIL::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &JitArmIL::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte
{87, &JitArmIL::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &JitArmIL::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse
{534, &JitArmIL::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &JitArmIL::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
// Conditional load/store (Wii SMP)
{150, &JitArmIL::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}},
{20, &JitArmIL::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}},
//load string (interpret these)
{533, &JitArmIL::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}},
{597, &JitArmIL::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
//store word
{151, &JitArmIL::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, &JitArmIL::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store halfword
{407, &JitArmIL::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, &JitArmIL::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store byte
{215, &JitArmIL::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, &JitArmIL::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store bytereverse
{662, &JitArmIL::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{918, &JitArmIL::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}},
{661, &JitArmIL::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}},
{725, &JitArmIL::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
// fp load/store
{535, &JitArmIL::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArmIL::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArmIL::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArmIL::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{663, &JitArmIL::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArmIL::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArmIL::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArmIL::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArmIL::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArmIL::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
{83, &JitArmIL::FallBackToInterpreter}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}},
{144, &JitArmIL::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}},
{146, &JitArmIL::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
{210, &JitArmIL::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}},
{242, &JitArmIL::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}},
{339, &JitArmIL::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &JitArmIL::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &JitArmIL::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &JitArmIL::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{595, &JitArmIL::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &JitArmIL::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{4, &JitArmIL::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}},
{598, &JitArmIL::FallBackToInterpreter}, //"sync", OPTYPE_SYSTEM, 0, 2}},
{982, &JitArmIL::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}},
// Unused instructions on GC
{310, &JitArmIL::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}},
{438, &JitArmIL::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}},
{854, &JitArmIL::FallBackToInterpreter}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}},
{306, &JitArmIL::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}},
{370, &JitArmIL::FallBackToInterpreter}, //"tlbia", OPTYPE_SYSTEM, 0}},
{566, &JitArmIL::FallBackToInterpreter}, //"tlbsync", OPTYPE_SYSTEM, 0}},
};
// Second sub-opcode table that InitTables() copies into dynaOpTable31 (after
// table31). It holds the integer add/sub/mul/div/neg forms; every entry
// currently routes to FallBackToInterpreter. Rows whose opcode differs by 512
// from another row (e.g. 10/522, 266/778) are the "o" variants of the same
// operation and are listed explicitly rather than generated.
static GekkoOPTemplate table31_2[] =
{
{266, &JitArmIL::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &JitArmIL::FallBackToInterpreter}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &JitArmIL::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &JitArmIL::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &JitArmIL::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &JitArmIL::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &JitArmIL::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &JitArmIL::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{491, &JitArmIL::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{1003, &JitArmIL::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, &JitArmIL::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{971, &JitArmIL::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{75, &JitArmIL::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{11, &JitArmIL::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{235, &JitArmIL::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{747, &JitArmIL::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
{104, &JitArmIL::FallBackToInterpreter}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &JitArmIL::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &JitArmIL::FallBackToInterpreter}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{8, &JitArmIL::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &JitArmIL::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &JitArmIL::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{232, &JitArmIL::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{200, &JitArmIL::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
};
// Sub-opcode table copied into the 32-entry dynaOpTable59 by InitTables().
// Every listed single-precision FP operation falls back to the interpreter.
// Sub-opcode 22 (fsqrtsx) is commented out, so its slot keeps the
// unknown_instruction handler that InitTables() installs by default.
static GekkoOPTemplate table59[] =
{
{18, &JitArmIL::FallBackToInterpreter}, //"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
{20, &JitArmIL::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArmIL::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
// {22, &JitArmIL::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
{24, &JitArmIL::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArmIL::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArmIL::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArmIL::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArmIL::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArmIL::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
};
// First sub-opcode table copied into dynaOpTable63 by InitTables(); each entry
// is stored at exactly its own opcode index (no replication). It covers the
// FP compare/move/convert operations and the FPSCR access instructions, all of
// which currently fall back to the interpreter.
static GekkoOPTemplate table63[] =
{
{264, &JitArmIL::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{32, &JitArmIL::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
{0, &JitArmIL::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, &JitArmIL::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArmIL::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArmIL::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArmIL::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArmIL::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArmIL::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArmIL::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
{583, &JitArmIL::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}},
{70, &JitArmIL::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}},
{38, &JitArmIL::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}},
{134, &JitArmIL::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}},
{711, &JitArmIL::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}},
};
// Second sub-opcode table for dynaOpTable63. The opcodes here are 5-bit values;
// InitTables() replicates each entry 32 times, once for every value of the
// upper five index bits (index = (i << 5) + opcode). All double-precision
// arithmetic operations listed currently fall back to the interpreter.
static GekkoOPTemplate table63_2[] =
{
{18, &JitArmIL::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
{20, &JitArmIL::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, &JitArmIL::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
{22, &JitArmIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitArmIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitArmIL::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitArmIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitArmIL::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitArmIL::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitArmIL::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{31, &JitArmIL::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
};
namespace JitArmILTables
{
// Compiles one PowerPC instruction: looks up the handler for the instruction's
// primary opcode (OPCD) in dynaOpTable and invokes it on the global jit
// instance, then updates the per-opcode statistics when op carries opinfo.
//
// op: analysed instruction; op.inst is the raw instruction word and op.opinfo
//     (may be null) is the statistics record that gets updated.
void CompileInstruction(PPCAnalyst::CodeOp & op)
{
	JitArmIL *jitarm = (JitArmIL *)jit;
	(jitarm->*dynaOpTable[op.inst.OPCD])(op.inst);
	GekkoOPInfo *info = op.opinfo;
	if (info) {
#ifdef OPLOG
		if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs"
			// jit is a pointer (see the cast above and lastUse below), so it
			// has to be dereferenced with "->"; "jit.js" does not compile.
			rsplocations.push_back(jit->js.compilerPC);
		}
#endif
		info->compileCount++;
		info->lastUse = jit->js.compilerPC;
	}
}
// Builds the opcode dispatch tables (dynaOpTable and the per-primary-opcode
// sub-tables 4, 19, 31, 59 and 63) from the static template arrays above.
// Only the first call does any work; the tables are read-only afterwards.
void InitTables()
{
	static bool initialized = false;
	if (initialized)
		return;

	// Default every sub-opcode slot to the "unknown instruction" handler.
	for (int slot = 0; slot < 32; slot++)
	{
		dynaOpTable59[slot] = &JitArmIL::unknown_instruction;
	}
	for (int slot = 0; slot < 1024; slot++)
	{
		dynaOpTable4 [slot] = &JitArmIL::unknown_instruction;
		dynaOpTable19[slot] = &JitArmIL::unknown_instruction;
		dynaOpTable31[slot] = &JitArmIL::unknown_instruction;
		dynaOpTable63[slot] = &JitArmIL::unknown_instruction;
	}

	// Primary opcode table.
	for (const auto& entry : primarytable)
	{
		dynaOpTable[entry.opcode] = entry.Inst;
	}

	// Table 4: the replicated sub-tables go in first, then the exact entries
	// from table4 are written last so that they win on any shared index.
	for (int upper = 0; upper < 32; upper++)
	{
		const int base = upper << 5;
		for (const auto& entry : table4_2)
		{
			dynaOpTable4[base + entry.opcode] = entry.Inst;
		}
	}
	for (int upper = 0; upper < 16; upper++)
	{
		const int base = upper << 6;
		for (const auto& entry : table4_3)
		{
			dynaOpTable4[base + entry.opcode] = entry.Inst;
		}
	}
	for (const auto& entry : table4)
	{
		dynaOpTable4[entry.opcode] = entry.Inst;
	}

	// Table 31: exact entries first, then table31_2. table31_2 is applied
	// with a zero base only, i.e. its entries are not replicated.
	for (const auto& entry : table31)
	{
		dynaOpTable31[entry.opcode] = entry.Inst;
	}
	for (const auto& entry : table31_2)
	{
		dynaOpTable31[entry.opcode] = entry.Inst;
	}

	// Tables 19 and 59 have exact entries only.
	for (const auto& entry : table19)
	{
		dynaOpTable19[entry.opcode] = entry.Inst;
	}
	for (const auto& entry : table59)
	{
		dynaOpTable59[entry.opcode] = entry.Inst;
	}

	// Table 63: exact entries, followed by table63_2 replicated over every
	// value of the upper five index bits.
	for (const auto& entry : table63)
	{
		dynaOpTable63[entry.opcode] = entry.Inst;
	}
	for (int upper = 0; upper < 32; upper++)
	{
		const int base = upper << 5;
		for (const auto& entry : table63_2)
		{
			dynaOpTable63[base + entry.opcode] = entry.Inst;
		}
	}

	initialized = true;
}
} // namespace

View file

@ -1,14 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCTables.h"
// Opcode dispatch tables for the JitArmIL recompiler.
namespace JitArmILTables
{
// Dispatches op to the handler registered for its primary opcode and updates
// the per-opcode compile statistics when op.opinfo is set.
void CompileInstruction(PPCAnalyst::CodeOp & op);
// Fills the dispatch tables; work is done on the first call only.
void InitTables();
}

View file

@ -356,6 +356,31 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
return getOp1(Op1);
}
}
if (Opcode == FastCRGTSet)
{
if (getOpcode(*Op1) == ICmpCRSigned)
return EmitICmpSgt(getOp1(Op1), getOp2(Op1));
if (getOpcode(*Op1) == ICmpCRUnsigned)
return EmitICmpUgt(getOp1(Op1), getOp2(Op1));
if (isImm(*Op1))
return EmitIntConst((s64)GetImmValue64(Op1) > 0);
}
if (Opcode == FastCRLTSet)
{
if (getOpcode(*Op1) == ICmpCRSigned)
return EmitICmpSlt(getOp1(Op1), getOp2(Op1));
if (getOpcode(*Op1) == ICmpCRUnsigned)
return EmitICmpUlt(getOp1(Op1), getOp2(Op1));
if (isImm(*Op1))
return EmitIntConst(!!(GetImmValue64(Op1) & (1ull << 62)));
}
if (Opcode == FastCREQSet)
{
if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned)
return EmitICmpEq(getOp1(Op1), getOp2(Op1));
if (isImm(*Op1))
return EmitIntConst((GetImmValue64(Op1) & 0xFFFFFFFFU) == 0);
}
return EmitUOp(Opcode, Op1, extra);
}
@ -778,6 +803,35 @@ InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) {
return EmitBiOp(Or, Op1, Op2);
}
// Returns the integer-compare opcode that is the logical negation of op
// (e.g. ICmpEq <-> ICmpNe, ICmpUlt <-> ICmpUge). For any opcode that is not
// one of the ten ICmp* comparisons handled here, raises a PanicAlert and
// returns Nop.
static unsigned ICmpInverseOp(unsigned op)
{
	// Each row pairs a comparison with its negation; the mapping is symmetric.
	static const unsigned inverse_pairs[][2] = {
		{ ICmpEq,  ICmpNe  },
		{ ICmpUlt, ICmpUge },
		{ ICmpUgt, ICmpUle },
		{ ICmpSlt, ICmpSge },
		{ ICmpSgt, ICmpSle },
	};

	for (const auto& pair : inverse_pairs)
	{
		if (op == pair[0])
			return pair[1];
		if (op == pair[1])
			return pair[0];
	}

	PanicAlert("Bad opcode");
	return Nop;
}
InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) {
simplifyCommutative(Xor, Op1, Op2);
@ -794,6 +848,11 @@ InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) {
GetImmValue(getOp2(Op1));
return FoldXor(getOp1(Op1), EmitIntConst(RHS));
}
if (isICmp(getOpcode(*Op1)) && GetImmValue(Op2) == 1)
{
return FoldBiOp(ICmpInverseOp(getOpcode(*Op1)), getOp1(Op1), getOp2(Op1));
}
}
if (Op1 == Op2) return EmitIntConst(0);
@ -849,42 +908,6 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
return EmitBranchUncond(Op2);
return nullptr;
}
if (getOpcode(*Op1) == And &&
isImm(*getOp2(Op1)) &&
getOpcode(*getOp1(Op1)) == ICmpCRSigned) {
unsigned branchValue = GetImmValue(getOp2(Op1));
if (branchValue == 2)
return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
if (branchValue == 4)
return FoldBranchCond(EmitICmpSgt(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
if (branchValue == 8)
return FoldBranchCond(EmitICmpSlt(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
}
if (getOpcode(*Op1) == Xor &&
isImm(*getOp2(Op1))) {
InstLoc XOp1 = getOp1(Op1);
unsigned branchValue = GetImmValue(getOp2(Op1));
if (getOpcode(*XOp1) == And &&
isImm(*getOp2(XOp1)) &&
getOpcode(*getOp1(XOp1)) == ICmpCRSigned) {
unsigned innerBranchValue =
GetImmValue(getOp2(XOp1));
if (branchValue == innerBranchValue) {
if (branchValue == 2)
return FoldBranchCond(EmitICmpNe(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
if (branchValue == 4)
return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
if (branchValue == 8)
return FoldBranchCond(EmitICmpSge(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
}
}
}
return EmitBiOp(BranchCond, Op1, Op2);
}
@ -967,16 +990,8 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) {
if (isImm(*Op1)) {
if (isImm(*Op2)) {
int c1 = (int)GetImmValue(Op1),
c2 = (int)GetImmValue(Op2),
result;
if (c1 == c2)
result = 2;
else if (c1 > c2)
result = 4;
else
result = 8;
return EmitIntConst(result);
s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2);
return EmitIntConst64((u64)diff);
}
}
return EmitBiOp(ICmpCRSigned, Op1, Op2);
@ -985,16 +1000,8 @@ InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) {
InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) {
if (isImm(*Op1)) {
if (isImm(*Op2)) {
unsigned int c1 = GetImmValue(Op1),
c2 = GetImmValue(Op2),
result;
if (c1 == c2)
result = 2;
else if (c1 > c2)
result = 4;
else
result = 8;
return EmitIntConst(result);
u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2);
return EmitIntConst64(diff);
}
}
return EmitBiOp(ICmpCRUnsigned, Op1, Op2);
@ -1056,7 +1063,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned
}
}
InstLoc IRBuilder::EmitIntConst(unsigned value) {
InstLoc IRBuilder::EmitIntConst64(u64 value) {
InstLoc curIndex = InstList.data() + InstList.size();
InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8));
MarkUsed.push_back(false);
@ -1064,7 +1071,7 @@ InstLoc IRBuilder::EmitIntConst(unsigned value) {
return curIndex;
}
unsigned IRBuilder::GetImmValue(InstLoc I) const {
u64 IRBuilder::GetImmValue64(InstLoc I) const {
return ConstList[*I >> 8];
}
@ -1129,9 +1136,9 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
numberOfOperands[CInt16] = 0;
numberOfOperands[CInt32] = 0;
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, };
static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
for (auto& op : ZeroOp) {
numberOfOperands[op] = 0;
}
@ -1235,10 +1242,12 @@ static std::unique_ptr<Writer> writer;
static const std::string opcodeNames[] = {
"Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR",
"LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw",
"Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg",
"StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF",
"StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or",
"Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
"Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR",
"ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry",
"StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR",
"FastCRSOSet", "FastCREQSet", "FastCRGTSet", "FastCRLTSet",
"FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor",
"MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
"ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt",
"ICmpUlt", "ICmpUge", "ICmpUle", "ICmpSgt", "ICmpSlt", "ICmpSge",
"ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start",

View file

@ -33,6 +33,9 @@ enum Opcode {
Load8, // These loads zext
Load16,
Load32,
// CR conversions
ConvertFromFastCR,
ConvertToFastCR,
// Branches
BranchUncond,
// Register store operators
@ -45,6 +48,11 @@ enum Opcode {
StoreFPRF,
StoreGQR,
StoreSRR,
// Branch conditions
FastCRSOSet,
FastCREQSet,
FastCRGTSet,
FastCRLTSet,
// Arbitrary interpreter instruction
FallBackToInterpreter,
@ -74,6 +82,7 @@ enum Opcode {
ICmpSlt,
ICmpSge,
ICmpSle, // Opposite of sgt
// Memory store operators
Store8,
Store16,
@ -237,7 +246,8 @@ private:
unsigned ComputeKnownZeroBits(InstLoc I) const;
public:
InstLoc EmitIntConst(unsigned value);
InstLoc EmitIntConst(unsigned value) { return EmitIntConst64(value); }
InstLoc EmitIntConst64(u64 value);
// Emits a StoreLink unary op on val, routed through FoldUOp.
InstLoc EmitStoreLink(InstLoc val) {
return FoldUOp(StoreLink, val);
}
@ -373,6 +383,24 @@ public:
// Emits an ICmpCRUnsigned binary op on (op1, op2), routed through FoldBiOp.
InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpCRUnsigned, op1, op2);
}
// Emits a ConvertFromFastCR unary op on op1, routed through FoldUOp.
// NOTE(review): presumably converts the internal CR value to the 4-bit
// PowerPC CR field (that is how mfcr/crXX use it) - confirm in the backend.
InstLoc EmitConvertFromFastCR(InstLoc op1) {
return FoldUOp(ConvertFromFastCR, op1);
}
// Emits a ConvertToFastCR unary op on op1, routed through FoldUOp.
// NOTE(review): presumably converts a 4-bit PowerPC CR field to the internal
// CR value (that is how mtcrf/crXX use it) - confirm in the backend.
InstLoc EmitConvertToFastCR(InstLoc op1) {
return FoldUOp(ConvertToFastCR, op1);
}
// Emits a FastCRSOSet unary op (test of the SO condition) on the CR value op1,
// routed through FoldUOp.
InstLoc EmitFastCRSOSet(InstLoc op1) {
return FoldUOp(FastCRSOSet, op1);
}
// Emits a FastCREQSet unary op (test of the EQ condition) on the CR value op1.
// FoldUOp may replace it with an ICmpEq or a constant when op1 is a CR compare
// or an immediate.
InstLoc EmitFastCREQSet(InstLoc op1) {
return FoldUOp(FastCREQSet, op1);
}
// Emits a FastCRLTSet unary op (test of the LT condition) on the CR value op1.
// FoldUOp may replace it with an ICmpSlt/ICmpUlt or a constant when op1 is a
// CR compare or an immediate.
InstLoc EmitFastCRLTSet(InstLoc op1) {
return FoldUOp(FastCRLTSet, op1);
}
// Emits a FastCRGTSet unary op (test of the GT condition) on the CR value op1.
// FoldUOp may replace it with an ICmpSgt/ICmpUgt or a constant when op1 is a
// CR compare or an immediate.
InstLoc EmitFastCRGTSet(InstLoc op1) {
return FoldUOp(FastCRGTSet, op1);
}
// Emits a FallBackToInterpreter binary op on (op1, op2), routed through
// FoldBiOp.
InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) {
return FoldBiOp(FallBackToInterpreter, op1, op2);
}
@ -532,7 +560,8 @@ public:
// Returns a pointer to the first element of InstList.
InstLoc getFirstInst() { return InstList.data(); }
// Returns the number of instructions in InstList, narrowed to unsigned int.
unsigned int getNumInsts() { return (unsigned int)InstList.size(); }
// Returns the encoded instruction word that I points at.
unsigned int ReadInst(InstLoc I) { return *I; }
unsigned int GetImmValue(InstLoc I) const;
unsigned int GetImmValue(InstLoc I) const { return (u32)GetImmValue64(I); }
u64 GetImmValue64(InstLoc I) const;
void SetMarkUsed(InstLoc I);
bool IsMarkUsed(InstLoc I) const;
void WriteToFile(u64 codeHash);
@ -571,7 +600,7 @@ private:
std::vector<Inst> InstList; // FIXME: We must ensure this is continuous!
std::vector<bool> MarkUsed; // Used for IRWriter
std::vector<unsigned> ConstList;
std::vector<u64> ConstList;
InstLoc curReadPtr;
InstLoc GRegCache[32];
InstLoc GRegCacheStore[32];

View file

@ -61,15 +61,35 @@ void JitILBase::bx(UGeckoInstruction inst)
ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
}
// Builds the IR for the CR-bit condition of a conditional branch.
//
// inst.BI selects the bit: BI >> 2 is the CR field that gets loaded, and
// 3 - (BI & 3) is the CR_*_BIT index of the flag to test within that field.
// When BO bit 8 is clear, the test result is inverted by XOR-ing it with 1.
// Returns the IR value holding the (possibly inverted) test result.
static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst)
{
	IREmitter::InstLoc field = ibuild.EmitLoadCR(inst.BI >> 2);
	const auto flag_index = 3 - (inst.BI & 3);

	IREmitter::InstLoc condition;
	if (flag_index == CR_SO_BIT)
		condition = ibuild.EmitFastCRSOSet(field);
	else if (flag_index == CR_EQ_BIT)
		condition = ibuild.EmitFastCREQSet(field);
	else if (flag_index == CR_GT_BIT)
		condition = ibuild.EmitFastCRGTSet(field);
	else if (flag_index == CR_LT_BIT)
		condition = ibuild.EmitFastCRLTSet(field);

	// BO & 8 set: use the flag as tested; no inversion is needed.
	if (inst.BO & 8)
		return condition;

	IREmitter::InstLoc one = ibuild.EmitIntConst(1);
	return ibuild.EmitXor(condition, one);
}
static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) {
IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr;
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
CRTest = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))
CRTest = ibuild.EmitXor(CRCmp, CRTest);
CRTest = EmitCRTest(ibuild, inst);
}
if ((inst.BO & 4) == 0) {
@ -140,12 +160,10 @@ void JitILBase::bcctrx(UGeckoInstruction inst)
IREmitter::InstLoc test;
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
test = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))
test = ibuild.EmitXor(test, CRCmp);
} else {
test = EmitCRTest(ibuild, inst);
}
else
{
test = ibuild.EmitIntConst(1);
}
test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0));

View file

@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst)
int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
ibuild.EmitStoreFPRF(res);
ibuild.EmitStoreCR(res, inst.CRFD);
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD);
}
void JitILBase::fsign(UGeckoInstruction inst)

View file

@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst)
IREmitter::InstLoc d = ibuild.EmitIntConst(0);
for (int i = 0; i < 8; ++i)
{
d = ibuild.EmitShl(d, ibuild.EmitIntConst(4));
d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i));
IREmitter::InstLoc cr = ibuild.EmitLoadCR(i);
cr = ibuild.EmitConvertFromFastCR(cr);
cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i));
d = ibuild.EmitOr(d, cr);
}
ibuild.EmitStoreGReg(d, inst.RD);
}
@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst)
IREmitter::InstLoc value;
value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4));
value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF));
value = ibuild.EmitConvertToFastCR(value);
ibuild.EmitStoreCR(value, i);
}
}
@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Get bit CRBA in EAX aligned with bit CRBD
const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3);
IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2);
eax = ibuild.EmitConvertFromFastCR(eax);
if (shiftA < 0)
eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA));
else if (shiftA > 0)
@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Get bit CRBB in ECX aligned with bit CRBD
const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3);
IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2);
ecx = ibuild.EmitConvertFromFastCR(ecx);
if (shiftB < 0)
ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB));
else if (shiftB > 0)
@ -211,7 +216,9 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Store result bit in CRBD
eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3)));
IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2);
bd = ibuild.EmitConvertFromFastCR(bd);
bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3))));
bd = ibuild.EmitOr(bd, eax);
bd = ibuild.EmitConvertToFastCR(bd);
ibuild.EmitStoreCR(bd, inst.CRBD >> 2);
}

View file

@ -27,8 +27,6 @@
#if _M_ARM_32
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
#include "Core/PowerPC/JitArmIL/JitIL.h"
#include "Core/PowerPC/JitArmIL/JitIL_Tables.h"
#endif
static bool bFakeVMEM = false;
@ -67,11 +65,6 @@ namespace JitInterface
ptr = new JitArm();
break;
}
case 4:
{
ptr = new JitArmIL();
break;
}
#endif
default:
{
@ -106,11 +99,6 @@ namespace JitInterface
JitArmTables::InitTables();
break;
}
case 4:
{
JitArmILTables::InitTables();
break;
}
#endif
default:
{

View file

@ -41,10 +41,10 @@ PPCDebugInterface debug_interface;
u32 CompactCR()
{
u32 new_cr = ppcState.cr_fast[0] << 28;
for (int i = 1; i < 8; i++)
u32 new_cr = 0;
for (int i = 0; i < 8; i++)
{
new_cr |= ppcState.cr_fast[i] << (28 - i * 4);
new_cr |= GetCRField(i) << (28 - i * 4);
}
return new_cr;
}
@ -53,7 +53,7 @@ void ExpandCR(u32 cr)
{
for (int i = 0; i < 8; i++)
{
ppcState.cr_fast[i] = (cr >> (28 - i * 4)) & 0xF;
SetCRField(i, (cr >> (28 - i * 4)) & 0xF);
}
}
@ -99,7 +99,8 @@ static void ResetRegisters()
ppcState.pc = 0;
ppcState.npc = 0;
ppcState.Exceptions = 0;
((u64*)(&ppcState.cr_fast[0]))[0] = 0;
for (auto& v : ppcState.cr_val)
v = 0x8000000000000001;
TL = 0;
TU = 0;

View file

@ -38,7 +38,20 @@ struct GC_ALIGNED64(PowerPCState)
u32 pc; // program counter
u32 npc;
u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast?
// Optimized CR implementation. Instead of storing CR in its PowerPC format
// (4 bit value, SO/EQ/LT/GT), we store instead a 64 bit value for each of
// the 8 CR register parts. This 64 bit value follows this format:
// - SO iff. bit 61 is set
// - EQ iff. lower 32 bits == 0
// - GT iff. (s64)cr_val > 0
// - LT iff. bit 62 is set
//
// This has the interesting property that sign-extending the result of an
// operation from 32 to 64 bits results in a 64 bit value that works as a
// CR value. Checking each part of CR is also fast, as it is equivalent to
// testing one bit or the low 32 bit part of a register. And CR can still
// be manipulated bit by bit fairly easily.
u64 cr_val[8];
u32 msr; // machine specific register
u32 fpscr; // floating point flags/status bits
@ -149,27 +162,63 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
} // namespace
// Fast CR system - store them in single bytes instead of nibbles to not have to
// mask/shift them out.
// Flags of a 4-bit PowerPC condition-register field.
enum CRBits
{
// Bit masks within the 4-bit field.
CR_SO = 1,
CR_EQ = 2,
CR_GT = 4,
CR_LT = 8,
// Bit indices of the same flags (CR_x == 1 << CR_x_BIT).
CR_SO_BIT = 0,
CR_EQ_BIT = 1,
CR_GT_BIT = 2,
CR_LT_BIT = 3,
};
// Converts a 4-bit PowerPC CR field (a combination of CR_SO/CR_EQ/CR_GT/CR_LT)
// into the internal 64-bit CR representation:
//   - bit 32 is always set, so the value is never zero as a whole;
//   - bit 61 is set when SO is set;
//   - bit 0 is set when EQ is clear (making the low 32 bits non-zero);
//   - bit 63 is set when GT is clear (making the value negative as an s64);
//   - bit 62 is set when LT is set.
inline u64 PPCCRToInternal(u8 value)
{
	u64 internal = 0x100000000;

	if (value & CR_SO)
		internal |= 1ull << 61;
	if (!(value & CR_EQ))
		internal |= 1ull;
	if (!(value & CR_GT))
		internal |= 1ull << 63;
	if (value & CR_LT)
		internal |= 1ull << 62;

	return internal;
}
// Warning: these CR operations are fairly slow since they need to convert from
// PowerPC format (4 bit) to our internal 64 bit format. See the definition of
// ppcState.cr_val for more explanations.
inline void SetCRField(int cr_field, int value) {
PowerPC::ppcState.cr_fast[cr_field] = value;
PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value);
}
inline u32 GetCRField(int cr_field) {
return PowerPC::ppcState.cr_fast[cr_field];
u64 cr_val = PowerPC::ppcState.cr_val[cr_field];
u32 ppc_cr = 0;
// SO
ppc_cr |= !!(cr_val & (1ull << 61));
// EQ
ppc_cr |= ((cr_val & 0xFFFFFFFF) == 0) << 1;
// GT
ppc_cr |= ((s64)cr_val > 0) << 2;
// LT
ppc_cr |= !!(cr_val & (1ull << 62)) << 3;
return ppc_cr;
}
inline u32 GetCRBit(int bit) {
return (PowerPC::ppcState.cr_fast[bit >> 2] >> (3 - (bit & 3))) & 1;
return (GetCRField(bit >> 2) >> (3 - (bit & 3))) & 1;
}
inline void SetCRBit(int bit, int value) {
if (value & 1)
PowerPC::ppcState.cr_fast[bit >> 2] |= 0x8 >> (bit & 3);
SetCRField(bit >> 2, GetCRField(bit >> 2) | (0x8 >> (bit & 3)));
else
PowerPC::ppcState.cr_fast[bit >> 2] &= ~(0x8 >> (bit & 3));
SetCRField(bit >> 2, GetCRField(bit >> 2) & ~(0x8 >> (bit & 3)));
}
// SetCR and GetCR are fairly slow. Should be avoided if possible.