From 9b9817f9272eea1cceeeef54b112664daa56bba9 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Thu, 29 May 2014 19:32:12 +0200 Subject: [PATCH 01/11] x64Emitter: Fix REX encoding for SETcc Previously using the new "lower 8 bits" registers (SIL, SPL, ...) caused SETcc to write to other registers (for example, SETcc SIL would generate SETcc DH). --- Source/Core/Common/x64Emitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index b4ec2bc331..5d3718e5bd 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -721,7 +721,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest) { if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); dest.operandReg = 0; - dest.WriteRex(this, 0, 0); + dest.WriteRex(this, 0, 8); Write8(0x0F); Write8(0x90 + (u8)flag); dest.WriteRest(this); From 0ff1481494562025696f99e4b136bdd9e0b18423 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 31 May 2014 00:43:52 +0200 Subject: [PATCH 02/11] Optimize PPC CR emulation by using magic 64 bit values PowerPC has a 32 bit CR register, which is used to store flags for results of computations. Most instructions have an optional bit that tells the CPU whether the flags should be updated. This 32 bit register actually contains 8 sets of 4 flags: Summary Overflow (SO), Equals (EQ), Greater Than (GT), Less Than (LT). These 8 sets are usually called CR0-CR7 and accessed independently. In the most common operations, the flags are computed from the result of the operation in the following fashion: * EQ is set iff result == 0 * LT is set iff result < 0 * GT is set iff result > 0 * (Dolphin does not emulate SO) While X86 architectures have a similar concept of flags, it is very difficult to access the FLAGS register directly to translate its value to an equivalent PowerPC value. 
With the current Dolphin implementation, updating a PPC CR register requires CPU branching, which has a few performance issues: it uses space in the BTB, and in the worst case (!GT, !LT, EQ) requires 2 branches not taken. After some brainstorming on IRC about how this could be improved, calc84maniac figured out a neat trick that makes common CR operations way more efficient to JIT on 64 bit X86 architectures. It relies on emulating each CRn bitfield with a 64 bit register internally, whose value is the result of the operation from which flags are updated, sign extended to 64 bits. Then, checking if a CR bit is set can be done in the following way: * EQ is set iff LOWER_32_BITS(cr_64b_val) == 0 * GT is set iff (s64)cr_64b_val > 0 * LT is set iff bit 62 of cr_64b_val is set To take a few examples, if the result of an operation is: * -1 (0xFFFFFFFFFFFFFFFF) -> lower 32 bits not 0 => !EQ -> (s64)val (-1) is not > 0 => !GT -> bit 62 is set => LT !EQ, !GT, LT * 0 (0x0000000000000000) -> lower 32 bits are 0 => EQ -> (s64)val (0) is not > 0 => !GT -> bit 62 is not set => !LT EQ, !GT, !LT * 1 (0x0000000000000001) -> lower 32 bits not 0 => !EQ -> (s64)val (1) is > 0 => GT -> bit 62 is not set => !LT !EQ, GT, !LT Sometimes we need to convert PPC CR values to these 64 bit values. 
The following convention is used in this case: * Bit 0 (LSB) is set iff !EQ * Bit 62 is set iff LT * Bit 63 is set iff !GT * Bit 32 is always set to disambiguate between EQ and GT Some more examples: * !EQ, GT, LT -> 0x4000000100000001 (!B63, B62, B32, B0) -> lower 32 bits not 0 => !EQ -> (s64)val is > 0 => GT -> bit 62 is set => LT * EQ, GT, !LT -> 0x0000000100000000 -> lower 32 bits are 0 => EQ -> (s64)val is > 0 (note: B32) => GT -> bit 62 is not set => !LT --- .../Core/PowerPC/Interpreter/Interpreter.cpp | 2 +- .../Interpreter/Interpreter_Integer.cpp | 34 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 6 +- Source/Core/Core/PowerPC/Jit64/Jit.h | 10 + Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 24 +- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 24 +- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 352 +++++------------- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 296 ++++++++++++--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 6 +- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 6 +- Source/Core/Core/PowerPC/PowerPC.cpp | 11 +- Source/Core/Core/PowerPC/PowerPC.h | 58 ++- 12 files changed, 450 insertions(+), 379 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index a82d7bcc82..d1c03420fc 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -84,7 +84,7 @@ static void Trace(UGeckoInstruction& instCode) char ppcInst[256]; DisassembleGekko(instCode.hex, PC, ppcInst, 256); - DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, 
PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str(), instCode.hex, ppcInst); + DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx FPSCR: %08x MSR: %08x LR: %08x %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), instCode.hex, ppcInst); } int Interpreter::SingleStepInner(void) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index c166f107f2..0fa0f8f4a8 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -4,32 +4,22 @@ #include "Core/PowerPC/Interpreter/Interpreter.h" -void Interpreter::Helper_UpdateCR0(u32 _uValue) +void Interpreter::Helper_UpdateCR0(u32 value) { - u32 new_cr0; - int sValue = (int)_uValue; - if (sValue > 0) - new_cr0 = 0x4; - else if (sValue < 0) - new_cr0 = 0x8; - else - new_cr0 = 0x2; - new_cr0 |= GetXER_SO(); - SetCRField(0, new_cr0); + s64 sign_extended = (s64)(s32)value; + u64 cr_val = (u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[0] = cr_val; } -void Interpreter::Helper_UpdateCRx(int _x, u32 _uValue) +void Interpreter::Helper_UpdateCRx(int idx, u32 value) { - u32 new_crX; - int sValue = (int)_uValue; - if (sValue > 0) - new_crX = 0x4; - else if (sValue < 0) - new_crX = 0x8; - else - new_crX = 0x2; - new_crX |= GetXER_SO(); - SetCRField(_x, new_crX); + s64 sign_extended = (s64)(s32)value; + u64 cr_val = (u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[idx] = cr_val; } u32 Interpreter::Helper_Carry(u32 _uValue1, u32 _uValue2) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 837b3c4be9..ce0c36b6a5 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ 
b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -377,10 +377,8 @@ void Jit64::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); + DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } void STACKALIGN Jit64::Jit(u32 em_address) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 8c5056c372..ab25a88d6b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -106,6 +106,16 @@ public: void GenerateRC(); void ComputeRC(const Gen::OpArg & arg); + // Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, + // don't forget to xlock it before. + void GetCRFieldBit(int field, int bit, Gen::X64Reg out); + // Clobbers ABI_PARAM1 and ABI_PARAM2, xlock them before. + void SetCRFieldBit(int field, int bit, Gen::X64Reg in); + + // Generates a branch that will check if a given bit of a CR register part + // is set or not. 
+ FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); + void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 4b4e9ab0e6..135e0c4f3c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -117,11 +117,8 @@ void Jit64::bcx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } if (inst.LK) @@ -179,14 +176,8 @@ void Jit64::bcctrx(UGeckoInstruction inst) // BO_2 == 001zy -> b if false // BO_2 == 011zy -> b if true - // Ripped from bclrx - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - Gen::CCFlags branch; - if (inst.BO_2 & BO_BRANCH_IF_TRUE) - branch = CC_Z; - else - branch = CC_NZ; - FixupBranch b = J_CC(branch, true); + FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); MOV(32, R(EAX), M(&CTR)); AND(32, R(EAX), Imm32(0xFFFFFFFC)); //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() @@ -222,11 +213,8 @@ void Jit64::bclrx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 
3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } // This below line can be used to prove that blr "eats flags" in practice. diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 648da27d26..f2c1834f8a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -237,26 +237,33 @@ void Jit64::fcmpx(UGeckoInstruction inst) pGreater = J_CC(CC_B); } - // Equal - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); + // Read the documentation about cr_val in PowerPC.h to understand these + // magic values. + + // Equal: !GT (bit 63 set), !LT (bit 62 not set), !SO (bit 61 not set), EQ + // (bits 31-0 not set). + MOV(64, R(RAX), Imm64(0x8000000000000000)); continue1 = J(); - // NAN + // NAN: !GT (bit 63 set), !LT (bit 62 not set), SO (bit 61 set), !EQ (bit 0 + // set). SetJumpTarget(pNaN); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1)); + MOV(64, R(RAX), Imm64(0xA000000000000001)); if (a != b) { continue2 = J(); - // Greater Than + // Greater Than: GT (bit 63 not set), !LT (bit 62 not set), !SO (bit 61 + // not set), !EQ (bit 0 set). SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); + MOV(64, R(RAX), Imm64(0x0000000000000001)); continue3 = J(); - // Less Than + // Less Than: !GT (bit 63 set), LT (bit 62 set), !SO (bit 61 not set), + // !EQ (bit 0 set). 
SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); + MOV(64, R(RAX), Imm64(0xC000000000000001)); } SetJumpTarget(continue1); @@ -266,6 +273,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 67c5e96921..9eb8d94cb0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -116,57 +116,17 @@ void Jit64::GenerateCarry() SetJumpTarget(pContinue); } -// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. -void Jit64::GenerateRC() -{ - FixupBranch pZero = J_CC(CC_Z); - FixupBranch pNegative = J_CC(CC_S); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // Result > 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pNegative); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // Result < 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pZero); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // Result == 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); -} - void Jit64::ComputeRC(const Gen::OpArg & arg) { if (arg.IsImm()) { - s32 value = (s32)arg.offset; - if (value < 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); - else if (value > 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); - else - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); + MOV(32, R(EAX), Imm32((s32)arg.offset)); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } else { - if (arg.IsSimpleReg()) - TEST(32, arg, arg); - else - CMP(32, arg, Imm8(0)); - FixupBranch pLesser = J_CC(CC_L); - FixupBranch pGreater = J_CC(CC_G); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // 
_x86Reg > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); + MOVSX(64, 32, RAX, arg); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } } @@ -192,26 +152,20 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void { gpr.KillImmediate(d, true, true); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } else { gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } } else if (doop == Add) @@ -219,9 +173,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void // a == 0, which for these instructions imply value = 0 gpr.SetImmediate32(d, value); if (Rc) - { ComputeRC(gpr.R(d)); - } } else { @@ -378,7 +330,8 @@ void Jit64::cmpXX(UGeckoInstruction inst) else compareResult = 0x8; } - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); gpr.UnlockAll(); if (merge_branch) @@ -436,71 +389,58 @@ void Jit64::cmpXX(UGeckoInstruction inst) } else { - Gen::CCFlags less_than, greater_than; if (signedCompare) { - less_than = CC_L; - greater_than = CC_G; + if (gpr.R(a).IsImm()) + MOV(64, R(RAX), gpr.R(a)); + else + MOVSX(64, 32, RAX, gpr.R(a)); + if (!comparand.IsImm()) + { + MOVSX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); + } } else { - less_than = CC_B; - greater_than = CC_A; - } + if (gpr.R(a).IsImm()) + 
MOV(32, R(RAX), gpr.R(a)); + else + MOVZX(64, 32, RAX, gpr.R(a)); - if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg())) - { - // Syntax for CMP is invalid with such arguments. We must load RA in a register. - gpr.BindToRegister(a, true, false); + if (comparand.IsImm()) + MOV(32, R(ABI_PARAM1), comparand); + else + MOVZX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); } - CMP(32, gpr.R(a), comparand); - gpr.UnlockAll(); + SUB(64, R(RAX), comparand); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); - if (!merge_branch) - { - // Keep the normal code separate for clarity. - - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); - // TODO: If we ever care about SO, borrow a trick from - // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc - } - else + if (merge_branch) { js.downcountAmount++; int test_bit = 8 >> (js.next_inst.BI & 3); - bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? false : true; + bool condition = js.next_inst.BO & BO_BRANCH_IF_TRUE; // Test swapping (in the future, will be used to inline across branches the right way) // if (rand() & 1) // std::swap(destination1, destination2), condition = !condition; + gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 - FixupBranch continue1 = J(); + FixupBranch pDontBranch; + if (test_bit & 8) + pDontBranch = J_CC(condition ? 
CC_GE : CC_L); // Test < 0, so jump over if >= 0. + else if (test_bit & 4) + pDontBranch = J_CC(condition ? CC_LE : CC_G); // Test > 0, so jump over if <= 0. + else if (test_bit & 2) + pDontBranch = J_CC(condition ? CC_NE : CC_E); // Test = 0, so jump over if != 0. + else // SO bit, do not branch (we don't emulate SO for cmp). + pDontBranch = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 - FixupBranch continue3; - if (!!(8 & test_bit) == condition) continue3 = J(); - if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + // Code that handles successful PPC branching. if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) @@ -534,9 +474,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) PanicAlert("WTF invalid branch"); } - if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); - if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + SetJumpTarget(pDontBranch); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { @@ -619,9 +557,7 @@ void Jit64::boolX(UGeckoInstruction inst) PanicAlert("WTF!"); } if (inst.Rc) - { ComputeRC(gpr.R(a)); - } } else if ((a == s) || (a == b)) { @@ -632,19 +568,11 @@ void Jit64::boolX(UGeckoInstruction inst) if (inst.SUBOP10 == 28) /* andx */ { AND(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { AND(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { @@ -659,27 +587,15 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); AND(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { OR(32, gpr.R(a), 
operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { OR(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { @@ -694,32 +610,22 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); OR(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { NOT(32, gpr.R(a)); XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else { PanicAlert("WTF"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } else @@ -731,83 +637,53 @@ void Jit64::boolX(UGeckoInstruction inst) { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); AND(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); OR(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { MOV(32, gpr.R(a), gpr.R(s)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { MOV(32, gpr.R(a), gpr.R(s)); NOT(32, gpr.R(a)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else { 
PanicAlert("WTF!"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } } @@ -943,9 +819,8 @@ void Jit64::subfcx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) { - GenerateRC(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); FinalizeCarryOverflow(inst.OE, true); gpr.UnlockAll(); @@ -980,10 +855,9 @@ void Jit64::subfex(UGeckoInstruction inst) NOT(32, gpr.R(d)); ADC(32, gpr.R(d), gpr.R(b)); } - if (inst.Rc) { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1004,11 +878,9 @@ void Jit64::subfmex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1029,11 +901,9 @@ void Jit64::subfzex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1076,14 +946,10 @@ void Jit64::subfx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1505,14 +1371,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1521,14 +1383,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1547,11 +1405,9 @@ void 
Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1562,11 +1418,9 @@ void Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1584,11 +1438,9 @@ void Jit64::addcx(UGeckoInstruction inst) gpr.BindToRegister(d, true); JitClearCAOV(inst.OE); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1598,11 +1450,9 @@ void Jit64::addcx(UGeckoInstruction inst) JitClearCAOV(inst.OE); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1621,11 +1471,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1636,11 +1484,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1659,11 +1505,9 @@ void Jit64::addzex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1674,11 +1518,9 @@ void Jit64::addzex(UGeckoInstruction inst) 
GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1714,17 +1556,13 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { SHL(32, gpr.R(a), Imm8(inst.SH)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH) { SHR(32, gpr.R(a), Imm8(inst.MB)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1736,9 +1574,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.Rc) { @@ -1818,9 +1654,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) XOR(32, gpr.R(a), R(EAX)); } if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1828,9 +1662,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) AND(32, gpr.R(a), Imm32(~mask)); XOR(32, gpr.R(a), gpr.R(s)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } gpr.UnlockAll(); } @@ -1864,9 +1696,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) ROL(32, gpr.R(a), R(ECX)); AND(32, gpr.R(a), Imm32(mask)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); gpr.UnlockAll(); gpr.UnlockAllX(); } @@ -1898,14 +1728,10 @@ void Jit64::negx(UGeckoInstruction inst) if (a != d) MOV(32, gpr.R(d), gpr.R(a)); NEG(32, gpr.R(d)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1994,7 +1820,7 @@ void Jit64::slwx(UGeckoInstruction inst) if (inst.Rc) { AND(32, gpr.R(a), gpr.R(a)); - GenerateRC(); + ComputeRC(gpr.R(a)); } else { @@ -2104,9 +1930,7 @@ void Jit64::srawix(UGeckoInstruction inst) } SAR(32, gpr.R(a), Imm8(amount)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); SHL(32, R(EAX), Imm8(32-amount)); TEST(32, R(EAX), gpr.R(a)); 
FixupBranch nocarry = J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 977e298dd2..e21a3646d0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -10,6 +10,130 @@ #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" +void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out) +{ + switch (bit) + { + case 0: // SO, check bit 61 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + case 1: // EQ, check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + SETcc(CC_Z, R(out)); + break; + + case 2: // GT, check val > 0 + MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(ABI_PARAM1), R(ABI_PARAM1)); + SETcc(CC_G, R(out)); + break; + + case 3: // LT, check bit 62 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } +} + +void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) +{ + MOV(64, R(ABI_PARAM2), M(&PowerPC::ppcState.cr_val[field])); + TEST(8, R(in), Imm8(1)); + FixupBranch input_is_set = J_CC(CC_NZ, false); + + // New value is 0. 
+ switch (bit) + { + case 0: // !SO, unset bit 61 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 61))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 1: // !EQ, set bit 0 to 1 + OR(8, R(ABI_PARAM2), Imm8(1)); + break; + + case 2: // !GT, set bit 63 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 63)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 3: // !LT, unset bit 62 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 62))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + FixupBranch end = J(); + SetJumpTarget(input_is_set); + + switch (bit) + { + case 0: // SO, set bit 61 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 1: // EQ, set bits 31-0 to 0 + MOV(64, R(ABI_PARAM1), Imm64(0xFFFFFFFF00000000)); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 2: // GT, unset bit 63 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 63))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 3: // LT, set bit 62 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + SetJumpTarget(end); + MOV(64, R(ABI_PARAM1), Imm64(1ull << 32)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM2)); +} + +FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + switch (bit) + { + case 0: // SO, check bit 61 set + MOV(64, R(RAX), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + case 1: // EQ, check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + return J_CC(jump_if_set ? CC_Z : CC_NZ, true); + + case 2: // GT, check val > 0 + MOV(64, R(RAX), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(RAX), R(RAX)); + return J_CC(jump_if_set ? 
CC_G : CC_LE, true); + + case 3: // LT, check bit 62 set + MOV(64, R(RAX), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + // Should never happen. + return FixupBranch(); +} + void Jit64::mtspr(UGeckoInstruction inst) { INSTRUCTION_START @@ -154,16 +278,47 @@ void Jit64::mfcr(UGeckoInstruction inst) int d = inst.RD; gpr.Lock(d); gpr.KillImmediate(d, false, true); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + XOR(32, R(EAX), R(EAX)); - for (int i = 1; i < 8; i++) + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + for (int i = 0; i < 8; i++) { - SHL(32, R(EAX), Imm8(4)); - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); + if (i != 0) + SHL(32, R(EAX), Imm8(4)); + + MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + + // SO: Bit 61 set. + MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(61)); + AND(32, R(tmp), Imm8(1)); + OR(32, R(EAX), R(tmp)); + + // EQ: Bits 31-0 == 0. + XOR(32, R(tmp), R(tmp)); + TEST(32, R(cr_val), R(cr_val)); + SETcc(CC_Z, R(tmp)); + SHL(32, R(tmp), Imm8(1)); + OR(32, R(EAX), R(tmp)); + + // GT: Value > 0. + TEST(64, R(cr_val), R(cr_val)); + SETcc(CC_G, R(tmp)); + SHL(32, R(tmp), Imm8(2)); + OR(32, R(EAX), R(tmp)); + + // LT: Bit 62 set. 
+ MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(62 - 3)); + AND(32, R(tmp), Imm8(0x8)); + OR(32, R(EAX), R(tmp)); } MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); } void Jit64::mtcrf(UGeckoInstruction inst) @@ -182,7 +337,8 @@ void Jit64::mtcrf(UGeckoInstruction inst) if ((crm & (0x80 >> i)) != 0) { u8 newcr = (gpr.R(inst.RS).offset >> (28 - (i * 4))) & 0xF; - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), Imm8(newcr)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(newcr))); + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); } } } @@ -190,17 +346,50 @@ void Jit64::mtcrf(UGeckoInstruction inst) { gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(EAX), gpr.R(inst.RS)); - SHR(32, R(EAX), Imm8(28 - (i * 4))); - AND(32, R(EAX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); + MOVZX(64, 32, EAX, gpr.R(inst.RS)); + SHR(64, R(EAX), Imm8(28 - (i * 4))); + AND(64, R(EAX), Imm32(0xF)); + + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x2)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x4)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x8)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(cr_val)); } } gpr.UnlockAll(); + gpr.UnlockAllX(); } } } @@ -213,8 +402,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRFS])); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + 
MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); } } @@ -226,9 +415,42 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(28)); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + MOVZX(64, 32, EAX, M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(64, R(EAX), Imm8(28)); + + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ (internal format means EQ iff low 32 bits == 0, so invert before masking) + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x2)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x4)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x8)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(cr_val)); + gpr.UnlockAllX(); // Clear XER[0-3] AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); @@ -240,70 +462,59 @@ void Jit64::crXXX(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); _dbg_assert_msg_(DYNA_REC, inst.OPCD == 19, "Invalid crXXX"); + // TODO(delroth): Potential optimizations could be applied here. For + // instance, if the two CR bits being loaded are the same, two loads are + // not required. 
+ // USES_CR - // Get bit CRBA in EAX aligned with bit CRBD - int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); - if (shiftA < 0) - SHL(8, R(EAX), Imm8(-shiftA)); - else if (shiftA > 0) - SHR(8, R(EAX), Imm8(shiftA)); - - // Get bit CRBB in ECX aligned with bit CRBD - gpr.FlushLockX(ECX); - int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); - if (shiftB < 0) - SHL(8, R(ECX), Imm8(-shiftB)); - else if (shiftB > 0) - SHR(8, R(ECX), Imm8(shiftB)); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM2); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX); // Compute combined bit switch (inst.SUBOP10) { case 33: // crnor - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 129: // crandc - NOT(8, R(ECX)); - AND(8, R(EAX), R(ECX)); + NOT(8, R(ABI_PARAM2)); + AND(8, R(EAX), R(ABI_PARAM2)); break; case 193: // crxor - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); break; case 225: // crnand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 257: // crand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); break; case 289: // creqv - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 417: // crorc - NOT(8, R(ECX)); - OR(8, R(EAX), R(ECX)); + NOT(8, R(ABI_PARAM2)); + OR(8, R(EAX), R(ABI_PARAM2)); break; case 449: // cror - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); break; } // Store result bit in CRBD - AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); - AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); - OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp 
b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index c1f1aacd40..585485271e 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -763,7 +763,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); + // TODO(delroth): unbreak + //Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); RI.regs[reg] = I; break; } @@ -816,7 +817,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); unsigned ppcreg = *I >> 16; // CAUTION: uses 8-bit reg! - Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); + // TODO(delroth): Unbreak. + //Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); regNormalRegClear(RI, I); break; } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index a3cb619f62..e86a829ab0 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -477,9 +477,9 @@ void JitIL::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, + DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.cr_val[1], PowerPC::ppcState.cr_val[2], PowerPC::ppcState.cr_val[3], + PowerPC::ppcState.cr_val[4], PowerPC::ppcState.cr_val[5], 
PowerPC::ppcState.cr_val[6], PowerPC::ppcState.cr_val[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 9778c2234a..8b0f9d6362 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -41,10 +41,10 @@ PPCDebugInterface debug_interface; u32 CompactCR() { - u32 new_cr = ppcState.cr_fast[0] << 28; - for (int i = 1; i < 8; i++) + u32 new_cr = 0; + for (int i = 0; i < 8; i++) { - new_cr |= ppcState.cr_fast[i] << (28 - i * 4); + new_cr |= GetCRField(i) << (28 - i * 4); } return new_cr; } @@ -53,7 +53,7 @@ void ExpandCR(u32 cr) { for (int i = 0; i < 8; i++) { - ppcState.cr_fast[i] = (cr >> (28 - i * 4)) & 0xF; + SetCRField(i, (cr >> (28 - i * 4)) & 0xF); } } @@ -99,7 +99,8 @@ static void ResetRegisters() ppcState.pc = 0; ppcState.npc = 0; ppcState.Exceptions = 0; - ((u64*)(&ppcState.cr_fast[0]))[0] = 0; + for (auto& v : ppcState.cr_val) + v = 0x8000000000000001; TL = 0; TU = 0; diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 107c63da95..7cf049bfa8 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -38,7 +38,20 @@ struct GC_ALIGNED64(PowerPCState) u32 pc; // program counter u32 npc; - u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast? + // Optimized CR implementation. Instead of storing CR in its PowerPC format + // (4 bit value, SO/EQ/LT/GT), we store instead a 64 bit value for each of + // the 8 CR register parts. This 64 bit value follows this format: + // - SO iff. bit 61 is set + // - EQ iff. lower 32 bits == 0 + // - GT iff. (s64)cr_val > 0 + // - LT iff. 
bit 62 is set + // + // This has the interesting property that sign-extending the result of an + // operation from 32 to 64 bits results in a 64 bit value that works as a + // CR value. Checking each part of CR is also fast, as it is equivalent to + // testing one bit or the low 32 bit part of a register. And CR can still + // be manipulated bit by bit fairly easily. + u64 cr_val[8]; u32 msr; // machine specific register u32 fpscr; // floating point flags/status bits @@ -149,27 +162,54 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); } // namespace -// Fast CR system - store them in single bytes instead of nibbles to not have to -// mask/shift them out. +// Convert between PPC and internal representation of CR. +inline u64 PPCCRToInternal(u8 value) +{ + u64 cr_val = 0x100000000; + // SO + cr_val |= (u64)!!(value & 1) << 61; + // EQ + cr_val |= (u64)!(value & 2); + // GT + cr_val |= (u64)!(value & 4) << 63; + // LT + cr_val |= (u64)!!(value & 8) << 62; -// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all. + return cr_val; +} + +// Warning: these CR operations are fairly slow since they need to convert from +// PowerPC format (4 bit) to our internal 64 bit format. See the definition of +// ppcState.cr_val for more explanations. 
inline void SetCRField(int cr_field, int value) { - PowerPC::ppcState.cr_fast[cr_field] = value; + PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value); } inline u32 GetCRField(int cr_field) { - return PowerPC::ppcState.cr_fast[cr_field]; + u64 cr_val = PowerPC::ppcState.cr_val[cr_field]; + u32 ppc_cr = 0; + + // SO + ppc_cr |= !!(cr_val & (1ull << 61)); + // EQ + ppc_cr |= ((cr_val & 0xFFFFFFFF) == 0) << 1; + // GT + ppc_cr |= ((s64)cr_val > 0) << 2; + // LT + ppc_cr |= !!(cr_val & (1ull << 62)) << 3; + + return ppc_cr; } inline u32 GetCRBit(int bit) { - return (PowerPC::ppcState.cr_fast[bit >> 2] >> (3 - (bit & 3))) & 1; + return (GetCRField(bit >> 2) >> (3 - (bit & 3))) & 1; } inline void SetCRBit(int bit, int value) { if (value & 1) - PowerPC::ppcState.cr_fast[bit >> 2] |= 0x8 >> (bit & 3); + SetCRField(bit >> 2, GetCRField(bit >> 2) | (0x8 >> (bit & 3))); else - PowerPC::ppcState.cr_fast[bit >> 2] &= ~(0x8 >> (bit & 3)); + SetCRField(bit >> 2, GetCRField(bit >> 2) & ~(0x8 >> (bit & 3))); } // SetCR and GetCR are fairly slow. Should be avoided if possible. From 5506e57ab8f59982d1cf59c78e7f620a68b06346 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sun, 22 Jun 2014 17:14:31 +0200 Subject: [PATCH 03/11] CR: Replace some magic values with constants. 
--- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 19 ++-------- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 12 +++--- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 38 +++++++++---------- Source/Core/Core/PowerPC/PowerPC.h | 25 ++++++++---- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index f2c1834f8a..781ab58172 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -237,33 +237,22 @@ void Jit64::fcmpx(UGeckoInstruction inst) pGreater = J_CC(CC_B); } - // Read the documentation about cr_val in PowerPC.h to understand these - // magic values. - - // Equal: !GT (bit 63 set), !LT (bit 62 not set), !SO (bit 61 not set), EQ - // (bits 31-0 not set). - MOV(64, R(RAX), Imm64(0x8000000000000000)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); continue1 = J(); - // NAN: !GT (bit 63 set), !LT (bit 62 not set), SO (bit 61 set), !EQ (bit 0 - // set). SetJumpTarget(pNaN); - MOV(64, R(RAX), Imm64(0xA000000000000001)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); if (a != b) { continue2 = J(); - // Greater Than: GT (bit 63 not set), !LT (bit 62 not set), !SO (bit 61 - // not set), !EQ (bit 0 set). SetJumpTarget(pGreater); - MOV(64, R(RAX), Imm64(0x0000000000000001)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); continue3 = J(); - // Less Than: !GT (bit 63 set), LT (bit 62 set), !SO (bit 61 not set), - // !EQ (bit 0 set). 
SetJumpTarget(pLesser); - MOV(64, R(RAX), Imm64(0xC000000000000001)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); } SetJumpTarget(continue1); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 9eb8d94cb0..1d98d0d291 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -315,20 +315,20 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (signedCompare) { if ((s32)gpr.R(a).offset == (s32)comparand.offset) - compareResult = 0x2; + compareResult = CR_EQ; else if ((s32)gpr.R(a).offset > (s32)comparand.offset) - compareResult = 0x4; + compareResult = CR_GT; else - compareResult = 0x8; + compareResult = CR_LT; } else { if ((u32)gpr.R(a).offset == (u32)comparand.offset) - compareResult = 0x2; + compareResult = CR_EQ; else if ((u32)gpr.R(a).offset > (u32)comparand.offset) - compareResult = 0x4; + compareResult = CR_GT; else - compareResult = 0x8; + compareResult = CR_LT; } MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index e21a3646d0..4dead8df4e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -14,24 +14,24 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out) { switch (bit) { - case 0: // SO, check bit 61 set + case CR_SO_BIT: // check bit 61 set MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); SETcc(CC_NZ, R(out)); break; - case 1: // EQ, check bits 31-0 == 0 + case CR_EQ_BIT: // check bits 31-0 == 0 CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); SETcc(CC_Z, R(out)); break; - case 2: // GT, check val > 0 + case CR_GT_BIT: // check val > 0 MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); 
TEST(64, R(ABI_PARAM1), R(ABI_PARAM1)); SETcc(CC_G, R(out)); break; - case 3: // LT, check bit 62 set + case CR_LT_BIT: // check bit 62 set MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); SETcc(CC_NZ, R(out)); @@ -51,21 +51,21 @@ void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) // New value is 0. switch (bit) { - case 0: // !SO, unset bit 61 + case CR_SO_BIT: // unset bit 61 MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 61))); AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; - case 1: // !EQ, set bit 0 to 1 + case CR_EQ_BIT: // set bit 0 to 1 OR(8, R(ABI_PARAM2), Imm8(1)); break; - case 2: // !GT, set bit 63 + case CR_GT_BIT: // !GT, set bit 63 MOV(64, R(ABI_PARAM1), Imm64(1ull << 63)); OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; - case 3: // !LT, unset bit 62 + case CR_LT_BIT: // !LT, unset bit 62 MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 62))); AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; @@ -76,22 +76,22 @@ void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) switch (bit) { - case 0: // SO, set bit 61 + case CR_SO_BIT: // set bit 61 MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; - case 1: // EQ, set bits 31-0 to 0 + case CR_EQ_BIT: // set bits 31-0 to 0 MOV(64, R(ABI_PARAM1), Imm64(0xFFFFFFFF00000000)); AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; - case 2: // GT, unset bit 63 + case CR_GT_BIT: // unset bit 63 MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 63))); AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; - case 3: // LT, set bit 62 + case CR_LT_BIT: // set bit 62 MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); break; @@ -107,21 +107,21 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { switch (bit) { - case 0: // SO, check bit 61 set + case CR_SO_BIT: // check bit 61 set MOV(64, R(RAX), Imm64(1ull << 61)); TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); return J_CC(jump_if_set ? 
CC_NZ : CC_Z, true); - case 1: // EQ, check bits 31-0 == 0 + case CR_EQ_BIT: // check bits 31-0 == 0 CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); return J_CC(jump_if_set ? CC_Z : CC_NZ, true); - case 2: // GT, check val > 0 + case CR_GT_BIT: // check val > 0 MOV(64, R(RAX), M(&PowerPC::ppcState.cr_val[field])); TEST(64, R(RAX), R(RAX)); return J_CC(jump_if_set ? CC_G : CC_LE, true); - case 3: // LT, check bit 62 set + case CR_LT_BIT: // check bit 62 set MOV(64, R(RAX), Imm64(1ull << 62)); TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); return J_CC(jump_if_set ? CC_NZ : CC_Z, true); @@ -369,19 +369,19 @@ void Jit64::mtcrf(UGeckoInstruction inst) // EQ MOV(64, R(tmp), R(EAX)); NOT(64, R(tmp)); - AND(64, R(tmp), Imm8(0x2)); + AND(64, R(tmp), Imm8(CR_EQ)); OR(64, R(cr_val), R(tmp)); // GT MOV(64, R(tmp), R(EAX)); NOT(64, R(tmp)); - AND(64, R(tmp), Imm8(0x4)); + AND(64, R(tmp), Imm8(CR_GT)); SHL(64, R(tmp), Imm8(63 - 2)); OR(64, R(cr_val), R(tmp)); // LT MOV(64, R(tmp), R(EAX)); - AND(64, R(tmp), Imm8(0x8)); + AND(64, R(tmp), Imm8(CR_LT)); SHL(64, R(tmp), Imm8(62 - 3)); OR(64, R(cr_val), R(tmp)); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 7cf049bfa8..aa4e351d94 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -162,18 +162,27 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); } // namespace +enum CRBits +{ + CR_SO = 1, + CR_EQ = 2, + CR_GT = 4, + CR_LT = 8, + + CR_SO_BIT = 0, + CR_EQ_BIT = 1, + CR_GT_BIT = 2, + CR_LT_BIT = 3, +}; + // Convert between PPC and internal representation of CR. 
inline u64 PPCCRToInternal(u8 value) { u64 cr_val = 0x100000000; - // SO - cr_val |= (u64)!!(value & 1) << 61; - // EQ - cr_val |= (u64)!(value & 2); - // GT - cr_val |= (u64)!(value & 4) << 63; - // LT - cr_val |= (u64)!!(value & 8) << 62; + cr_val |= (u64)!!(value & CR_SO) << 61; + cr_val |= (u64)!(value & CR_EQ); + cr_val |= (u64)!(value & CR_GT) << 63; + cr_val |= (u64)!!(value & CR_LT) << 62; return cr_val; } From 1429fccb9724ab6ec0ee1d443d9838d0ad330764 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 19:19:22 -0700 Subject: [PATCH 04/11] Initial unoptimized JITIL flag optimization. --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 93 +++++++++++++++++-- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 11 ++- Source/Core/Core/PowerPC/JitILCommon/IR.h | 9 ++ .../PowerPC/JitILCommon/JitILBase_Branch.cpp | 2 + .../JitILCommon/JitILBase_FloatingPoint.cpp | 2 +- .../PowerPC/JitILCommon/JitILBase_Integer.cpp | 4 +- .../JitILCommon/JitILBase_SystemRegisters.cpp | 11 ++- 7 files changed, 114 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 585485271e..8abe9fd3f9 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) { return R.IInfo[I - R.FirstI] & 3; } -static unsigned SlotSet[1000]; +static u64 SlotSet[1000]; static u8 GC_ALIGNED16(FSlotSet[16*1000]); static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) { unsigned slot = regGetSpill(RI, RI.regs[reg]); if (!slot) { slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg)); + RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg)); } RI.regs[reg] = nullptr; } @@ -621,6 +621,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case FPDup1: case FSNeg: case FDNeg: + case 
ConvertFromFastCR: + case ConvertToFastCR: if (thisUsed) regMarkUse(RI, I, getOp1(I), 1); break; @@ -763,8 +765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - // TODO(delroth): unbreak - //Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); + Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -814,11 +815,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } case StoreCR: { - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); + Jit->MOV(64, R(RCX), regLocForInst(RI, getOp1(I))); unsigned ppcreg = *I >> 16; - // CAUTION: uses 8-bit reg! - // TODO(delroth): Unbreak. - //Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); + Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(RCX)); regNormalRegClear(RI, I); break; } @@ -1116,6 +1115,84 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } + case ConvertFromFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->XOR(32, R(EAX), R(EAX)); + + // SO: Bit 61 set. + Jit->MOV(64, R(RCX), R(cr_val)); + Jit->SHR(64, R(RCX), Imm8(61)); + Jit->AND(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // EQ: Bits 31-0 == 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(32, R(cr_val), R(cr_val)); + Jit->SETcc(CC_Z, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // GT: Value > 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(64, R(cr_val), R(cr_val)); + Jit->SETcc(CC_G, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(2)); + Jit->OR(32, R(EAX), R(ECX)); + + // LT: Bit 62 set. 
+ Jit->MOV(64, R(ECX), R(cr_val)); + Jit->SHR(64, R(ECX), Imm8(62 - 3)); + Jit->AND(32, R(ECX), Imm8(0x8)); + Jit->OR(32, R(EAX), R(ECX)); + + Jit->MOV(32, R(cr_val), R(EAX)); + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } + case ConvertToFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->MOV(64, R(RCX), Imm64(1ull << 32)); + + // SO + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->SHL(64, R(RAX), Imm8(63)); + Jit->SHR(64, R(RAX), Imm8(63 - 61)); + Jit->OR(64, R(RCX), R(RAX)); + + // EQ + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_EQ)); + Jit->OR(64, R(RCX), R(RAX)); + + // GT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_GT)); + Jit->SHL(64, R(RAX), Imm8(63 - 2)); + Jit->OR(64, R(RCX), R(RAX)); + + // LT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->AND(64, R(RAX), Imm8(CR_LT)); + Jit->SHL(64, R(RAX), Imm8(62 - 3)); + Jit->OR(64, R(RCX), R(RAX)); + + Jit->MOV(64, R(cr_val), R(RCX)); + + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } case LoadSingle: { if (!thisUsed) break; X64Reg reg = fregFindFreeReg(RI); diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index c7515e5bfc..0f0bb1c7f3 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -1130,7 +1130,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { numberOfOperands[CInt32] = 0; static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; - static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, 
DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, }; + static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR}; static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; for (auto& op : ZeroOp) { numberOfOperands[op] = 0; @@ -1235,10 +1235,11 @@ static std::unique_ptr writer; static const std::string opcodeNames[] = { "Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR", "LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw", - "Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg", - "StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF", - "StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or", - "Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", + "Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR", + "ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry", + "StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR", + "FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor", + "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", "ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt", "ICmpUlt", "ICmpUge", "ICmpUle", 
"ICmpSgt", "ICmpSlt", "ICmpSge", "ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start", diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index f84f29fe02..c09de3b9b2 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -33,6 +33,9 @@ enum Opcode { Load8, // These loads zext Load16, Load32, + // CR conversions + ConvertFromFastCR, + ConvertToFastCR, // Branches BranchUncond, // Register store operators @@ -373,6 +376,12 @@ public: InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpCRUnsigned, op1, op2); } + InstLoc EmitConvertFromFastCR(InstLoc op1) { + return FoldUOp(ConvertFromFastCR, op1); + } + InstLoc EmitConvertToFastCR(InstLoc op1) { + return FoldUOp(ConvertToFastCR, op1); + } InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) { return FoldBiOp(FallBackToInterpreter, op1, op2); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp index 85601cd0c5..9cb80059ec 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp @@ -66,6 +66,7 @@ static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruc if ((inst.BO & 16) == 0) // Test a CR bit { IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + CRReg = ibuild.EmitConvertFromFastCR(CRReg); IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); CRTest = ibuild.EmitAnd(CRReg, CRCmp); if (!(inst.BO & 8)) @@ -141,6 +142,7 @@ void JitILBase::bcctrx(UGeckoInstruction inst) if ((inst.BO & 16) == 0) // Test a CR bit { IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + CRReg = ibuild.EmitConvertFromFastCR(CRReg); IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); test = ibuild.EmitAnd(CRReg, CRCmp); if (!(inst.BO & 8)) diff --git 
a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp index 3184fbc16f..b97740ce5e 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp @@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst) int ordered = (inst.SUBOP10 == 32) ? 1 : 0; res = ibuild.EmitFDCmpCR(lhs, rhs, ordered); ibuild.EmitStoreFPRF(res); - ibuild.EmitStoreCR(res, inst.CRFD); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); } void JitILBase::fsign(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index e7b96a6850..6f275184ee 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -12,7 +12,7 @@ static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val) { IREmitter::InstLoc res = ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0)); - ibuild.EmitStoreCR(res, 0); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), 0); } void JitILBase::reg_imm(UGeckoInstruction inst) @@ -114,7 +114,7 @@ void JitILBase::cmpXX(UGeckoInstruction inst) js.downcountAmount++; //TODO: should this be somewhere else? 
- ibuild.EmitStoreCR(res, inst.CRFD); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); } void JitILBase::boolX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp index dbd08d94bb..31c6ffa4b4 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp @@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst) IREmitter::InstLoc d = ibuild.EmitIntConst(0); for (int i = 0; i < 8; ++i) { - d = ibuild.EmitShl(d, ibuild.EmitIntConst(4)); - d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i)); + IREmitter::InstLoc cr = ibuild.EmitLoadCR(i); + cr = ibuild.EmitConvertFromFastCR(cr); + cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i)); + d = ibuild.EmitOr(d, cr); } ibuild.EmitStoreGReg(d, inst.RD); } @@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst) IREmitter::InstLoc value; value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4)); value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF)); + value = ibuild.EmitConvertToFastCR(value); ibuild.EmitStoreCR(value, i); } } @@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst) // Get bit CRBA in EAX aligned with bit CRBD const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2); + eax = ibuild.EmitConvertFromFastCR(eax); if (shiftA < 0) eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA)); else if (shiftA > 0) @@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst) // Get bit CRBB in ECX aligned with bit CRBD const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2); + ecx = ibuild.EmitConvertFromFastCR(ecx); if (shiftB < 0) ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB)); else if (shiftB > 0) @@ -211,7 +216,9 @@ void 
JitILBase::crXX(UGeckoInstruction inst) // Store result bit in CRBD eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3))); IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2); + bd = ibuild.EmitConvertFromFastCR(bd); bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3)))); bd = ibuild.EmitOr(bd, eax); + bd = ibuild.EmitConvertToFastCR(bd); ibuild.EmitStoreCR(bd, inst.CRBD >> 2); } From 79cc000d6298782a38a5bb0969e07960df438264 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 20:34:03 -0700 Subject: [PATCH 05/11] JITIL: Optimize compare instruction. --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 80 +++++++++++-------- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 64 --------------- .../PowerPC/JitILCommon/JitILBase_Integer.cpp | 4 +- 3 files changed, 50 insertions(+), 98 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 8abe9fd3f9..e87d24ff76 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -550,6 +550,48 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { regNormalRegClear(RI, I); } +static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) { + bool signed_compare = getOpcode(*I) == ICmpCRSigned; + X64Reg reg; + if (RI.IInfo[I - RI.FirstI] & 4) + { + reg = regEnsureInReg(RI, getOp1(I)); + if (signed_compare) + RI.Jit->MOVSX(64, 32, reg, R(reg)); + } + else + { + reg = regFindFreeReg(RI); + if (signed_compare) + RI.Jit->MOVSX(64, 32, reg, regLocForInst(RI, getOp1(I))); + else + RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I))); + } + if (isImm(*getOp2(I))) + { + unsigned RHS = RI.Build->GetImmValue(getOp2(I)); + if (!signed_compare && (RHS & 0x80000000U)) + { + RI.Jit->MOV(32, R(EAX), Imm32(RHS)); + RI.Jit->SUB(64, R(reg), R(RAX)); + } + else if (RHS) + { + RI.Jit->SUB(64, R(reg), Imm32(RHS)); + } + } + else + { + if (signed_compare) + RI.Jit->MOVSX(64, 32, RAX, 
regLocForInst(RI, getOp2(I))); + else + RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); + RI.Jit->SUB(64, R(reg), R(RAX)); + } + RI.regs[reg] = I; + regNormalRegClear(RI, I); +} + static void regWriteExit(RegInfo& RI, InstLoc dest) { if (isImm(*dest)) { RI.exitNumber++; @@ -1077,42 +1119,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regEmitICmpInst(RI, I, CC_LE); break; } - case ICmpCRUnsigned: { + case ICmpCRUnsigned: + { if (!thisUsed) break; - regEmitCmp(RI, I); - X64Reg reg = regBinReg(RI, I); - FixupBranch pLesser = Jit->J_CC(CC_B); - FixupBranch pGreater = Jit->J_CC(CC_A); - Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0 - FixupBranch continue1 = Jit->J(); - Jit->SetJumpTarget(pGreater); - Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0 - FixupBranch continue2 = Jit->J(); - Jit->SetJumpTarget(pLesser); - Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0 - Jit->SetJumpTarget(continue1); - Jit->SetJumpTarget(continue2); - RI.regs[reg] = I; - regNormalRegClear(RI, I); + regEmitICmpCRInst(RI, I); break; } - case ICmpCRSigned: { + case ICmpCRSigned: + { if (!thisUsed) break; - regEmitCmp(RI, I); - X64Reg reg = regBinReg(RI, I); - FixupBranch pLesser = Jit->J_CC(CC_L); - FixupBranch pGreater = Jit->J_CC(CC_G); - Jit->MOV(32, R(reg), Imm32(0x2)); // _x86Reg == 0 - FixupBranch continue1 = Jit->J(); - Jit->SetJumpTarget(pGreater); - Jit->MOV(32, R(reg), Imm32(0x4)); // _x86Reg > 0 - FixupBranch continue2 = Jit->J(); - Jit->SetJumpTarget(pLesser); - Jit->MOV(32, R(reg), Imm32(0x8)); // _x86Reg < 0 - Jit->SetJumpTarget(continue1); - Jit->SetJumpTarget(continue2); - RI.regs[reg] = I; - regNormalRegClear(RI, I); + regEmitICmpCRInst(RI, I); break; } case ConvertFromFastCR: diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 0f0bb1c7f3..032b953d3e 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -849,42 +849,6 @@ 
InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) { return EmitBranchUncond(Op2); return nullptr; } - if (getOpcode(*Op1) == And && - isImm(*getOp2(Op1)) && - getOpcode(*getOp1(Op1)) == ICmpCRSigned) { - unsigned branchValue = GetImmValue(getOp2(Op1)); - if (branchValue == 2) - return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - if (branchValue == 4) - return FoldBranchCond(EmitICmpSgt(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - if (branchValue == 8) - return FoldBranchCond(EmitICmpSlt(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2); - } - if (getOpcode(*Op1) == Xor && - isImm(*getOp2(Op1))) { - InstLoc XOp1 = getOp1(Op1); - unsigned branchValue = GetImmValue(getOp2(Op1)); - if (getOpcode(*XOp1) == And && - isImm(*getOp2(XOp1)) && - getOpcode(*getOp1(XOp1)) == ICmpCRSigned) { - unsigned innerBranchValue = - GetImmValue(getOp2(XOp1)); - if (branchValue == innerBranchValue) { - if (branchValue == 2) - return FoldBranchCond(EmitICmpNe(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - if (branchValue == 4) - return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - if (branchValue == 8) - return FoldBranchCond(EmitICmpSge(getOp1(getOp1(XOp1)), - getOp2(getOp1(XOp1))), Op2); - } - } - } return EmitBiOp(BranchCond, Op1, Op2); } @@ -965,38 +929,10 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { } InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { - if (isImm(*Op2)) { - int c1 = (int)GetImmValue(Op1), - c2 = (int)GetImmValue(Op2), - result; - if (c1 == c2) - result = 2; - else if (c1 > c2) - result = 4; - else - result = 8; - return EmitIntConst(result); - } - } return EmitBiOp(ICmpCRSigned, Op1, Op2); } InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { - if (isImm(*Op2)) { - unsigned int c1 = GetImmValue(Op1), - c2 = GetImmValue(Op2), - result; - if (c1 == c2) - result = 2; - else if 
(c1 > c2) - result = 4; - else - result = 8; - return EmitIntConst(result); - } - } return EmitBiOp(ICmpCRUnsigned, Op1, Op2); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index 6f275184ee..e7b96a6850 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -12,7 +12,7 @@ static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val) { IREmitter::InstLoc res = ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0)); - ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), 0); + ibuild.EmitStoreCR(res, 0); } void JitILBase::reg_imm(UGeckoInstruction inst) @@ -114,7 +114,7 @@ void JitILBase::cmpXX(UGeckoInstruction inst) js.downcountAmount++; //TODO: should this be somewhere else? - ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); + ibuild.EmitStoreCR(res, inst.CRFD); } void JitILBase::boolX(UGeckoInstruction inst) From 5bb428c68543fea11233b3f3f7e74b6daa5013f7 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 21:43:38 -0700 Subject: [PATCH 06/11] JITIL: optimize branches. 
--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 50 +++++++++++++++++++ Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 7 +-- Source/Core/Core/PowerPC/JitILCommon/IR.h | 18 +++++++ .../PowerPC/JitILCommon/JitILBase_Branch.cpp | 42 +++++++++++----- 4 files changed, 101 insertions(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index e87d24ff76..94d61dc761 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -665,6 +665,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case FDNeg: case ConvertFromFastCR: case ConvertToFastCR: + case FastCRSOSet: + case FastCREQSet: + case FastCRGTSet: + case FastCRLTSet: if (thisUsed) regMarkUse(RI, I, getOp1(I), 1); break; @@ -1209,6 +1213,52 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } + case FastCRSOSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->MOV(64, R(RAX), Imm64(1ull << 61)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); + Jit->SETcc(CC_NZ, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } + case FastCREQSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); + Jit->SETcc(CC_Z, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } + case FastCRGTSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); + Jit->SETcc(CC_G, R(AL)); + Jit->MOVZX(32, 8, reg, R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } + case FastCRLTSet: + { + if (!thisUsed) break; + X64Reg reg = regUReg(RI, I); + Jit->MOV(64, R(RAX), Imm64(1ull << 62)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); + Jit->SETcc(CC_NZ, R(AL)); + Jit->MOVZX(32, 8, reg, 
R(AL)); + RI.regs[reg] = I; + regNormalRegClear(RI, I); + break; + } case LoadSingle: { if (!thisUsed) break; X64Reg reg = fregFindFreeReg(RI); diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 032b953d3e..4125b21d0b 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -1065,9 +1065,9 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { numberOfOperands[CInt16] = 0; numberOfOperands[CInt32] = 0; - static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; - static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR}; - static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; + static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; + static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, 
ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, }; + static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; for (auto& op : ZeroOp) { numberOfOperands[op] = 0; } @@ -1174,6 +1174,7 @@ static const std::string opcodeNames[] = { "Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR", "ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR", + "FastCRSOSet", "FastCREQSet", "FastCRGTSet", "FastCRLTSet", "FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", "ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt", diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index c09de3b9b2..37887891e1 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -48,6 +48,11 @@ enum Opcode { StoreFPRF, StoreGQR, StoreSRR, + // Branch conditions + FastCRSOSet, + FastCREQSet, + FastCRGTSet, + FastCRLTSet, // Arbitrary interpreter instruction FallBackToInterpreter, @@ -77,6 +82,7 @@ enum Opcode { ICmpSlt, ICmpSge, ICmpSle, // Opposite of sgt + // Memory store operators Store8, Store16, @@ -382,6 +388,18 @@ public: InstLoc EmitConvertToFastCR(InstLoc op1) { return FoldUOp(ConvertToFastCR, op1); } + InstLoc EmitFastCRSOSet(InstLoc op1) { + return FoldUOp(FastCRSOSet, op1); + } + InstLoc EmitFastCREQSet(InstLoc op1) { + 
return FoldUOp(FastCREQSet, op1); + } + InstLoc EmitFastCRLTSet(InstLoc op1) { + return FoldUOp(FastCRLTSet, op1); + } + InstLoc EmitFastCRGTSet(InstLoc op1) { + return FoldUOp(FastCRGTSet, op1); + } InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) { return FoldBiOp(FallBackToInterpreter, op1, op2); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp index 9cb80059ec..c60080e6a9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp @@ -61,16 +61,35 @@ void JitILBase::bx(UGeckoInstruction inst) ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); } +static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) +{ + IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + IREmitter::InstLoc CRTest; + switch (3 - (inst.BI & 3)) + { + case CR_SO_BIT: + CRTest = ibuild.EmitFastCRSOSet(CRReg); + break; + case CR_EQ_BIT: + CRTest = ibuild.EmitFastCREQSet(CRReg); + break; + case CR_GT_BIT: + CRTest = ibuild.EmitFastCRGTSet(CRReg); + break; + case CR_LT_BIT: + CRTest = ibuild.EmitFastCRLTSet(CRReg); + break; + } + if (!(inst.BO & 8)) + CRTest = ibuild.EmitXor(CRTest, ibuild.EmitIntConst(1)); + return CRTest; +} + static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) { IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr; if ((inst.BO & 16) == 0) // Test a CR bit { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - CRReg = ibuild.EmitConvertFromFastCR(CRReg); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - CRTest = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - CRTest = ibuild.EmitXor(CRCmp, CRTest); + CRTest = EmitCRTest(ibuild, inst); } if ((inst.BO & 4) == 0) { @@ -141,13 +160,10 @@ void JitILBase::bcctrx(UGeckoInstruction inst) IREmitter::InstLoc test; if ((inst.BO & 16) == 0) // 
Test a CR bit { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - CRReg = ibuild.EmitConvertFromFastCR(CRReg); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - test = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - test = ibuild.EmitXor(test, CRCmp); - } else { + test = EmitCRTest(ibuild, inst); + } + else + { test = ibuild.EmitIntConst(1); } test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); From c8dd557dde01cab79e22d8c338fc25a2018afd2f Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 22:17:45 -0700 Subject: [PATCH 07/11] JITIL: compare instruction folding. --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 2 +- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 53 +++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 94d61dc761..b1dba1c913 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -1721,7 +1721,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case ICmpNe: flag = CC_E; break; case ICmpUgt: flag = CC_BE; break; case ICmpUlt: flag = CC_AE; break; - case ICmpUge: flag = CC_L; break; + case ICmpUge: flag = CC_B; break; case ICmpUle: flag = CC_A; break; case ICmpSgt: flag = CC_LE; break; case ICmpSlt: flag = CC_GE; break; diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 4125b21d0b..90450ea652 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -356,6 +356,25 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { return getOp1(Op1); } } + if (Opcode == FastCRGTSet) + { + if (getOpcode(*Op1) == ICmpCRSigned) + return EmitICmpSgt(getOp1(Op1), getOp2(Op1)); + if (getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpUgt(getOp1(Op1), getOp2(Op1)); + } + if 
(Opcode == FastCRLTSet) + { + if (getOpcode(*Op1) == ICmpCRSigned) + return EmitICmpSlt(getOp1(Op1), getOp2(Op1)); + if (getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpUlt(getOp1(Op1), getOp2(Op1)); + } + if (Opcode == FastCREQSet) + { + if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned) + return EmitICmpEq(getOp1(Op1), getOp2(Op1)); + } return EmitUOp(Opcode, Op1, extra); } @@ -778,6 +797,35 @@ InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) { return EmitBiOp(Or, Op1, Op2); } +static unsigned ICmpInverseOp(unsigned op) +{ + switch (op) + { + case ICmpEq: + return ICmpNe; + case ICmpNe: + return ICmpEq; + case ICmpUlt: + return ICmpUge; + case ICmpUgt: + return ICmpUle; + case ICmpUle: + return ICmpUgt; + case ICmpUge: + return ICmpUlt; + case ICmpSlt: + return ICmpSge; + case ICmpSgt: + return ICmpSle; + case ICmpSle: + return ICmpSgt; + case ICmpSge: + return ICmpSlt; + } + PanicAlert("Bad opcode"); + return Nop; +} + InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) { simplifyCommutative(Xor, Op1, Op2); @@ -794,6 +842,11 @@ InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) { GetImmValue(getOp2(Op1)); return FoldXor(getOp1(Op1), EmitIntConst(RHS)); } + if (isICmp(getOpcode(*Op1)) && GetImmValue(Op2) == 1) + { + return FoldBiOp(ICmpInverseOp(getOpcode(*Op1)), getOp1(Op1), getOp2(Op1)); + + } } if (Op1 == Op2) return EmitIntConst(0); From 79ecdf5fd07d9ef2effcc7963cfeb74a14324ce0 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 22:36:21 -0700 Subject: [PATCH 08/11] JitIL: Misc small optimizations. 
--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index b1dba1c913..0bc9e8ec74 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -763,9 +763,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regMarkUse(RI, I, getOp1(getOp1(I)), 1); break; case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { + if (isICmp(*getOp1(I))) { regMarkUse(RI, I, getOp1(getOp1(I)), 1); + if (!isImm(*getOp2(getOp1(I)))) + regMarkUse(RI, I, getOp2(getOp1(I)), 2); } else { regMarkUse(RI, I, getOp1(I), 1); } @@ -861,9 +862,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } case StoreCR: { - Jit->MOV(64, R(RCX), regLocForInst(RI, getOp1(I))); + X64Reg reg = regEnsureInReg(RI, getOp1(I)); unsigned ppcreg = *I >> 16; - Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(RCX)); + Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); regNormalRegClear(RI, I); break; } @@ -1711,10 +1712,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), - Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); + if (isICmp(*getOp1(I))) { + regEmitCmp(RI, getOp1(I)); CCFlags flag; switch (getOpcode(*getOp1(I))) { case ICmpEq: flag = CC_NE; break; @@ -1734,7 +1733,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(cont); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(getOp1(I))); - } else { + if (RI.IInfo[I - RI.FirstI] & 8) + regClearInst(RI, getOp2(getOp1(I))); + } + else { Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); FixupBranch cont = Jit->J_CC(CC_Z); regWriteExit(RI, 
getOp2(I)); From f27940478deeb2ad43ee6ad0ecefbb67e3e00107 Mon Sep 17 00:00:00 2001 From: magumagu Date: Thu, 26 Jun 2014 15:22:37 -0700 Subject: [PATCH 09/11] JitIL: Attempt to constant-fold more aggressively. --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 8 +++++++- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 22 +++++++++++++++++++-- Source/Core/Core/PowerPC/JitILCommon/IR.h | 8 +++++--- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 0bc9e8ec74..0870a0c32d 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -1684,7 +1684,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case CInt16: { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), Imm32(ibuild->GetImmValue(I))); + u64 val = ibuild->GetImmValue64(I); + if ((u32)val == val) + Jit->MOV(32, R(reg), Imm32(val)); + else if ((s32)val == val) + Jit->MOV(64, R(reg), Imm32(val)); + else + Jit->MOV(64, R(reg), Imm64(val)); RI.regs[reg] = I; break; } diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 90450ea652..8bb10142d1 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -362,6 +362,8 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { return EmitICmpSgt(getOp1(Op1), getOp2(Op1)); if (getOpcode(*Op1) == ICmpCRUnsigned) return EmitICmpUgt(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst((s64)GetImmValue64(Op1) > 0); } if (Opcode == FastCRLTSet) { @@ -369,11 +371,15 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { return EmitICmpSlt(getOp1(Op1), getOp2(Op1)); if (getOpcode(*Op1) == ICmpCRUnsigned) return EmitICmpUlt(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst(!!(GetImmValue64(Op1) & 
(1ull << 62))); } if (Opcode == FastCREQSet) { if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned) return EmitICmpEq(getOp1(Op1), getOp2(Op1)); + if (isImm(*Op1)) + return EmitIntConst((GetImmValue64(Op1) & 0xFFFFFFFFU) == 0); } return EmitUOp(Opcode, Op1, extra); @@ -982,10 +988,22 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { } InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) { + if (isImm(*Op1)) { + if (isImm(*Op2)) { + s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2); + return EmitIntConst64((u64)diff); + } + } return EmitBiOp(ICmpCRSigned, Op1, Op2); } InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) { + if (isImm(*Op1)) { + if (isImm(*Op2)) { + u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2); + return EmitIntConst64(diff); + } + } return EmitBiOp(ICmpCRUnsigned, Op1, Op2); } @@ -1045,7 +1063,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned } } -InstLoc IRBuilder::EmitIntConst(unsigned value) { +InstLoc IRBuilder::EmitIntConst64(u64 value) { InstLoc curIndex = InstList.data() + InstList.size(); InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8)); MarkUsed.push_back(false); @@ -1053,7 +1071,7 @@ InstLoc IRBuilder::EmitIntConst(unsigned value) { return curIndex; } -unsigned IRBuilder::GetImmValue(InstLoc I) const { +u64 IRBuilder::GetImmValue64(InstLoc I) const { return ConstList[*I >> 8]; } diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index 37887891e1..58202ffd9b 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -246,7 +246,8 @@ private: unsigned ComputeKnownZeroBits(InstLoc I) const; public: - InstLoc EmitIntConst(unsigned value); + InstLoc EmitIntConst(unsigned value) { return EmitIntConst64(value); } + InstLoc EmitIntConst64(u64 value); InstLoc EmitStoreLink(InstLoc val) { return 
FoldUOp(StoreLink, val); } @@ -559,7 +560,8 @@ public: InstLoc getFirstInst() { return InstList.data(); } unsigned int getNumInsts() { return (unsigned int)InstList.size(); } unsigned int ReadInst(InstLoc I) { return *I; } - unsigned int GetImmValue(InstLoc I) const; + unsigned int GetImmValue(InstLoc I) const { return (u32)GetImmValue64(I); } + u64 GetImmValue64(InstLoc I) const; void SetMarkUsed(InstLoc I); bool IsMarkUsed(InstLoc I) const; void WriteToFile(u64 codeHash); @@ -598,7 +600,7 @@ private: std::vector InstList; // FIXME: We must ensure this is continuous! std::vector MarkUsed; // Used for IRWriter - std::vector ConstList; + std::vector ConstList; InstLoc curReadPtr; InstLoc GRegCache[32]; InstLoc GRegCacheStore[32]; From 3627bd21f1fc513f5bafd07bed4de9d9b119a583 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 4 Jul 2014 03:43:55 -0500 Subject: [PATCH 10/11] Remove JitArmIL files from the project. Due to how the new CR-flags work, it isn't possible without some hefty work in the JITIL backend to support this on 32bit systems. 
--- Source/Android/res/values-ja/strings.xml | 1 - Source/Android/res/values/arrays.xml | 2 - Source/Android/res/values/strings.xml | 1 - Source/Core/Core/CMakeLists.txt | 6 - Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp | 744 ------------------ Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h | 1 - Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp | 345 -------- Source/Core/Core/PowerPC/JitArmIL/JitIL.h | 91 --- .../Core/Core/PowerPC/JitArmIL/JitILAsm.cpp | 106 --- Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h | 27 - .../Core/PowerPC/JitArmIL/JitIL_Branch.cpp | 168 ---- .../Core/PowerPC/JitArmIL/JitIL_Tables.cpp | 492 ------------ .../Core/Core/PowerPC/JitArmIL/JitIL_Tables.h | 14 - Source/Core/Core/PowerPC/JitInterface.cpp | 12 - 14 files changed, 2010 deletions(-) delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitIL.h delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp delete mode 100644 Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h diff --git a/Source/Android/res/values-ja/strings.xml b/Source/Android/res/values-ja/strings.xml index 49502bf86d..84b6c24f58 100644 --- a/Source/Android/res/values-ja/strings.xml +++ b/Source/Android/res/values-ja/strings.xml @@ -137,7 +137,6 @@ JIT64 Recompiler JITIL Recompiler JIT ARM Recompiler - JITIL ARM Recompiler CPU CPUコア %s diff --git a/Source/Android/res/values/arrays.xml b/Source/Android/res/values/arrays.xml index 4aaedd6370..4233c194be 100644 --- a/Source/Android/res/values/arrays.xml +++ b/Source/Android/res/values/arrays.xml @@ -19,12 +19,10 @@ @string/interpreter @string/jit_arm_recompiler - @string/jitil_arm_recompiler 0 
3 - 4 diff --git a/Source/Android/res/values/strings.xml b/Source/Android/res/values/strings.xml index 88d89dbb85..92e3094121 100644 --- a/Source/Android/res/values/strings.xml +++ b/Source/Android/res/values/strings.xml @@ -138,7 +138,6 @@ JIT64 Recompiler JITIL Recompiler JIT ARM Recompiler - JITIL ARM Recompiler CPU CPU Core %s diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 53b3881a04..77f778c877 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -218,12 +218,6 @@ if(_M_ARM_32) PowerPC/JitArm32/JitArm_LoadStorePaired.cpp PowerPC/JitArm32/JitArm_SystemRegisters.cpp PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp - #JitArmIL - PowerPC/JitArmIL/JitIL.cpp - PowerPC/JitArmIL/JitILAsm.cpp - PowerPC/JitArmIL/JitIL_Tables.cpp - PowerPC/JitArmIL/JitIL_Branch.cpp - PowerPC/JitArmIL/IR_Arm.cpp ) endif() diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp deleted file mode 100644 index 7847c6f953..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#include - -#include "Common/ArmEmitter.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitILCommon/IR.h" - -using namespace IREmitter; -using namespace ArmGen; -static const unsigned int MAX_NUMBER_OF_REGS = 32; - -struct RegInfo { - JitArmIL *Jit; - IRBuilder* Build; - InstLoc FirstI; - std::vector IInfo; - std::vector lastUsed; - InstLoc regs[MAX_NUMBER_OF_REGS]; - InstLoc fregs[MAX_NUMBER_OF_REGS]; - unsigned numSpills; - unsigned numFSpills; - unsigned exitNumber; - - RegInfo(JitArmIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) { - for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { - regs[i] = 0; - fregs[i] = 0; - } - numSpills = 0; - numFSpills = 0; - exitNumber = 0; - } - - private: - RegInfo(RegInfo&); // DO NOT IMPLEMENT -}; - -static const ARMReg RegAllocOrder[] = {R0, R1, R2, R3, R4, R5, R6, R7, R8}; -static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(ARMReg); - -static unsigned SlotSet[1000]; - -static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) { - unsigned& info = R.IInfo[Op - R.FirstI]; - if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1); - if (info < 2) info++; - R.lastUsed[Op - R.FirstI] = std::max(R.lastUsed[Op - R.FirstI], I); -} -static void regClearInst(RegInfo& RI, InstLoc I) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == I) - RI.regs[RegAllocOrder[i]] = 0; -} -static void regNormalRegClear(RegInfo& RI, InstLoc I) { - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); -} - -static unsigned regReadUse(RegInfo& R, InstLoc I) { - return R.IInfo[I - R.FirstI] & 3; -} - -static u32 regLocForSlot(RegInfo& RI, unsigned slot) { - return (u32)&SlotSet[slot - 1]; -} - -static 
unsigned regCreateSpill(RegInfo& RI, InstLoc I) { - unsigned newSpill = ++RI.numSpills; - RI.IInfo[I - RI.FirstI] |= newSpill << 16; - return newSpill; -} - -static unsigned regGetSpill(RegInfo& RI, InstLoc I) { - return RI.IInfo[I - RI.FirstI] >> 16; -} - -static void regSpill(RegInfo& RI, ARMReg reg) { - if (!RI.regs[reg]) return; - unsigned slot = regGetSpill(RI, RI.regs[reg]); - if (!slot) { - slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOVI2R(R14, regLocForSlot(RI, slot)); - RI.Jit->STR(reg, R14, 0); - } - RI.regs[reg] = 0; -} - -static ARMReg regFindFreeReg(RegInfo& RI) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == 0) - return RegAllocOrder[i]; - - int bestIndex = -1; - InstLoc bestEnd = 0; - for (int i = 0; i < RegAllocSize; ++i) { - const InstLoc start = RI.regs[RegAllocOrder[i]]; - const InstLoc end = RI.lastUsed[start - RI.FirstI]; - if (bestEnd < end) { - bestEnd = end; - bestIndex = i; - } - } - - ARMReg reg = RegAllocOrder[bestIndex]; - regSpill(RI, reg); - return reg; -} -static ARMReg regLocForInst(RegInfo& RI, InstLoc I) { - for (int i = 0; i < RegAllocSize; i++) - if (RI.regs[RegAllocOrder[i]] == I) - return RegAllocOrder[i]; - - if (regGetSpill(RI, I) == 0) - PanicAlert("Retrieving unknown spill slot?!"); - RI.Jit->MOVI2R(R14, regLocForSlot(RI, regGetSpill(RI, I))); - ARMReg reg = regFindFreeReg(RI); - RI.Jit->LDR(reg, R14, 0); - return reg; -} -static ARMReg regBinLHSReg(RegInfo& RI, InstLoc I) { - ARMReg reg = regFindFreeReg(RI); - RI.Jit->MOV(reg, regLocForInst(RI, getOp1(I))); - return reg; -} - -// If the lifetime of the register used by an operand ends at I, -// return the register. Otherwise return a free register. -static ARMReg regBinReg(RegInfo& RI, InstLoc I) { - // FIXME: When regLocForInst() is extracted as a local variable, - // "Retrieving unknown spill slot?!" is shown. 
- if (RI.IInfo[I - RI.FirstI] & 4) - return regLocForInst(RI, getOp1(I)); - else if (RI.IInfo[I - RI.FirstI] & 8) - return regLocForInst(RI, getOp2(I)); - - return regFindFreeReg(RI); -} - -static void regSpillCallerSaved(RegInfo& RI) { - regSpill(RI, R0); - regSpill(RI, R1); - regSpill(RI, R2); - regSpill(RI, R3); -} - -static ARMReg regEnsureInReg(RegInfo& RI, InstLoc I) { - return regLocForInst(RI, I); -} - -static void regWriteExit(RegInfo& RI, InstLoc dest) { - if (isImm(*dest)) { - RI.exitNumber++; - RI.Jit->WriteExit(RI.Build->GetImmValue(dest)); - } else { - RI.Jit->WriteExitDestInReg(regLocForInst(RI, dest)); - } -} -static void regStoreInstToPPCState(RegInfo& RI, unsigned width, InstLoc I, s32 offset) { - void (JitArmIL::*op)(ARMReg, ARMReg, Operand2, bool); - switch (width) - { - case 32: - op = &JitArmIL::STR; - break; - case 8: - op = &JitArmIL::STRB; - break; - default: - PanicAlert("Not implemented!"); - return; - break; - } - - if (isImm(*I)) { - RI.Jit->MOVI2R(R12, RI.Build->GetImmValue(I)); - (RI.Jit->*op)(R12, R9, offset, true); - return; - } - ARMReg reg = regEnsureInReg(RI, I); - (RI.Jit->*op)(reg, R9, offset, true); -} - -// -// Mark and calculation routines for profiled load/store addresses -// Could be extended to unprofiled addresses. 
-static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) { - if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); - if (Memory::IsRAMAddress(addr)) - return; - } - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) { - regMarkUse(RI, I, getOp1(AI), OpNum); - return; - } - regMarkUse(RI, I, AI, OpNum); -} -// Binary ops -void JitArmIL::BIN_XOR(ARMReg reg, Operand2 op2) -{ - EOR(reg, reg, op2); -} -void JitArmIL::BIN_OR(ARMReg reg, Operand2 op2) -{ - ORR(reg, reg, op2); -} -void JitArmIL::BIN_AND(ARMReg reg, Operand2 op2) -{ - AND(reg, reg, op2); -} -void JitArmIL::BIN_ADD(ARMReg reg, Operand2 op2) -{ - ADD(reg, reg, op2); -} -static void regEmitShiftInst(RegInfo& RI, InstLoc I, void (JitArmIL::*op)(ARMReg, ARMReg, Operand2)) -{ - ARMReg reg = regBinLHSReg(RI, I); - if (isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - (RI.Jit->*op)(reg, reg, RHS); - RI.regs[reg] = I; - return; - } - (RI.Jit->*op)(reg, reg, regLocForInst(RI, getOp2(I))); - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} - -static void regEmitBinInst(RegInfo& RI, InstLoc I, - void (JitArmIL::*op)(ARMReg, Operand2), - bool commutable = false) { - ARMReg reg; - bool commuted = false; - if (RI.IInfo[I - RI.FirstI] & 4) { - reg = regEnsureInReg(RI, getOp1(I)); - } else if (commutable && (RI.IInfo[I - RI.FirstI] & 8)) { - reg = regEnsureInReg(RI, getOp2(I)); - commuted = true; - } else { - reg = regFindFreeReg(RI); - RI.Jit->MOV(reg, regLocForInst(RI, getOp1(I))); - } - if (isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - Operand2 RHSop; - if (TryMakeOperand2(RHS, RHSop)) - (RI.Jit->*op)(reg, RHSop); - else - { - RI.Jit->MOVI2R(R12, RHS); - (RI.Jit->*op)(reg, R12); - } - } else if (commuted) { - (RI.Jit->*op)(reg, regLocForInst(RI, getOp1(I))); - } else { - (RI.Jit->*op)(reg, regLocForInst(RI, getOp2(I))); - } - RI.regs[reg] = I; - regNormalRegClear(RI, I); -} -static void regEmitCmp(RegInfo& RI, InstLoc I) { - if 
(isImm(*getOp2(I))) { - unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - Operand2 op; - if (TryMakeOperand2(RHS, op)) - RI.Jit->CMP(regLocForInst(RI, getOp1(I)), op); - else - { - RI.Jit->MOVI2R(R12, RHS); - RI.Jit->CMP(regLocForInst(RI, getOp1(I)), R12); - } - } else { - ARMReg reg = regEnsureInReg(RI, getOp1(I)); - RI.Jit->CMP(reg, regLocForInst(RI, getOp2(I))); - } -} - -static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) { - RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); - RI.Build = ibuild; - - // Pass to compute liveness - ibuild->StartBackPass(); - for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) { - InstLoc I = ibuild->ReadBackward(); - unsigned int op = getOpcode(*I); - bool thisUsed = regReadUse(RI, I) ? true : false; - switch (op) { - default: - PanicAlert("Unexpected inst!"); - case Nop: - case CInt16: - case CInt32: - case LoadGReg: - case LoadLink: - case LoadCR: - case LoadCarry: - case LoadCTR: - case LoadMSR: - case LoadFReg: - case LoadFRegDENToZero: - case LoadGQR: - case BlockEnd: - case BlockStart: - case FallBackToInterpreter: - case SystemCall: - case RFIExit: - case InterpreterBranch: - case ShortIdleLoop: - case FPExceptionCheck: - case DSIExceptionCheck: - case ISIException: - case ExtExceptionCheck: - case BreakPointCheck: - case Int3: - case Tramp: - // No liveness effects - break; - case SExt8: - case SExt16: - case BSwap32: - case BSwap16: - case Cntlzw: - case Not: - case DupSingleToMReg: - case DoubleToSingle: - case ExpandPackedToMReg: - case CompactMRegToPacked: - case FPNeg: - case FPDup0: - case FPDup1: - case FSNeg: - case FDNeg: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Load8: - case Load16: - case Load32: - regMarkMemAddress(RI, I, getOp1(I), 1); - break; - case LoadDouble: - case LoadSingle: - case LoadPaired: - if (thisUsed) - regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreCR: - case StoreCarry: - case StoreFPRF: - 
regMarkUse(RI, I, getOp1(I), 1); - break; - case StoreGReg: - case StoreLink: - case StoreCTR: - case StoreMSR: - case StoreGQR: - case StoreSRR: - case StoreFReg: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case Add: - case Sub: - case And: - case Or: - case Xor: - case Mul: - case MulHighUnsigned: - case Rol: - case Shl: - case Shrl: - case Sarl: - case ICmpCRUnsigned: - case ICmpCRSigned: - case ICmpEq: - case ICmpNe: - case ICmpUgt: - case ICmpUlt: - case ICmpUge: - case ICmpUle: - case ICmpSgt: - case ICmpSlt: - case ICmpSge: - case ICmpSle: - case FSMul: - case FSAdd: - case FSSub: - case FDMul: - case FDAdd: - case FDSub: - case FPAdd: - case FPMul: - case FPSub: - case FPMerge00: - case FPMerge01: - case FPMerge10: - case FPMerge11: - case FDCmpCR: - case InsertDoubleInMReg: - if (thisUsed) { - regMarkUse(RI, I, getOp1(I), 1); - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - } - break; - case Store8: - case Store16: - case Store32: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - regMarkMemAddress(RI, I, getOp2(I), 2); - break; - case StoreSingle: - case StoreDouble: - case StorePaired: - regMarkUse(RI, I, getOp1(I), 1); - regMarkUse(RI, I, getOp2(I), 2); - break; - case BranchUncond: - if (!isImm(*getOp1(I))) - regMarkUse(RI, I, getOp1(I), 1); - break; - case IdleBranch: - regMarkUse(RI, I, getOp1(getOp1(I)), 1); - break; - case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - regMarkUse(RI, I, getOp1(getOp1(I)), 1); - } else { - regMarkUse(RI, I, getOp1(I), 1); - } - if (!isImm(*getOp2(I))) - regMarkUse(RI, I, getOp2(I), 2); - break; - } - } - } - - ibuild->StartForwardPass(); - for (unsigned i = 0; i != RI.IInfo.size(); i++) { - InstLoc I = ibuild->ReadForward(); - - bool thisUsed = regReadUse(RI, I) ? 
true : false; - if (thisUsed) { - // Needed for IR Writer - ibuild->SetMarkUsed(I); - } - - switch (getOpcode(*I)) { - case CInt32: - case CInt16: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->MOVI2R(reg, ibuild->GetImmValue(I)); - RI.regs[reg] = I; - break; - } - case BranchUncond: { - regWriteExit(RI, getOp1(I)); - regNormalRegClear(RI, I); - break; - } - case BranchCond: { - if (isICmp(*getOp1(I)) && - isImm(*getOp2(getOp1(I)))) { - unsigned imm = RI.Build->GetImmValue(getOp2(getOp1(I))); - if (imm > 255) - { - Jit->MOVI2R(R14, imm); - Jit->CMP(regLocForInst(RI, getOp1(getOp1(I))), R14); - } - else - Jit->CMP(regLocForInst(RI, getOp1(getOp1(I))), imm); - CCFlags flag; - switch (getOpcode(*getOp1(I))) { - case ICmpEq: flag = CC_NEQ; break; - case ICmpNe: flag = CC_EQ; break; - case ICmpUgt: flag = CC_LS; break; - case ICmpUlt: flag = CC_HI; break; - case ICmpUge: flag = CC_HS; break; - case ICmpUle: flag = CC_LO; break; - case ICmpSgt: flag = CC_LT; break; - case ICmpSlt: flag = CC_GT; break; - case ICmpSge: flag = CC_LE; break; - case ICmpSle: flag = CC_GE; break; - default: PanicAlert("cmpXX"); flag = CC_AL; break; - } - FixupBranch cont = Jit->B_CC(flag); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(getOp1(I))); - } else { - Jit->CMP(regLocForInst(RI, getOp1(I)), 0); - FixupBranch cont = Jit->B_CC(CC_EQ); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - } - if (RI.IInfo[I - RI.FirstI] & 8) - regClearInst(RI, getOp2(I)); - break; - } - - case StoreGReg: { - unsigned ppcreg = *I >> 16; - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(gpr[ppcreg])); - regNormalRegClear(RI, I); - break; - } - case StoreCR: { - unsigned ppcreg = *I >> 16; - regStoreInstToPPCState(RI, 8, getOp1(I), PPCSTATE_OFF(cr_fast[ppcreg])); - regNormalRegClear(RI, I); - break; - } - case StoreLink: { - 
regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_LR])); - regNormalRegClear(RI, I); - break; - } - case StoreCTR: { - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(spr[SPR_CTR])); - regNormalRegClear(RI, I); - break; - } - case StoreMSR: { - regStoreInstToPPCState(RI, 32, getOp1(I), PPCSTATE_OFF(msr)); - regNormalRegClear(RI, I); - break; - } - case LoadGReg: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->LDR(reg, R9, PPCSTATE_OFF(gpr[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCR: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - unsigned ppcreg = *I >> 8; - Jit->LDRB(reg, R9, PPCSTATE_OFF(cr_fast[ppcreg])); - RI.regs[reg] = I; - break; - } - case LoadCTR: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_CTR])); - RI.regs[reg] = I; - break; - } - case LoadLink: { - if (!thisUsed) break; - ARMReg reg = regFindFreeReg(RI); - Jit->LDR(reg, R9, PPCSTATE_OFF(spr[SPR_LR])); - RI.regs[reg] = I; - break; - } - case FallBackToInterpreter: { - unsigned InstCode = ibuild->GetImmValue(getOp1(I)); - unsigned InstLoc = ibuild->GetImmValue(getOp2(I)); - // There really shouldn't be anything live across an - // interpreter call at the moment, but optimizing interpreter - // calls isn't completely out of the question... 
- regSpillCallerSaved(RI); - Jit->MOVI2R(R14, InstLoc); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->MOVI2R(R14, InstLoc + 4); - Jit->STR(R14, R9, PPCSTATE_OFF(npc)); - - Jit->MOVI2R(R0, InstCode); - Jit->MOVI2R(R14, (u32)GetInterpreterOp(InstCode)); - Jit->BL(R14); - break; - } - case SystemCall: { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOVI2R(R14, InstLoc + 4); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->LDR(R14, R9, PPCSTATE_OFF(Exceptions)); - Jit->ORR(R14, R14, EXCEPTION_SYSCALL); - Jit->STR(R14, R9, PPCSTATE_OFF(Exceptions)); - Jit->WriteExceptionExit(); - break; - } - case ShortIdleLoop: { - unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOVI2R(R14, (u32)&CoreTiming::Idle); - Jit->BL(R14); - Jit->MOVI2R(R14, InstLoc); - Jit->STR(R14, R9, PPCSTATE_OFF(pc)); - Jit->WriteExceptionExit(); - break; - } - case InterpreterBranch: { - Jit->LDR(R14, R9, PPCSTATE_OFF(npc)); - Jit->WriteExitDestInReg(R14); - break; - } - case RFIExit: { - const u32 mask = 0x87C0FFFF; - const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; - // R0 = MSR location - // R1 = MSR contents - // R2 = Mask - // R3 = Mask - ARMReg rA = R14; - ARMReg rB = R12; - ARMReg rC = R11; - ARMReg rD = R10; - Jit->MOVI2R(rB, (~mask) & clearMSR13); - Jit->MOVI2R(rC, mask & clearMSR13); - - Jit->LDR(rD, R9, PPCSTATE_OFF(msr)); - - Jit->AND(rD, rD, rB); // rD = Masked MSR - - Jit->LDR(rB, R9, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here - - Jit->AND(rB, rB, rC); // rB contains masked SRR1 here - Jit->ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1 - - Jit->STR(rB, R9, PPCSTATE_OFF(msr)); // STR rB in to rA - - Jit->LDR(rA, R9, PPCSTATE_OFF(spr[SPR_SRR0])); - - Jit->WriteRfiExitDestInR(rA); // rA gets unlocked here - break; - } - case Shl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, &JitArmIL::LSL); - break; - } - case Shrl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, 
&JitArmIL::LSR); - break; - } - case Sarl: { - if (!thisUsed) break; - regEmitShiftInst(RI, I, &JitArmIL::ASR); - break; - } - case And: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_AND, true); - break; - } - case Not: { - if (!thisUsed) break; - ARMReg reg = regBinLHSReg(RI, I); - Jit->MVN(reg, reg); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Or: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_OR, true); - break; - } - case Xor: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_XOR, true); - break; - } - case Add: { - if (!thisUsed) break; - regEmitBinInst(RI, I, &JitArmIL::BIN_ADD, true); - break; - } - case ICmpCRUnsigned: { - if (!thisUsed) break; - regEmitCmp(RI, I); - ARMReg reg = regBinReg(RI, I); - Jit->MOV(reg, 0x2); // Result == 0 - Jit->SetCC(CC_LO); Jit->MOV(reg, 0x8); // Result < 0 - Jit->SetCC(CC_HI); Jit->MOV(reg, 0x4); // Result > 0 - Jit->SetCC(); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - - case ICmpCRSigned: { - if (!thisUsed) break; - regEmitCmp(RI, I); - ARMReg reg = regBinReg(RI, I); - Jit->MOV(reg, 0x2); // Result == 0 - Jit->SetCC(CC_LT); Jit->MOV(reg, 0x8); // Result < 0 - Jit->SetCC(CC_GT); Jit->MOV(reg, 0x4); // Result > 0 - Jit->SetCC(); - RI.regs[reg] = I; - regNormalRegClear(RI, I); - break; - } - case Int3: - Jit->BKPT(0x321); - break; - case Tramp: break; - case Nop: break; - default: - PanicAlert("Unknown JIT instruction; aborting!"); - ibuild->WriteToFile(0); - exit(1); - } - } - for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { - if (RI.regs[i]) { - // Start a game in Burnout 2 to get this. Or animal crossing. - PanicAlert("Incomplete cleanup! (regs)"); - exit(1); - } - if (RI.fregs[i]) { - PanicAlert("Incomplete cleanup! 
(fregs)"); - exit(1); - } - } - - Jit->WriteExit(exitAddress); - Jit->BKPT(0x111); - -} -void JitArmIL::WriteCode(u32 exitAddress) { - DoWriteCode(&ibuild, this, exitAddress); -} diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h deleted file mode 100644 index 8b13789179..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp deleted file mode 100644 index e1a711e432..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include - -#include "Common/ArmEmitter.h" -#include "Common/Common.h" -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PatchEngine.h" -#include "Core/HLE/HLE.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/Profiler.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" - -using namespace ArmGen; -using namespace PowerPC; - -static int CODE_SIZE = 1024*1024*32; - -void JitArmIL::Init() -{ - AllocCodeSpace(CODE_SIZE); - blocks.Init(); - asm_routines.Init(); - - code_block.m_stats = &js.st; - code_block.m_gpa = &js.gpa; - code_block.m_fpa = &js.fpa; -} - -void JitArmIL::ClearCache() -{ - ClearCodeSpace(); - blocks.Clear(); -} - -void JitArmIL::Shutdown() -{ - FreeCodeSpace(); - blocks.Shutdown(); - asm_routines.Shutdown(); -} -void JitArmIL::unknown_instruction(UGeckoInstruction inst) -{ - // CCPU::Break(); - PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex); -} - -void JitArmIL::FallBackToInterpreter(UGeckoInstruction _inst) -{ 
- ibuild.EmitFallBackToInterpreter( - ibuild.EmitIntConst(_inst.hex), - ibuild.EmitIntConst(js.compilerPC)); -} - -void JitArmIL::HLEFunction(UGeckoInstruction _inst) -{ - // XXX -} - -void JitArmIL::DoNothing(UGeckoInstruction _inst) -{ - // Yup, just don't do anything. -} -void JitArmIL::Break(UGeckoInstruction _inst) -{ - ibuild.EmitINT3(); -} - -void JitArmIL::DoDownCount() -{ - ARMReg rA = R12; - LDR(rA, R9, PPCSTATE_OFF(downcount)); - if (js.downcountAmount < 255) // We can enlarge this if we used rotations - { - SUBS(rA, rA, js.downcountAmount); - } - else - { - ARMReg rB = R11; - MOVI2R(rB, js.downcountAmount); - SUBS(rA, rA, rB); - } - STR(rA, R9, PPCSTATE_OFF(downcount)); -} - -void JitArmIL::WriteExitDestInReg(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - DoDownCount(); - MOVI2R(Reg, (u32)asm_routines.dispatcher); - B(Reg); -} - -void JitArmIL::WriteRfiExitDestInR(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - DoDownCount(); - - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R0, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - - MOVI2R(R0, (u32)asm_routines.dispatcher); - B(R0); -} -void JitArmIL::WriteExceptionExit() -{ - DoDownCount(); - - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R0, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - - MOVI2R(R0, (u32)asm_routines.dispatcher); - B(R0); -} -void JitArmIL::WriteExit(u32 destination) -{ - DoDownCount(); - //If nobody has taken care of this yet (this can be removed when all branches are done) - JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); - linkData.linkStatus = false; - - // Link opportunity! 
- int block; - if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) - { - // It exists! Joy of joy! - B(blocks.GetBlock(block)->checkedEntry); - linkData.linkStatus = true; - } - else - { - MOVI2R(R14, destination); - STR(R14, R9, PPCSTATE_OFF(pc)); - MOVI2R(R14, (u32)asm_routines.dispatcher); - B(R14); - } - - b->linkData.push_back(linkData); -} -void JitArmIL::PrintDebug(UGeckoInstruction inst, u32 level) -{ - if (level > 0) - printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst)); - if (level > 1) - { - GekkoOPInfo* Info = GetOpInfo(inst.hex); - printf("\tOuts\n"); - if (Info->flags & FL_OUT_A) - printf("\t-OUT_A: %x\n", inst.RA); - if (Info->flags & FL_OUT_D) - printf("\t-OUT_D: %x\n", inst.RD); - printf("\tIns\n"); - // A, AO, B, C, S - if (Info->flags & FL_IN_A) - printf("\t-IN_A: %x\n", inst.RA); - if (Info->flags & FL_IN_A0) - printf("\t-IN_A0: %x\n", inst.RA); - if (Info->flags & FL_IN_B) - printf("\t-IN_B: %x\n", inst.RB); - if (Info->flags & FL_IN_C) - printf("\t-IN_C: %x\n", inst.RC); - if (Info->flags & FL_IN_S) - printf("\t-IN_S: %x\n", inst.RS); - } -} - -void STACKALIGN JitArmIL::Run() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} - -void JitArmIL::SingleStep() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} -void STACKALIGN JitArmIL::Jit(u32 em_address) -{ - if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache) - { - ClearCache(); - } - - int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); - JitBlock *b = blocks.GetBlock(block_num); - const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); - blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); -} - -const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) -{ - int blockSize = code_buf->GetSize(); - - if 
(Core::g_CoreStartupParameter.bEnableDebugging) - { - // Comment out the following to disable breakpoints (speed-up) - blockSize = 1; - } - - if (em_address == 0) - { - Core::SetState(Core::CORE_PAUSE); - PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); - } - - js.isLastInstruction = false; - js.blockStart = em_address; - js.fifoBytesThisBlock = 0; - js.curBlock = b; - - u32 nextPC = em_address; - // Analyze the block, collect all instructions it is made of (including inlining, - // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); - - PPCAnalyst::CodeOp *ops = code_buf->codebuffer; - - const u8 *start = GetCodePtr(); - b->checkedEntry = start; - b->runCount = 0; - - // Downcount flag check, Only valid for linked blocks - { - // XXX - } - - const u8 *normalEntry = GetCodePtr(); - b->normalEntry = normalEntry; - - if (js.fpa.any) - { - // XXX - // This block uses FPU - needs to add FP exception bailout - } - js.rewriteStart = (u8*)GetCodePtr(); - - u64 codeHash = -1; - { - // For profiling and IR Writer - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - const u64 inst = ops[i].inst.hex; - // Ported from boost::hash - codeHash ^= inst + (codeHash << 6) + (codeHash >> 2); - } - } - - // Conditionally add profiling code. 
- if (Profiler::g_ProfileBlocks) { - // XXX - } - // Start up IR builder (structure that collects the - // instruction processed by the JIT routines) - ibuild.Reset(); - - js.downcountAmount = 0; - if (!Core::g_CoreStartupParameter.bEnableDebugging) - js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); - - js.skipnext = false; - js.compilerPC = nextPC; - // Translate instructions - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - js.compilerPC = ops[i].address; - js.op = &ops[i]; - js.instructionNumber = i; - const GekkoOPInfo *opinfo = ops[i].opinfo; - js.downcountAmount += opinfo->numCycles; - - if (i == (code_block.m_num_instructions - 1)) - { - // WARNING - cmp->branch merging will screw this up. - js.isLastInstruction = true; - js.next_inst = 0; - if (Profiler::g_ProfileBlocks) { - // CAUTION!!! push on stack regs you use, do your stuff, then pop - PROFILER_VPUSH; - // get end tic - PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); - // tic counter += (end tic - start tic) - PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); - PROFILER_VPOP; - } - } - else - { - // help peephole optimizations - js.next_inst = ops[i + 1].inst; - js.next_compilerPC = ops[i + 1].address; - } - if (!ops[i].skip) - { - PrintDebug(ops[i].inst, 0); - if (js.memcheck && (opinfo->flags & FL_USE_FPU)) - { - // Don't do this yet - BKPT(0x7777); - } - JitArmILTables::CompileInstruction(ops[i]); - if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) - { - // Don't do this yet - BKPT(0x666); - } - } - } - if (code_block.m_memory_exception) - BKPT(0x500); - - if (code_block.m_broken) - { - printf("Broken Block going to 0x%08x\n", nextPC); - WriteExit(nextPC); - } - - // Perform actual code generation - WriteCode(nextPC); - b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = code_block.m_num_instructions;; - - FlushIcache(); - return start; - -} diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL.h 
b/Source/Core/Core/PowerPC/JitArmIL/JitIL.h deleted file mode 100644 index 8260ffe114..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/JitArm32/JitArmCache.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitCommon/JitBase.h" -#include "Core/PowerPC/JitILCommon/IR.h" -#include "Core/PowerPC/JitILCommon/JitILBase.h" - -#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0])) -class JitArmIL : public JitILBase, public ArmGen::ARMCodeBlock -{ -private: - JitArmBlockCache blocks; - JitArmILAsmRoutineManager asm_routines; - - void PrintDebug(UGeckoInstruction inst, u32 level); - void DoDownCount(); - -public: - // Initialization, etc - JitArmIL() {} - ~JitArmIL() {} - - void Init(); - void Shutdown(); - - // Jit! - - void Jit(u32 em_address); - const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); - - JitBaseBlockCache *GetBlockCache() { return &blocks; } - - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return nullptr; } - - bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } - - void ClearCache(); - const u8 *GetDispatcher() { - return asm_routines.dispatcher; // asm_routines.dispatcher - } - const CommonAsmRoutinesBase *GetAsmRoutines() { - return &asm_routines; - } - - const char *GetName() { - return "JITARMIL"; - } - - // Run! 
- - void Run(); - void SingleStep(); - // - void WriteCode(u32 exitAddress); - void WriteExit(u32 destination); - void WriteExitDestInReg(ArmGen::ARMReg Reg); - void WriteRfiExitDestInR(ArmGen::ARMReg Reg); - void WriteExceptionExit(); - - // OPCODES - void unknown_instruction(UGeckoInstruction inst); - void FallBackToInterpreter(UGeckoInstruction inst); - void DoNothing(UGeckoInstruction inst); - void HLEFunction(UGeckoInstruction inst); - void Break(UGeckoInstruction inst); - - void DynaRunTable4(UGeckoInstruction inst); - void DynaRunTable19(UGeckoInstruction inst); - void DynaRunTable31(UGeckoInstruction inst); - void DynaRunTable59(UGeckoInstruction inst); - void DynaRunTable63(UGeckoInstruction inst); - - // Binary ops - void BIN_AND(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_XOR(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_OR(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - void BIN_ADD(ArmGen::ARMReg reg, ArmGen::Operand2 op2); - - // Branches - void bx(UGeckoInstruction inst); - void bcx(UGeckoInstruction inst); - void bclrx(UGeckoInstruction inst); - void bcctrx(UGeckoInstruction inst); -}; diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp deleted file mode 100644 index c85bcf8aeb..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#include "Common/ArmEmitter.h" -#include "Common/MemoryUtil.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitILAsm.h" -#include "Core/PowerPC/JitCommon/JitCache.h" - -using namespace ArmGen; - -void JitArmILAsmRoutineManager::Generate() -{ - enterCode = GetCodePtr(); - PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); - // Take care to 8-byte align stack for function calls. - // We are misaligned here because of an odd number of args for PUSH. - // It's not like x86 where you need to account for an extra 4 bytes - // consumed by CALL. - SUB(_SP, _SP, 4); - - MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); - - FixupBranch skipToRealDispatcher = B(); - dispatcher = GetCodePtr(); - printf("ILDispatcher is %p\n", dispatcher); - - // Downcount Check - // The result of slice decrementation should be in flags if somebody jumped here - // IMPORTANT - We jump on negative, not carry!!! - FixupBranch bail = B_CC(CC_MI); - - SetJumpTarget(skipToRealDispatcher); - dispatcherNoCheck = GetCodePtr(); - - // This block of code gets the address of the compiled block of code - // It runs though to the compiling portion if it isn't found - LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12 - - Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK - BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here. - - MOVI2R(R14, (u32)jit->GetBlockCache()->iCache); - - LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here - // R12 Confirmed this is the correct iCache Location loaded. - TST(R12, 0x80); // Test to see if it is a JIT block. - - SetCC(CC_EQ); - // Success, it is our Jitblock. - MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); - // LDR R14 right here to get CodePointers()[0] pointer. 
- LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size - LDR(R14, R14, R12); // Load the block address in to R14 - - B(R14); - // No need to jump anywhere after here, the block will go back to dispatcher start - SetCC(); - - // If we get to this point, that means that we don't have the block cached to execute - // So call ArmJit to compile the block and then execute it. - MOVI2R(R14, (u32)&Jit); - BL(R14); - - B(dispatcherNoCheck); - - SetJumpTarget(bail); - doTiming = GetCodePtr(); - // XXX: In JIT64, Advance() gets called /after/ the exception checking - // once it jumps back to the start of outerLoop - QuickCallFunction(R14, (void*)&CoreTiming::Advance); - - // Does exception checking - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - // Check the state pointer to see if we are exiting - // Gets checked on every exception check - MOVI2R(R0, (u32)PowerPC::GetStatePtr()); - MVN(R1, 0); - LDR(R0, R0); - TST(R0, R1); - FixupBranch Exit = B_CC(CC_NEQ); - - B(dispatcher); - - SetJumpTarget(Exit); - - ADD(_SP, _SP, 4); - - POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns - - GenerateCommon(); - - FlushIcache(); -} - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h b/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h deleted file mode 100644 index a068ec8076..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitILAsm.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" - -class JitArmILAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock -{ -private: - void Generate(); - void GenerateCommon() {} - -public: - void Init() { - AllocCodeSpace(8192); - Generate(); - WriteProtect(); - } - - void Shutdown() { - FreeCodeSpace(); - } -}; - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp deleted file mode 100644 index 7661a75dae..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Branch.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include "Common/Common.h" - -#include "Core/ConfigManager.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" - -// FIXME -#define NORMALBRANCH_START FallBackToInterpreter(inst); ibuild.EmitInterpreterBranch(); return; -//#define NORMALBRANCH_START - -void JitArmIL::bx(UGeckoInstruction inst) -{ - //NORMALBRANCH_START - INSTRUCTION_START; - - // We must always process the following sentence - // even if the blocks are merged by PPCAnalyst::Flatten(). - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - - // If this is not the last instruction of a block, - // we will skip the rest process. - // Because PPCAnalyst::Flatten() merged the blocks. 
- if (!js.isLastInstruction) { - return; - } - - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); - - if (destination == js.compilerPC) { - ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC)); - return; - } - - ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); -} -static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) { - IREmitter::InstLoc CRTest = 0, CTRTest = 0; - if ((inst.BO & 16) == 0) // Test a CR bit - { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - CRTest = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - CRTest = ibuild.EmitXor(CRCmp, CRTest); - } - - if ((inst.BO & 4) == 0) { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - if (inst.BO & 2) { - CTRTest = ibuild.EmitICmpEq(c, - ibuild.EmitIntConst(0)); - } else { - CTRTest = c; - } - } - - IREmitter::InstLoc Test = CRTest; - if (CTRTest) { - if (Test) - Test = ibuild.EmitAnd(Test, CTRTest); - else - Test = CTRTest; - } - - if (!Test) { - Test = ibuild.EmitIntConst(1); - } - return Test; -} - -void JitArmIL::bclrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - - if (!js.isLastInstruction && - (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - return; - } - - if (inst.hex == 0x4e800020) { - ibuild.EmitBranchUncond(ibuild.EmitLoadLink()); - return; - } - IREmitter::InstLoc test = TestBranch(ibuild, inst); - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadLink(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - 
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - ibuild.EmitBranchUncond(destination); -} -void JitArmIL::bcx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if (inst.LK) - ibuild.EmitStoreLink( - ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc Test = TestBranch(ibuild, inst); - - u32 destination; - if (inst.AA) - destination = SignExt16(inst.BD << 2); - else - destination = js.compilerPC + SignExt16(inst.BD << 2); - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.hex == 0x4182fff8 && - (Memory::ReadUnchecked_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x2C000000)) - ) - { - ibuild.EmitIdleBranch(Test, ibuild.EmitIntConst(destination)); - } - else - { - ibuild.EmitBranchCond(Test, ibuild.EmitIntConst(destination)); - } - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); -} - -void JitArmIL::bcctrx(UGeckoInstruction inst) -{ - NORMALBRANCH_START - if ((inst.BO & 4) == 0) { - IREmitter::InstLoc c = ibuild.EmitLoadCTR(); - c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); - ibuild.EmitStoreCTR(c); - } - IREmitter::InstLoc test; - if ((inst.BO & 16) == 0) // Test a CR bit - { - IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); - IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); - test = ibuild.EmitAnd(CRReg, CRCmp); - if (!(inst.BO & 8)) - test = ibuild.EmitXor(test, CRCmp); - } else { - test = ibuild.EmitIntConst(1); - } - test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); - ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); - - IREmitter::InstLoc destination = ibuild.EmitLoadCTR(); - destination = ibuild.EmitAnd(destination, ibuild.EmitIntConst(-4)); - if (inst.LK) - ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); - 
ibuild.EmitBranchUncond(destination); -} - diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp deleted file mode 100644 index 882c4825c0..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.cpp +++ /dev/null @@ -1,492 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" - -// Should be moved in to the Jit class -typedef void (JitArmIL::*_Instruction) (UGeckoInstruction instCode); - -static _Instruction dynaOpTable[64]; -static _Instruction dynaOpTable4[1024]; -static _Instruction dynaOpTable19[1024]; -static _Instruction dynaOpTable31[1024]; -static _Instruction dynaOpTable59[32]; -static _Instruction dynaOpTable63[1024]; - -void JitArmIL::DynaRunTable4(UGeckoInstruction _inst) {(this->*dynaOpTable4 [_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable19(UGeckoInstruction _inst) {(this->*dynaOpTable19[_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable31(UGeckoInstruction _inst) {(this->*dynaOpTable31[_inst.SUBOP10])(_inst);} -void JitArmIL::DynaRunTable59(UGeckoInstruction _inst) {(this->*dynaOpTable59[_inst.SUBOP5 ])(_inst);} -void JitArmIL::DynaRunTable63(UGeckoInstruction _inst) {(this->*dynaOpTable63[_inst.SUBOP10])(_inst);} - -struct GekkoOPTemplate -{ - int opcode; - _Instruction Inst; - //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out -}; - -static GekkoOPTemplate primarytable[] = -{ - {4, &JitArmIL::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, - {19, &JitArmIL::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, - {31, &JitArmIL::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, - {59, &JitArmIL::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, - {63, &JitArmIL::DynaRunTable63}, //"RunTable63", 
OPTYPE_SUBTABLE | (63<<24), 0}}, - - {16, &JitArmIL::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {18, &JitArmIL::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - - {1, &JitArmIL::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {2, &JitArmIL::FallBackToInterpreter}, //"DynaBlock", OPTYPE_SYSTEM, 0}}, - {3, &JitArmIL::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {17, &JitArmIL::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - - {7, &JitArmIL::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, - {8, &JitArmIL::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {10, &JitArmIL::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {11, &JitArmIL::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, - {12, &JitArmIL::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {13, &JitArmIL::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, - {14, &JitArmIL::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - {15, &JitArmIL::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, - - {20, &JitArmIL::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, - {21, &JitArmIL::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {23, &JitArmIL::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, - - {24, &JitArmIL::reg_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {25, &JitArmIL::reg_imm}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {26, &JitArmIL::reg_imm}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {27, &JitArmIL::reg_imm}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, - {28, &JitArmIL::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - {29, &JitArmIL::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - 
- {32, &JitArmIL::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {33, &JitArmIL::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {34, &JitArmIL::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {35, &JitArmIL::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {40, &JitArmIL::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {41, &JitArmIL::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {42, &JitArmIL::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {43, &JitArmIL::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - - {44, &JitArmIL::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {45, &JitArmIL::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {36, &JitArmIL::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {37, &JitArmIL::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {38, &JitArmIL::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {39, &JitArmIL::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - - {46, &JitArmIL::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - {47, &JitArmIL::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, - - {48, &JitArmIL::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, - {49, &JitArmIL::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - {50, &JitArmIL::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, - {51, &JitArmIL::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, - - {52, &JitArmIL::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, - {53, &JitArmIL::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {54, 
&JitArmIL::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, - {55, &JitArmIL::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - - {56, &JitArmIL::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, - {57, &JitArmIL::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - {60, &JitArmIL::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}}, - {61, &JitArmIL::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - - //missing: 0, 5, 6, 9, 22, 30, 62, 58 - {0, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {5, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {6, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {9, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {22, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {30, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {62, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, - {58, &JitArmIL::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, -}; - -static GekkoOPTemplate table4[] = -{ //SUBOP10 - {0, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, - {40, &JitArmIL::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, - {136, &JitArmIL::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, - {264, &JitArmIL::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitArmIL::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, - {72, &JitArmIL::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitArmIL::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, - {528, &JitArmIL::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, 
FL_RC_BIT}}, - {560, &JitArmIL::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, - {592, &JitArmIL::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, - {624, &JitArmIL::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, - - {1014, &JitArmIL::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, -}; - -static GekkoOPTemplate table4_2[] = -{ - {10, &JitArmIL::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, - {11, &JitArmIL::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, - {12, &JitArmIL::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, - {13, &JitArmIL::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, - {14, &JitArmIL::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, - {15, &JitArmIL::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, - {18, &JitArmIL::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, - {20, &JitArmIL::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, - {21, &JitArmIL::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, - {23, &JitArmIL::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, - {24, &JitArmIL::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, - {25, &JitArmIL::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, - {26, &JitArmIL::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, - {28, &JitArmIL::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, - {29, &JitArmIL::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, - {30, &JitArmIL::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, - {31, &JitArmIL::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, -}; - - -static GekkoOPTemplate table4_3[] = -{ - {6, &JitArmIL::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, - {7, &JitArmIL::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, - {38, &JitArmIL::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, - {39, &JitArmIL::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, -}; - -static GekkoOPTemplate table19[] = -{ - {528, 
&JitArmIL::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {16, &JitArmIL::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &JitArmIL::crXX}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &JitArmIL::crXX}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &JitArmIL::crXX}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &JitArmIL::crXX}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &JitArmIL::crXX}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &JitArmIL::crXX}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &JitArmIL::crXX}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &JitArmIL::crXX}, //"crxor", OPTYPE_CR, FL_EVIL}}, - - {150, &JitArmIL::FallBackToInterpreter}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, - {0, &JitArmIL::FallBackToInterpreter}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, - - {50, &JitArmIL::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, - {18, &JitArmIL::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} -}; - - -static GekkoOPTemplate table31[] = -{ - {28, &JitArmIL::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {60, &JitArmIL::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {444, &JitArmIL::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {124, &JitArmIL::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {316, &JitArmIL::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {412, &JitArmIL::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {476, &JitArmIL::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {284, &JitArmIL::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, - {0, &JitArmIL::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {32, &JitArmIL::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {26, &JitArmIL::FallBackToInterpreter}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {922, &JitArmIL::FallBackToInterpreter}, //"extshx", 
OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {954, &JitArmIL::FallBackToInterpreter}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, - {536, &JitArmIL::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {792, &JitArmIL::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {824, &JitArmIL::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - {24, &JitArmIL::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, - - {54, &JitArmIL::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, - {86, &JitArmIL::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, - {246, &JitArmIL::FallBackToInterpreter}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, - {278, &JitArmIL::FallBackToInterpreter}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, - {470, &JitArmIL::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, - {758, &JitArmIL::FallBackToInterpreter}, //"dcba", OPTYPE_DCACHE, 0, 4}}, - {1014, &JitArmIL::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, - - //load word - {23, &JitArmIL::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {55, &JitArmIL::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load halfword - {279, &JitArmIL::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {311, &JitArmIL::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load halfword signextend - {343, &JitArmIL::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {375, &JitArmIL::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load byte - {87, &JitArmIL::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {119, &JitArmIL::FallBackToInterpreter}, 
//"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //load byte reverse - {534, &JitArmIL::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {790, &JitArmIL::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - - // Conditional load/store (Wii SMP) - {150, &JitArmIL::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, - {20, &JitArmIL::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}}, - - //load string (interpret these) - {533, &JitArmIL::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, - {597, &JitArmIL::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, - - //store word - {151, &JitArmIL::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, &JitArmIL::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store halfword - {407, &JitArmIL::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, &JitArmIL::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store byte - {215, &JitArmIL::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, &JitArmIL::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, - - //store bytereverse - {662, &JitArmIL::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {918, &JitArmIL::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, - - {661, &JitArmIL::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, - {725, &JitArmIL::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, - - // fp load/store - {535, &JitArmIL::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {567, &JitArmIL::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - {599, 
&JitArmIL::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, - {631, &JitArmIL::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, - - {663, &JitArmIL::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {695, &JitArmIL::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {727, &JitArmIL::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {759, &JitArmIL::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, - {983, &JitArmIL::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - - {19, &JitArmIL::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, - {83, &JitArmIL::FallBackToInterpreter}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, - {144, &JitArmIL::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, - {146, &JitArmIL::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, - {210, &JitArmIL::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}}, - {242, &JitArmIL::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, - {339, &JitArmIL::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, - {467, &JitArmIL::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, - {371, &JitArmIL::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &JitArmIL::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, - {595, &JitArmIL::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - {659, &JitArmIL::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, - - {4, &JitArmIL::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, - {598, &JitArmIL::FallBackToInterpreter}, //"sync", OPTYPE_SYSTEM, 0, 2}}, - {982, &JitArmIL::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, - - // Unused instructions on GC - {310, &JitArmIL::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, - {438, &JitArmIL::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}}, - {854, 
&JitArmIL::FallBackToInterpreter}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}}, - {306, &JitArmIL::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}}, - {370, &JitArmIL::FallBackToInterpreter}, //"tlbia", OPTYPE_SYSTEM, 0}}, - {566, &JitArmIL::FallBackToInterpreter}, //"tlbsync", OPTYPE_SYSTEM, 0}}, -}; - -static GekkoOPTemplate table31_2[] = -{ - {266, &JitArmIL::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {778, &JitArmIL::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {10, &JitArmIL::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {522, &JitArmIL::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {138, &JitArmIL::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {650, &JitArmIL::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {234, &JitArmIL::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {202, &JitArmIL::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {491, &JitArmIL::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {1003, &JitArmIL::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {459, &JitArmIL::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {971, &JitArmIL::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, - {75, &JitArmIL::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {11, &JitArmIL::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {235, 
&JitArmIL::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {747, &JitArmIL::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {104, &JitArmIL::FallBackToInterpreter}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {40, &JitArmIL::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {552, &JitArmIL::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, - {8, &JitArmIL::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {520, &JitArmIL::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, - {136, &JitArmIL::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {232, &JitArmIL::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, - {200, &JitArmIL::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, -}; - -static GekkoOPTemplate table59[] = -{ - {18, &JitArmIL::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, - {20, &JitArmIL::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArmIL::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, -// {22, &JitArmIL::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {24, &JitArmIL::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArmIL::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArmIL::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, &JitArmIL::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArmIL::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArmIL::FallBackToInterpreter}, //"fnmaddsx", 
OPTYPE_FPU, FL_RC_BIT_F}}, -}; - -static GekkoOPTemplate table63[] = -{ - {264, &JitArmIL::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitArmIL::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitArmIL::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, - {14, &JitArmIL::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, - {15, &JitArmIL::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, - {72, &JitArmIL::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, - {136, &JitArmIL::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {40, &JitArmIL::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, - {12, &JitArmIL::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, - - {64, &JitArmIL::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, - {583, &JitArmIL::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}}, - {70, &JitArmIL::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}}, - {38, &JitArmIL::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}}, - {134, &JitArmIL::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}}, - {711, &JitArmIL::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}}, -}; - -static GekkoOPTemplate table63_2[] = -{ - {18, &JitArmIL::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, - {20, &JitArmIL::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {21, &JitArmIL::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {22, &JitArmIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, - {23, &JitArmIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, - {25, &JitArmIL::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, &JitArmIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, - {28, &JitArmIL::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {29, 
&JitArmIL::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, - {30, &JitArmIL::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, - {31, &JitArmIL::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, -}; - - -namespace JitArmILTables -{ - -void CompileInstruction(PPCAnalyst::CodeOp & op) -{ - JitArmIL *jitarm = (JitArmIL *)jit; - (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); - GekkoOPInfo *info = op.opinfo; - if (info) { -#ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" - rsplocations.push_back(jit.js.compilerPC); - } -#endif - info->compileCount++; - info->lastUse = jit->js.compilerPC; - } -} - -void InitTables() -{ - // once initialized, tables are read-only - static bool initialized = false; - if (initialized) - return; - - //clear - for (int i = 0; i < 32; i++) - { - dynaOpTable59[i] = &JitArmIL::unknown_instruction; - } - - for (int i = 0; i < 1024; i++) - { - dynaOpTable4 [i] = &JitArmIL::unknown_instruction; - dynaOpTable19[i] = &JitArmIL::unknown_instruction; - dynaOpTable31[i] = &JitArmIL::unknown_instruction; - dynaOpTable63[i] = &JitArmIL::unknown_instruction; - } - - for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++) - { - dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_2[j].opcode; - dynaOpTable4[op] = table4_2[j].Inst; - } - } - - for (int i = 0; i < 16; i++) - { - int fill = i << 6; - for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_3[j].opcode; - dynaOpTable4[op] = table4_3[j].Inst; - } - } - - for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++) - { - int op = table4[i].opcode; - dynaOpTable4[op] = table4[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++) - { - int op = 
table31[i].opcode; - dynaOpTable31[op] = table31[i].Inst; - } - - for (int i = 0; i < 1; i++) - { - int fill = i << 9; - for (int j = 0; j < (int)(sizeof(table31_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill + table31_2[j].opcode; - dynaOpTable31[op] = table31_2[j].Inst; - } - } - - for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++) - { - int op = table19[i].opcode; - dynaOpTable19[op] = table19[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++) - { - int op = table59[i].opcode; - dynaOpTable59[op] = table59[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++) - { - int op = table63[i].opcode; - dynaOpTable63[op] = table63[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill + table63_2[j].opcode; - dynaOpTable63[op] = table63_2[j].Inst; - } - } - - initialized = true; - -} - -} // namespace diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h b/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h deleted file mode 100644 index 7d92e6cf0b..0000000000 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL_Tables.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#pragma once - -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCTables.h" - -namespace JitArmILTables -{ - void CompileInstruction(PPCAnalyst::CodeOp & op); - void InitTables(); -} diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 8dc6eaefe2..e7c1d1ed2a 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -27,8 +27,6 @@ #if _M_ARM_32 #include "Core/PowerPC/JitArm32/Jit.h" #include "Core/PowerPC/JitArm32/JitArm_Tables.h" -#include "Core/PowerPC/JitArmIL/JitIL.h" -#include "Core/PowerPC/JitArmIL/JitIL_Tables.h" #endif static bool bFakeVMEM = false; @@ -67,11 +65,6 @@ namespace JitInterface ptr = new JitArm(); break; } - case 4: - { - ptr = new JitArmIL(); - break; - } #endif default: { @@ -106,11 +99,6 @@ namespace JitInterface JitArmTables::InitTables(); break; } - case 4: - { - JitArmILTables::InitTables(); - break; - } #endif default: { From fda2190a3787e4571ed77a9e2325c5a4e7fe3ff9 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 17 Jul 2014 03:40:02 +0000 Subject: [PATCH 11/11] Support the 64bit CR flags in the ARM JIT. 
--- Source/Core/Core/PowerPC/JitArm32/Jit.cpp | 18 +- Source/Core/Core/PowerPC/JitArm32/Jit.h | 17 +- .../Core/PowerPC/JitArm32/JitArm_Branch.cpp | 58 +--- .../PowerPC/JitArm32/JitArm_FloatingPoint.cpp | 133 --------- .../Core/PowerPC/JitArm32/JitArm_Integer.cpp | 197 ++++--------- .../Core/PowerPC/JitArm32/JitArm_Paired.cpp | 259 ------------------ .../JitArm32/JitArm_SystemRegisters.cpp | 198 +++---------- .../Core/PowerPC/JitArm32/JitArm_Tables.cpp | 38 +-- 8 files changed, 137 insertions(+), 781 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index afe4d490cc..97ef0644ec 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -233,31 +233,25 @@ void JitArm::SingleStep() void JitArm::Trace() { - char regs[500] = ""; - char fregs[750] = ""; + std::string regs; + std::string fregs; #ifdef JIT_LOG_GPR for (int i = 0; i < 32; i++) { - char reg[50]; - sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); - strncat(regs, reg, sizeof(regs) - 1); + regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); } #endif #ifdef JIT_LOG_FPR for (int i = 0; i < 32; i++) { - char reg[50]; - sprintf(reg, "f%02d: %016x ", i, riPS0(i)); - strncat(fregs, reg, sizeof(fregs) - 1); + fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i)); } #endif - DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs); + DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, 
PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } void JitArm::PrintDebug(UGeckoInstruction inst, u32 level) diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 30681d63e5..c4cfcaa115 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -50,6 +50,8 @@ private: void Helper_UpdateCR1(ARMReg fpscr, ARMReg temp); void SetFPException(ARMReg Reg, u32 Exception); + + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); public: JitArm() : code_buffer(32000) {} ~JitArm() {} @@ -96,8 +98,7 @@ public: void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); - void GenerateRC(int cr = 0); - void ComputeRC(int cr = 0); + void ComputeRC(ARMReg value, int cr = 0); void ComputeRC(s32 value, int cr); void ComputeCarry(); @@ -143,8 +144,6 @@ public: void cntlzwx(UGeckoInstruction _inst); void cmp (UGeckoInstruction _inst); void cmpi(UGeckoInstruction _inst); - void cmpl(UGeckoInstruction _inst); - void cmpli(UGeckoInstruction _inst); void negx(UGeckoInstruction _inst); void mulhwux(UGeckoInstruction _inst); void rlwimix(UGeckoInstruction _inst); @@ -160,13 +159,9 @@ public: void mtspr(UGeckoInstruction _inst); void mfspr(UGeckoInstruction _inst); void mftb(UGeckoInstruction _inst); - void crXXX(UGeckoInstruction _inst); void mcrf(UGeckoInstruction _inst); - void mfcr(UGeckoInstruction _inst); - void mtcrf(UGeckoInstruction _inst); void mtsr(UGeckoInstruction _inst); void mfsr(UGeckoInstruction _inst); - void mcrxr(UGeckoInstruction _inst); void twx(UGeckoInstruction _inst); // LoadStore @@ -193,8 +188,6 @@ public: void fmaddx(UGeckoInstruction _inst); void fctiwx(UGeckoInstruction _inst); void fctiwzx(UGeckoInstruction _inst); - void fcmpo(UGeckoInstruction _inst); - void fcmpu(UGeckoInstruction _inst); void fnmaddx(UGeckoInstruction _inst); void fnmaddsx(UGeckoInstruction _inst); void 
fresx(UGeckoInstruction _inst); @@ -232,10 +225,6 @@ public: void ps_nabs(UGeckoInstruction _inst); void ps_rsqrte(UGeckoInstruction _inst); void ps_sel(UGeckoInstruction _inst); - void ps_cmpu0(UGeckoInstruction _inst); - void ps_cmpu1(UGeckoInstruction _inst); - void ps_cmpo0(UGeckoInstruction _inst); - void ps_cmpo1(UGeckoInstruction _inst); // LoadStore paired void psq_l(UGeckoInstruction _inst); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp index de76f030f6..09d4b33acb 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp @@ -16,15 +16,6 @@ // The branches are known good, or at least reasonably good. // No need for a disable-mechanism. -// If defined, clears CR0 at blr and bl-s. If the assumption that -// flags never carry over between functions holds, then the task for -// an optimizer becomes much easier. - -// #define ACID_TEST - -// Zelda and many more games seem to pass the Acid Test. 
- - using namespace ArmGen; void JitArm::sc(UGeckoInstruction inst) { @@ -121,13 +112,7 @@ void JitArm::bx(UGeckoInstruction inst) destination = SignExt26(inst.LI << 2); else destination = js.compilerPC + SignExt26(inst.LI << 2); -#ifdef ACID_TEST - if (inst.LK) - { - MOV(R14, 0); - STRB(R14, R9, PPCSTATE_OFF(cr_fast[0])); - } -#endif + if (destination == js.compilerPC) { //PanicAlert("Idle loop detected at %08x", destination); @@ -168,15 +153,10 @@ void JitArm::bcx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - - //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = B_CC(CC_EQ); // Zero - else - pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } + if (inst.LK) { u32 Jumpto = js.compilerPC + 4; @@ -240,20 +220,13 @@ void JitArm::bcctrx(UGeckoInstruction inst) else { // Rare condition seen in (just some versions of?) 
Nintendo's NES Emulator - // BO_2 == 001zy -> b if false // BO_2 == 011zy -> b if true ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - CCFlags branch; - if (inst.BO_2 & BO_BRANCH_IF_TRUE) - branch = CC_EQ; - else - branch = CC_NEQ; - FixupBranch b = B_CC(branch); + FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR])); BIC(rA, rA, 0x3); @@ -304,25 +277,10 @@ void JitArm::bclrx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast) + (inst.BI >> 2)); - TST(rA, 8 >> (inst.BI & 3)); - //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = B_CC(CC_EQ); // Zero - else - pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } - // This below line can be used to prove that blr "eats flags" in practice. - // This observation will let us do a lot of fun observations. 
- #ifdef ACID_TEST - if (inst.LK) - { - MOV(R14, 0); - STRB(R14, R9, PPCSTATE_OFF(cr_fast[0])); - } - #endif - //MOV(32, R(EAX), M(&LR)); //AND(32, R(EAX), Imm32(0xFFFFFFFC)); LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index 6bd36cff2c..79a0b79885 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -19,8 +19,6 @@ void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp) { - UBFX(temp, fpscr, 28, 4); - STRB(temp, R9, PPCSTATE_OFF(cr_fast[1])); } void JitArm::fctiwx(UGeckoInstruction inst) @@ -129,7 +127,6 @@ void JitArm::fctiwx(UGeckoInstruction inst) fpr.Unlock(V2); } - void JitArm::fctiwzx(UGeckoInstruction inst) { INSTRUCTION_START @@ -210,136 +207,6 @@ void JitArm::fctiwzx(UGeckoInstruction inst) fpr.Unlock(V2); } -void JitArm::fcmpo(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - 
FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::fcmpu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - void 
JitArm::fabsx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 5f47aefb34..6f7e6854dc 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -14,42 +14,27 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" -// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. -// Jit64 ComputerRC is signed -// JIT64 GenerateRC is unsigned -void JitArm::GenerateRC(int cr) { +void JitArm::ComputeRC(ARMReg value, int cr) { ARMReg rB = gpr.GetReg(); - MOV(rB, 0x4); // Result > 0 - SetCC(CC_EQ); MOV(rB, 0x2); // Result == 0 - SetCC(CC_MI); MOV(rB, 0x8); // Result < 0 - SetCC(); + Operand2 ASRReg(value, ST_ASR, 31); - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); - gpr.Unlock(rB); -} -void JitArm::ComputeRC(int cr) { - ARMReg rB = gpr.GetReg(); + STR(value, R9, PPCSTATE_OFF(cr_val[cr])); + MOV(rB, ASRReg); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - MOV(rB, 0x2); // Result == 0 - SetCC(CC_LT); MOV(rB, 0x8); // Result < 0 - SetCC(CC_GT); MOV(rB, 0x4); // Result > 0 - SetCC(); - - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); } void JitArm::ComputeRC(s32 value, int cr) { ARMReg rB = gpr.GetReg(); - if (value < 0) - MOV(rB, 0x8); - else if (value > 0) - MOV(rB, 0x4); - else - MOV(rB, 0x2); + Operand2 ASRReg(rB, ST_ASR, 31); + + MOVI2R(rB, value); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr])); + MOV(rB, ASRReg); + STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); } @@ -195,7 +180,6 @@ void JitArm::arith(UGeckoInstruction inst) u32 Imm[2] = {0, 0}; bool Rc = false; bool carry = false; - bool isUnsigned = false; bool shiftedImm = false; switch (inst.OPCD) @@ -306,7 +290,6 @@ void JitArm::arith(UGeckoInstruction inst) case 522: // addcox 
carry = true; case 40: // subfx - isUnsigned = true; case 235: // mullwx case 266: case 747: // mullwox @@ -431,6 +414,8 @@ void JitArm::arith(UGeckoInstruction inst) if (Rc) ComputeRC(gpr.GetImm(dest), 0); return; } + + u32 dest = d; // One or the other isn't a IMM switch (inst.OPCD) { @@ -472,6 +457,7 @@ void JitArm::arith(UGeckoInstruction inst) case 24: case 25: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -483,6 +469,7 @@ void JitArm::arith(UGeckoInstruction inst) case 26: case 27: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -495,6 +482,7 @@ void JitArm::arith(UGeckoInstruction inst) case 28: case 29: { + dest = a; ARMReg rA = gpr.GetReg(); RS = gpr.R(s); RA = gpr.R(a); @@ -507,12 +495,14 @@ void JitArm::arith(UGeckoInstruction inst) switch (inst.SUBOP10) { case 24: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); LSLS(RA, RS, RB); break; case 28: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -525,12 +515,14 @@ void JitArm::arith(UGeckoInstruction inst) SUBS(RD, RB, RA); break; case 60: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); BICS(RA, RS, RB); break; case 124: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -545,6 +537,7 @@ void JitArm::arith(UGeckoInstruction inst) MULS(RD, RA, RB); break; case 284: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -552,6 +545,7 @@ void JitArm::arith(UGeckoInstruction inst) MVNS(RA, RA); break; case 316: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -559,6 +553,7 @@ void JitArm::arith(UGeckoInstruction inst) break; case 412: { + dest = a; ARMReg rA = gpr.GetReg(); RA = gpr.R(a); RS = gpr.R(s); @@ -569,12 +564,14 @@ void JitArm::arith(UGeckoInstruction inst) } break; case 444: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); ORRS(RA, RS, RB); break; case 476: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -582,12 +579,14 @@ void JitArm::arith(UGeckoInstruction inst) MVNS(RA, 
RA); break; case 536: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); LSRS(RA, RS, RB); break; case 792: + dest = a; RA = gpr.R(a); RS = gpr.R(s); RB = gpr.R(b); @@ -605,7 +604,7 @@ void JitArm::arith(UGeckoInstruction inst) break; } if (carry) ComputeCarry(); - if (Rc) isUnsigned ? GenerateRC() : ComputeRC(); + if (Rc) ComputeRC(gpr.R(dest)); } void JitArm::addex(UGeckoInstruction inst) @@ -624,7 +623,7 @@ void JitArm::addex(UGeckoInstruction inst) GetCarryAndClear(rA); ADDS(RD, RA, RB); FinalizeCarry(rA); - if (inst.Rc) ComputeRC(); + if (inst.Rc) ComputeRC(RD); gpr.Unlock(rA); } @@ -638,10 +637,7 @@ void JitArm::cntlzwx(UGeckoInstruction inst) ARMReg RS = gpr.R(s); CLZ(RA, RS); if (inst.Rc) - { - CMP(RA, 0); - ComputeRC(); - } + ComputeRC(RA); } void JitArm::mulhwux(UGeckoInstruction inst) @@ -655,8 +651,8 @@ void JitArm::mulhwux(UGeckoInstruction inst) ARMReg RB = gpr.R(b); ARMReg RD = gpr.R(d); ARMReg rA = gpr.GetReg(false); - UMULLS(rA, RD, RA, RB); - if (inst.Rc) ComputeRC(); + UMULL(rA, RD, RA, RB); + if (inst.Rc) ComputeRC(RD); } void JitArm::extshx(UGeckoInstruction inst) @@ -674,10 +670,8 @@ void JitArm::extshx(UGeckoInstruction inst) ARMReg rA = gpr.R(a); ARMReg rS = gpr.R(s); SXTH(rA, rS); - if (inst.Rc){ - CMP(rA, 0); - ComputeRC(); - } + if (inst.Rc) + ComputeRC(rA); } void JitArm::extsbx(UGeckoInstruction inst) { @@ -694,10 +688,8 @@ void JitArm::extsbx(UGeckoInstruction inst) ARMReg rA = gpr.R(a); ARMReg rS = gpr.R(s); SXTB(rA, rS); - if (inst.Rc){ - CMP(rA, 0); - ComputeRC(); - } + if (inst.Rc) + ComputeRC(rA); } void JitArm::cmp (UGeckoInstruction inst) { @@ -713,11 +705,7 @@ void JitArm::cmp (UGeckoInstruction inst) return; } - ARMReg RA = gpr.R(a); - ARMReg RB = gpr.R(b); - CMP(RA, RB); - - ComputeRC(crf); + FALLBACK_IF(true); } void JitArm::cmpi(UGeckoInstruction inst) { @@ -726,71 +714,12 @@ void JitArm::cmpi(UGeckoInstruction inst) u32 a = inst.RA; int crf = inst.CRFD; if (gpr.IsImm(a)) + { ComputeRC((s32)gpr.GetImm(a) - 
inst.SIMM_16, crf); - else - { - ARMReg RA = gpr.R(a); - if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 256) - CMP(RA, inst.SIMM_16); - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, inst.SIMM_16); - CMP(RA, rA); - gpr.Unlock(rA); - } - ComputeRC(crf); + return; } -} -void JitArm::cmpl(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - ARMReg RA = gpr.R(inst.RA); - ARMReg RB = gpr.R(inst.RB); - ARMReg rA = gpr.GetReg(); - int crf = inst.CRFD; - - CMP(RA, RB); - // Unsigned GenerateRC() - - MOV(rA, 0x2); // Result == 0 - SetCC(CC_LO); MOV(rA, 0x8); // Result < 0 - SetCC(CC_HI); MOV(rA, 0x4); // Result > 0 - SetCC(); - - STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf); - gpr.Unlock(rA); -} - -void JitArm::cmpli(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - ARMReg RA = gpr.R(inst.RA); - ARMReg rA = gpr.GetReg(); - int crf = inst.CRFD; - u32 uimm = (u32)inst.UIMM; - if (uimm < 256) - { - CMP(RA, uimm); - } - else - { - MOVI2R(rA, (u32)inst.UIMM); - CMP(RA, rA); - } - // Unsigned GenerateRC() - - MOV(rA, 0x2); // Result == 0 - SetCC(CC_LO); MOV(rA, 0x8); // Result < 0 - SetCC(CC_HI); MOV(rA, 0x4); // Result > 0 - SetCC(); - - STRB(rA, R9, PPCSTATE_OFF(cr_fast) + crf); - gpr.Unlock(rA); + FALLBACK_IF(true); } void JitArm::negx(UGeckoInstruction inst) @@ -801,11 +730,10 @@ void JitArm::negx(UGeckoInstruction inst) ARMReg RA = gpr.R(inst.RA); ARMReg RD = gpr.R(inst.RD); - RSBS(RD, RA, 0); + RSB(RD, RA, 0); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(RD); + if (inst.OE) { BKPT(0x333); @@ -825,19 +753,12 @@ void JitArm::rlwimix(UGeckoInstruction inst) MOVI2R(rA, mask); Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. 
+ BIC (rB, RA, rA); // RA & ~mask + AND (rA, rA, Shift); + ORR(RA, rB, rA); + if (inst.Rc) - { - BIC (rB, RA, rA); // RA & ~mask - AND (rA, rA, Shift); - ORRS(RA, rB, rA); - GenerateRC(); - } - else - { - BIC (rB, RA, rA); // RA & ~mask - AND (rA, rA, Shift); - ORR(RA, rB, rA); - } + ComputeRC(RA); gpr.Unlock(rA, rB); } @@ -853,13 +774,10 @@ void JitArm::rlwinmx(UGeckoInstruction inst) MOVI2R(rA, mask); Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. + AND(RA, rA, Shift); + if (inst.Rc) - { - ANDS(RA, rA, Shift); - GenerateRC(); - } - else - AND (RA, rA, Shift); + ComputeRC(RA); gpr.Unlock(rA); //m_GPR[inst.RA] = _rotl(m_GPR[inst.RS],inst.SH) & mask; @@ -882,13 +800,10 @@ void JitArm::rlwnmx(UGeckoInstruction inst) SUB(rB, rB, RB); Operand2 Shift(RS, ST_ROR, rB); // Register shifted register + AND(RA, rA, Shift); + if (inst.Rc) - { - ANDS(RA, rA, Shift); - GenerateRC(); - } - else - AND (RA, rA, Shift); + ComputeRC(RA); gpr.Unlock(rA, rB); } @@ -908,9 +823,9 @@ void JitArm::srawix(UGeckoInstruction inst) Operand2 mask = Operand2(2, 2); // XER_CA_MASK MOV(tmp, RS); - ASRS(RA, RS, amount); + ASR(RA, RS, amount); if (inst.Rc) - GenerateRC(); + ComputeRC(RA); LSL(tmp, tmp, 32 - amount); TST(tmp, RA); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp index 4584058206..520e243327 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp @@ -611,263 +611,4 @@ void JitArm::ps_nabs(UGeckoInstruction inst) VABS(vD1, vB1); VNEG(vD1, vD1); } -void JitArm::ps_cmpu0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); 
// 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpu1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R1(a); - ARMReg vB = fpr.R1(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, 
fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpo0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R0(a); - ARMReg vB = fpr.R0(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, 
R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} - -void JitArm::ps_cmpo1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - u32 a = inst.FA, b = inst.FB; - int cr = inst.CRFD; - - ARMReg vA = fpr.R1(a); - ARMReg vB = fpr.R1(b); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg crReg = gpr.GetReg(); - Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 - Operand2 LessThan(0x8, 0xA); // 0x8000 - Operand2 GreaterThan(0x4, 0xA); // 0x4000 - Operand2 EqualTo(0x2, 0xA); // 0x2000 - Operand2 NANRes(0x1, 0xA); // 0x1000 - FixupBranch Done1, Done2, Done3; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - BIC(fpscrReg, fpscrReg, FPRFMask); - - VCMPE(vA, vB); - VMRS(_PC); - SetCC(CC_LT); - ORR(fpscrReg, fpscrReg, LessThan); - MOV(crReg, 8); - Done1 = B(); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, GreaterThan); - MOV(crReg, 4); - Done2 = B(); - SetCC(CC_EQ); - ORR(fpscrReg, fpscrReg, EqualTo); - MOV(crReg, 2); - Done3 = B(); - SetCC(); - - ORR(fpscrReg, fpscrReg, NANRes); - MOV(crReg, 1); - - VCMPE(vA, vA); - VMRS(_PC); - FixupBranch NanA = B_CC(CC_NEQ); - VCMPE(vB, vB); - VMRS(_PC); - FixupBranch NanB = B_CC(CC_NEQ); - - SetFPException(fpscrReg, FPSCR_VXVC); - FixupBranch Done4 = B(); - - SetJumpTarget(NanA); - SetJumpTarget(NanB); - - SetFPException(fpscrReg, FPSCR_VXSNAN); - - TST(fpscrReg, VEMask); - - FixupBranch noVXVC = B_CC(CC_NEQ); - SetFPException(fpscrReg, FPSCR_VXVC); - - SetJumpTarget(noVXVC); - SetJumpTarget(Done1); - SetJumpTarget(Done2); - SetJumpTarget(Done3); - SetJumpTarget(Done4); - STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, crReg); -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index ff129f9674..d17e61a88f 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -14,6 +14,46 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" +FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + ARMReg RA = gpr.GetReg(); + + Operand2 SOBit(2, 2); // 0x10000000 + Operand2 LTBit(1, 1); // 0x80000000 + + FixupBranch branch; + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + TST(RA, SOBit); + branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ); + break; + case CR_EQ_BIT: // check bits 31-0 == 0 + LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); + CMP(RA, 0); + branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ); + break; + case CR_GT_BIT: // check val > 0 + LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); + CMP(RA, 1); + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + SBCS(RA, RA, 0); + branch = B_CC(jump_if_set ? CC_GE : CC_LT); + break; + case CR_LT_BIT: // check bit 62 set + LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); + TST(RA, LTBit); + branch = B_CC(jump_if_set ? 
CC_NEQ : CC_EQ); + break; + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + gpr.Unlock(RA); + return branch; +} + void JitArm::mtspr(UGeckoInstruction inst) { INSTRUCTION_START @@ -84,67 +124,6 @@ void JitArm::mfspr(UGeckoInstruction inst) } } -void JitArm::mfcr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - // USES_CR - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - int d = inst.RD; - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[0])); - - for (int i = 1; i < 8; i++) - { - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[i])); - LSL(rA, rA, 4); - ORR(rA, rA, rB); - } - MOV(gpr.R(d), rA); - gpr.Unlock(rA, rB); -} - -void JitArm::mtcrf(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - - // USES_CR - u32 crm = inst.CRM; - if (crm != 0) - { - if (gpr.IsImm(inst.RS)) - { - for (int i = 0; i < 8; i++) - { - if ((crm & (0x80 >> i)) != 0) - { - u8 newcr = (gpr.GetImm(inst.RS) >> (28 - (i * 4))) & 0xF; - MOV(rA, newcr); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[i])); - } - } - } - else - { - ARMReg rB = gpr.GetReg(); - MOV(rA, gpr.R(inst.RS)); - for (int i = 0; i < 8; i++) - { - if ((crm & (0x80 >> i)) != 0) - { - UBFX(rB, rA, 28 - (i * 4), 4); - STRB(rB, R9, PPCSTATE_OFF(cr_fast[i])); - } - } - gpr.Unlock(rB); - } - } - gpr.Unlock(rA); -} - void JitArm::mtsr(UGeckoInstruction inst) { INSTRUCTION_START @@ -160,25 +139,6 @@ void JitArm::mfsr(UGeckoInstruction inst) LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR])); } -void JitArm::mcrxr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - // Copy XER[0-3] into CR[inst.CRFD] - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_XER])); - MOV(rB, rA); - LSR(rA, rA, 28); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFD])); - - // Clear XER[0-3] - Operand2 Top4(0xF, 2); - BIC(rB, rB, Top4); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_XER])); - gpr.Unlock(rA, rB); -} 
void JitArm::mtmsr(UGeckoInstruction inst) { @@ -206,84 +166,16 @@ void JitArm::mcrf(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); + ARMReg rA = gpr.GetReg(); if (inst.CRFS != inst.CRFD) { - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFS])); - STRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRFD])); + LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS])); + STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD])); + LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32)); + STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32)); } gpr.Unlock(rA); } -void JitArm::crXXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - // Get bit CRBA aligned with bit CRBD - LDRB(rA, R9, PPCSTATE_OFF(cr_fast[inst.CRBA >> 2])); - int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - if (shiftA < 0) - LSL(rA, rA, -shiftA); - else if (shiftA > 0) - LSR(rA, rA, shiftA); - - // Get bit CRBB aligned with bit CRBD - int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBB >> 2])); - if (shiftB < 0) - LSL(rB, rB, -shiftB); - else if (shiftB > 0) - LSR(rB, rB, shiftB); - - // Compute combined bit - switch (inst.SUBOP10) - { - case 33: // crnor - ORR(rA, rA, rB); - MVN(rA, rA); - break; - - case 129: // crandc - MVN(rB, rB); - AND(rA, rA, rB); - break; - - case 193: // crxor - EOR(rA, rA, rB); - break; - - case 225: // crnand - AND(rA, rA, rB); - MVN(rA, rA); - break; - - case 257: // crand - AND(rA, rA, rB); - break; - - case 289: // creqv - EOR(rA, rA, rB); - MVN(rA, rA); - break; - - case 417: // crorc - MVN(rA, rA); - ORR(rA, rA, rB); - break; - - case 449: // cror - ORR(rA, rA, rB); - break; - } - // Store result bit in CRBD - AND(rA, rA, 0x8 >> (inst.CRBD & 3)); - LDRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2])); - BIC(rB, rB, 0x8 >> (inst.CRBD & 3)); - ORR(rB, rB, rA); - STRB(rB, R9, PPCSTATE_OFF(cr_fast[inst.CRBD >> 2])); - gpr.Unlock(rA, rB); -} 
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp index 5176a4e8d9..d7c1882f44 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp @@ -47,7 +47,7 @@ static GekkoOPTemplate primarytable[] = {7, &JitArm::arith}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, {8, &JitArm::subfic}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, - {10, &JitArm::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {10, &JitArm::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, &JitArm::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {12, &JitArm::arith}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {13, &JitArm::arith}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, @@ -112,14 +112,14 @@ static GekkoOPTemplate primarytable[] = static GekkoOPTemplate table4[] = { //SUBOP10 - {0, &JitArm::ps_cmpu0}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitArm::ps_cmpo0}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, + {0, &JitArm::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitArm::ps_cmpu1}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitArm::ps_cmpo1}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {592, &JitArm::ps_merge10}, 
//"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, @@ -162,14 +162,14 @@ static GekkoOPTemplate table19[] = { {528, &JitArm::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, {16, &JitArm::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, - {257, &JitArm::crXXX}, //"crand", OPTYPE_CR, FL_EVIL}}, - {129, &JitArm::crXXX}, //"crandc", OPTYPE_CR, FL_EVIL}}, - {289, &JitArm::crXXX}, //"creqv", OPTYPE_CR, FL_EVIL}}, - {225, &JitArm::crXXX}, //"crnand", OPTYPE_CR, FL_EVIL}}, - {33, &JitArm::crXXX}, //"crnor", OPTYPE_CR, FL_EVIL}}, - {449, &JitArm::crXXX}, //"cror", OPTYPE_CR, FL_EVIL}}, - {417, &JitArm::crXXX}, //"crorc", OPTYPE_CR, FL_EVIL}}, - {193, &JitArm::crXXX}, //"crxor", OPTYPE_CR, FL_EVIL}}, + {257, &JitArm::FallBackToInterpreter}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &JitArm::FallBackToInterpreter}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &JitArm::FallBackToInterpreter}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &JitArm::FallBackToInterpreter}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &JitArm::FallBackToInterpreter}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &JitArm::FallBackToInterpreter}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &JitArm::FallBackToInterpreter}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &JitArm::FallBackToInterpreter}, //"crxor", OPTYPE_CR, FL_EVIL}}, {150, &JitArm::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, {0, &JitArm::mcrf}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, @@ -190,7 +190,7 @@ static GekkoOPTemplate table31[] = {476, &JitArm::arith}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {284, &JitArm::arith}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {0, &JitArm::cmp}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, - {32, &JitArm::cmpl}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {32, &JitArm::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {26, &JitArm::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {922, &JitArm::extshx}, //"extshx", OPTYPE_INTEGER, 
FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, &JitArm::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, @@ -266,16 +266,16 @@ static GekkoOPTemplate table31[] = {759, &JitArm::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, {983, &JitArm::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, - {19, &JitArm::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, + {19, &JitArm::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, {83, &JitArm::mfmsr}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, - {144, &JitArm::mtcrf}, //"mtcrf", OPTYPE_SYSTEM, 0}}, + {144, &JitArm::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, {146, &JitArm::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, {210, &JitArm::mtsr}, //"mtsr", OPTYPE_SYSTEM, 0}}, {242, &JitArm::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, {339, &JitArm::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {467, &JitArm::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, {371, &JitArm::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, - {512, &JitArm::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {512, &JitArm::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {595, &JitArm::mfsr}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &JitArm::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, @@ -337,8 +337,8 @@ static GekkoOPTemplate table59[] = static GekkoOPTemplate table63[] = { {264, &JitArm::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitArm::fcmpo}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitArm::fcmpu}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, + {32, &JitArm::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, + {0, &JitArm::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm::fctiwx}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm::fctiwzx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},