JIT compiler:

* Improved constant folding in load instructions
* Merged load instructions
* Fixed the register allocator so that it can restore the register state after jumping to the dispatcher (in case of a conditional jump)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6076 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
dok.slade 2010-08-08 18:12:58 +00:00
parent 95b0abb737
commit 5a248b46a1
14 changed files with 438 additions and 280 deletions

View file

@ -127,12 +127,20 @@ void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2
void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
{ {
ABI_AlignStack(2 * 4); ABI_AlignStack(2 * 4);
PUSH(32, arg1);
PUSH(32, Imm32(param2)); PUSH(32, Imm32(param2));
PUSH(32, arg1);
CALL(func); CALL(func);
ABI_RestoreStack(2 * 4); ABI_RestoreStack(2 * 4);
} }
// Emits a call to func with a single 32-bit argument passed on the stack.
// func: address of the function to call.
// arg1: operand whose 32-bit value is pushed as the only argument.
void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
{
// Prepare the stack for a frame of one 4-byte argument.
ABI_AlignStack(1 * 4);
PUSH(32, arg1);
CALL(func);
// Paired with the ABI_AlignStack call above (same frame size).
ABI_RestoreStack(1 * 4);
}
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
// Note: 4 * 4 = 16 bytes, so alignment is preserved. // Note: 4 * 4 = 16 bytes, so alignment is preserved.
PUSH(EBP); PUSH(EBP);

View file

@ -600,6 +600,7 @@ public:
void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3); void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3); void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2); void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);
// Pass a register as a parameter. // Pass a register as a parameter.
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1); void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);

View file

@ -300,6 +300,14 @@ void Jit64::Cleanup()
void Jit64::WriteExit(u32 destination, int exit_num) void Jit64::WriteExit(u32 destination, int exit_num)
{ {
// We are about to jump to the dispatcher => save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
Cleanup(); Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
@ -321,29 +329,69 @@ void Jit64::WriteExit(u32 destination, int exit_num)
MOV(32, M(&PC), Imm32(destination)); MOV(32, M(&PC), Imm32(destination));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
// Restore registers states so that the next instructions could still use the cached values
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
} }
void Jit64::WriteExitDestInEAX(int exit_num) void Jit64::WriteExitDestInEAX(int exit_num)
{ {
// We are about to jump to the dispatcher => save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
MOV(32, M(&PC), R(EAX)); MOV(32, M(&PC), R(EAX));
Cleanup(); Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
// Restore registers states so that the next instructions could still use the cached values
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
} }
void Jit64::WriteRfiExitDestInEAX() void Jit64::WriteRfiExitDestInEAX()
{ {
// We are about to jump to the exception handler => save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
MOV(32, M(&PC), R(EAX)); MOV(32, M(&PC), R(EAX));
Cleanup(); Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.testExceptions, true); JMP(asm_routines.testExceptions, true);
// Restore registers states so that the next instructions could still use the cached values
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
} }
void Jit64::WriteExceptionExit() void Jit64::WriteExceptionExit()
{ {
// We are about to jump to the exception handler => save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
Cleanup(); Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.testExceptions, true); JMP(asm_routines.testExceptions, true);
// Restore registers states so that the next instructions could still use the cached values
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
} }
void STACKALIGN Jit64::Run() void STACKALIGN Jit64::Run()
@ -552,7 +600,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{ {
//This instruction uses FPU - needs to add FP exception bailout //This instruction uses FPU - needs to add FP exception bailout
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ); FixupBranch b1 = J_CC(CC_NZ, true);
// We are about to jump to the exception handler => save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
// If a FPU exception occurs, the exception handler will read // If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
@ -560,18 +616,19 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.fpException, true); JMP(asm_routines.fpException, true);
SetJumpTarget(b1); SetJumpTarget(b1);
// Restore registers states
// (If no FP exception occurred, the next instructions could still use the cached values)
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
} }
Jit64Tables::CompileInstruction(ops[i]); Jit64Tables::CompileInstruction(ops[i]);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
{ {
// In case we are about to jump to the dispatcher, flush regs
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
FixupBranch noMemException = J_CC(CC_Z); FixupBranch noMemException = J_CC(CC_Z, true);
// If a memory exception occurs, the exception handler will read // If a memory exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
@ -607,8 +664,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (broken_block) if (broken_block)
{ {
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExit(nextPC, 0); WriteExit(nextPC, 0);
} }

View file

@ -236,8 +236,6 @@ public:
void fmaddXX(UGeckoInstruction inst); void fmaddXX(UGeckoInstruction inst);
void fsign(UGeckoInstruction inst); void fsign(UGeckoInstruction inst);
void stX(UGeckoInstruction inst); //stw sth stb void stX(UGeckoInstruction inst); //stw sth stb
void lXz(UGeckoInstruction inst);
void lha(UGeckoInstruction inst);
void rlwinmx(UGeckoInstruction inst); void rlwinmx(UGeckoInstruction inst);
void rlwimix(UGeckoInstruction inst); void rlwimix(UGeckoInstruction inst);
void rlwnmx(UGeckoInstruction inst); void rlwnmx(UGeckoInstruction inst);
@ -254,12 +252,8 @@ public:
void subfmex(UGeckoInstruction inst); void subfmex(UGeckoInstruction inst);
void subfzex(UGeckoInstruction inst); void subfzex(UGeckoInstruction inst);
void lbzx(UGeckoInstruction inst); void lXXx(UGeckoInstruction inst);
void lwzx(UGeckoInstruction inst);
void lhax(UGeckoInstruction inst);
void lwzux(UGeckoInstruction inst);
void stXx(UGeckoInstruction inst); void stXx(UGeckoInstruction inst);
void lmw(UGeckoInstruction inst); void lmw(UGeckoInstruction inst);

View file

@ -77,14 +77,14 @@ static GekkoOPTemplate primarytable[] =
{28, &Jit64::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {28, &Jit64::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{29, &Jit64::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {29, &Jit64::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{32, &Jit64::lXz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {32, &Jit64::lXXx}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &Jit64::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {33, &Jit64::lXXx}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &Jit64::lXz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {34, &Jit64::lXXx}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &Jit64::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {35, &Jit64::lXXx}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &Jit64::lXz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {40, &Jit64::lXXx}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &Jit64::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {41, &Jit64::lXXx}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &Jit64::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {42, &Jit64::lXXx}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &Jit64::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {43, &Jit64::lXXx}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{44, &Jit64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {44, &Jit64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &Jit64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {45, &Jit64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
@ -220,20 +220,20 @@ static GekkoOPTemplate table31[] =
{1014, &Jit64::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, {1014, &Jit64::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
//load word //load word
{23, &Jit64::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {23, &Jit64::lXXx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &Jit64::lwzux}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {55, &Jit64::lXXx}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword //load halfword
{279, &Jit64::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {279, &Jit64::lXXx}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &Jit64::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {311, &Jit64::lXXx}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword signextend //load halfword signextend
{343, &Jit64::lhax}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {343, &Jit64::lXXx}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &Jit64::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {375, &Jit64::lXXx}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte //load byte
{87, &Jit64::lbzx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {87, &Jit64::lXXx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &Jit64::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse //load byte reverse
{534, &Jit64::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {534, &Jit64::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},

View file

@ -28,18 +28,15 @@ using namespace PowerPC;
RegCache::RegCache() : emit(0) { RegCache::RegCache() : emit(0) {
memset(locks, 0, sizeof(locks)); memset(locks, 0, sizeof(locks));
memset(xlocks, 0, sizeof(xlocks)); memset(xlocks, 0, sizeof(xlocks));
memset(saved_locks, 0, sizeof(saved_locks));
memset(saved_xlocks, 0, sizeof(saved_xlocks));
memset(regs, 0, sizeof(regs)); memset(regs, 0, sizeof(regs));
memset(xregs, 0, sizeof(xregs)); memset(xregs, 0, sizeof(xregs));
memset(saved_regs, 0, sizeof(saved_regs));
memset(saved_xregs, 0, sizeof(saved_xregs));
} }
void RegCache::Start(PPCAnalyst::BlockRegStats &stats) void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
for (int i = 0; i < NUMXREGS; i++) for (int i = 0; i < NUMXREGS; i++)
{ {
xregs[i].ppcReg = -1;
xregs[i].free = true; xregs[i].free = true;
xregs[i].dirty = false; xregs[i].dirty = false;
xlocks[i] = false; xlocks[i] = false;
@ -48,6 +45,7 @@ void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
regs[i].location = GetDefaultLocation(i); regs[i].location = GetDefaultLocation(i);
regs[i].away = false; regs[i].away = false;
locks[i] = false;
} }
// todo: sort to find the most popular regs // todo: sort to find the most popular regs
@ -137,20 +135,20 @@ X64Reg RegCache::GetFreeXReg()
return (X64Reg) -1; return (X64Reg) -1;
} }
void RegCache::SaveState() void RegCache::SaveState(RegCacheState & state)
{ {
memcpy(saved_locks, locks, sizeof(locks)); memcpy(state.locks, locks, sizeof(locks));
memcpy(saved_xlocks, xlocks, sizeof(xlocks)); memcpy(state.xlocks, xlocks, sizeof(xlocks));
memcpy(saved_regs, regs, sizeof(regs)); memcpy(state.regs, regs, sizeof(regs));
memcpy(saved_xregs, xregs, sizeof(xregs)); memcpy(state.xregs, xregs, sizeof(xregs));
} }
void RegCache::LoadState() void RegCache::LoadState(const RegCacheState & state)
{ {
memcpy(xlocks, saved_xlocks, sizeof(xlocks)); memcpy(xlocks, state.xlocks, sizeof(xlocks));
memcpy(locks, saved_locks, sizeof(locks)); memcpy(locks, state.locks, sizeof(locks));
memcpy(regs, saved_regs, sizeof(regs)); memcpy(regs, state.regs, sizeof(regs));
memcpy(xregs, saved_xregs, sizeof(xregs)); memcpy(xregs, state.xregs, sizeof(xregs));
} }
void RegCache::FlushR(X64Reg reg) void RegCache::FlushR(X64Reg reg)

View file

@ -55,21 +55,17 @@ typedef int PReg;
#define NUMXREGS 8 #define NUMXREGS 8
#endif #endif
class RegCache struct RegCacheState
{ {
private:
bool locks[32]; bool locks[32];
bool saved_locks[32];
bool saved_xlocks[NUMXREGS];
protected:
bool xlocks[NUMXREGS]; bool xlocks[NUMXREGS];
PPCCachedReg regs[32]; PPCCachedReg regs[32];
X64CachedReg xregs[NUMXREGS]; X64CachedReg xregs[NUMXREGS];
};
PPCCachedReg saved_regs[32]; class RegCache : protected RegCacheState
X64CachedReg saved_xregs[NUMXREGS]; {
protected:
virtual const int *GetAllocationOrder(int &count) = 0; virtual const int *GetAllocationOrder(int &count) = 0;
XEmitter *emit; XEmitter *emit;
@ -123,8 +119,8 @@ public:
X64Reg GetFreeXReg(); X64Reg GetFreeXReg();
void SaveState(); void SaveState(RegCacheState & state);
void LoadState(); void LoadState(const RegCacheState & state);
}; };
class GPRRegCache : public RegCache class GPRRegCache : public RegCache

View file

@ -45,8 +45,6 @@ void Jit64::sc(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Branch) JITDISABLE(Branch)
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
MOV(32, M(&PC), Imm32(js.compilerPC + 4)); MOV(32, M(&PC), Imm32(js.compilerPC + 4));
OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL));
WriteExceptionExit(); WriteExceptionExit();
@ -57,8 +55,6 @@ void Jit64::rfi(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Branch) JITDISABLE(Branch)
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
// See Interpreter rfi for details // See Interpreter rfi for details
const u32 mask = 0x87C0FFFF; const u32 mask = 0x87C0FFFF;
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
@ -79,8 +75,6 @@ void Jit64::bx(UGeckoInstruction inst)
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, M(&LR), Imm32(js.compilerPC + 4));
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
@ -120,17 +114,14 @@ void Jit64::bcx(UGeckoInstruction inst)
// USES_CR // USES_CR
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{ {
SUB(32, M(&CTR), Imm8(1)); SUB(32, M(&CTR), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0) if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ); pCTRDontBranch = J_CC(CC_NZ, true);
else else
pCTRDontBranch = J_CC(CC_Z); pCTRDontBranch = J_CC(CC_Z, true);
} }
FixupBranch pConditionDontBranch; FixupBranch pConditionDontBranch;
@ -138,9 +129,9 @@ void Jit64::bcx(UGeckoInstruction inst)
{ {
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = J_CC(CC_Z); pConditionDontBranch = J_CC(CC_Z, true);
else else
pConditionDontBranch = J_CC(CC_NZ); pConditionDontBranch = J_CC(CC_NZ, true);
} }
if (inst.LK) if (inst.LK)
@ -165,9 +156,6 @@ void Jit64::bcctrx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Branch) JITDISABLE(Branch)
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
// bcctrx doesn't decrement and/or test CTR // bcctrx doesn't decrement and/or test CTR
_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); _dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");
@ -196,7 +184,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
branch = CC_Z; branch = CC_Z;
else else
branch = CC_NZ; branch = CC_NZ;
FixupBranch b = J_CC(branch, false); FixupBranch b = J_CC(branch, true);
MOV(32, R(EAX), M(&CTR)); MOV(32, R(EAX), M(&CTR));
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(EAX), Imm32(0xFFFFFFFC));
//MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX()
@ -214,17 +202,14 @@ void Jit64::bclrx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Branch) JITDISABLE(Branch)
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{ {
SUB(32, M(&CTR), Imm8(1)); SUB(32, M(&CTR), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0) if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ); pCTRDontBranch = J_CC(CC_NZ, true);
else else
pCTRDontBranch = J_CC(CC_Z); pCTRDontBranch = J_CC(CC_Z, true);
} }
FixupBranch pConditionDontBranch; FixupBranch pConditionDontBranch;
@ -232,9 +217,9 @@ void Jit64::bclrx(UGeckoInstruction inst)
{ {
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch
pConditionDontBranch = J_CC(CC_Z); pConditionDontBranch = J_CC(CC_Z, true);
else else
pConditionDontBranch = J_CC(CC_NZ); pConditionDontBranch = J_CC(CC_NZ, true);
} }
// This below line can be used to prove that blr "eats flags" in practice. // This below line can be used to prove that blr "eats flags" in practice.

View file

@ -264,9 +264,6 @@ void Jit64::cmpXX(UGeckoInstruction inst)
{ {
js.downcountAmount++; js.downcountAmount++;
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
int test_bit = 8 >> (js.next_inst.BI & 3); int test_bit = 8 >> (js.next_inst.BI & 3);
u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0; u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0;
if ((compareResult & test_bit) == conditionResult) if ((compareResult & test_bit) == conditionResult)
@ -359,21 +356,19 @@ void Jit64::cmpXX(UGeckoInstruction inst)
// if (rand() & 1) // if (rand() & 1)
// std::swap(destination1, destination2), condition = !condition; // std::swap(destination1, destination2), condition = !condition;
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
FixupBranch pLesser = J_CC(less_than); FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than); FixupBranch pGreater = J_CC(greater_than);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0
FixupBranch continue1 = J(); FixupBranch continue1 = J(true);
SetJumpTarget(pGreater); SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0
FixupBranch continue2 = J(); FixupBranch continue2 = J(true);
SetJumpTarget(pLesser); SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0
FixupBranch continue3; FixupBranch continue3;
if (!!(8 & test_bit) == condition) continue3 = J(); if (!!(8 & test_bit) == condition) continue3 = J(true);
if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
if (js.next_inst.OPCD == 16) // bcx if (js.next_inst.OPCD == 16) // bcx

View file

@ -34,96 +34,84 @@
#include "JitAsm.h" #include "JitAsm.h"
#include "JitRegCache.h" #include "JitRegCache.h"
void Jit64::lbzx(UGeckoInstruction inst) void Jit64::lXXx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(LoadStore) JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff) // Skip disabled JIT instructions
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff && (inst.OPCD == 31) && (inst.SUBOP10 == 87))
{ Default(inst); return; }
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff && ((inst.OPCD == 34) || (inst.OPCD == 40) || (inst.OPCD == 32)))
{ Default(inst); return; }
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff && (inst.OPCD == 32))
{ Default(inst); return; } { Default(inst); return; }
int a = inst.RA, b = inst.RB, d = inst.RD; // Determine memory access size and sign extend
gpr.FlushLockX(ABI_PARAM1); int accessSize;
MOV(32, R(ABI_PARAM1), gpr.R(b)); bool signExtend;
if (a) switch (inst.OPCD)
{ {
ADD(32, R(ABI_PARAM1), gpr.R(a)); case 32: /* lwz */
case 33: /* lwzu */
accessSize = 32;
signExtend = false;
break;
case 34: /* lbz */
case 35: /* lbzu */
accessSize = 8;
signExtend = false;
break;
case 40: /* lhz */
case 41: /* lhzu */
accessSize = 16;
signExtend = false;
break;
case 42: /* lha */
case 43: /* lhau */
accessSize = 16;
signExtend = true;
break;
case 31:
switch (inst.SUBOP10)
{
case 23: /* lwzx */
case 55: /* lwzux */
accessSize = 32;
signExtend = false;
break;
case 87: /* lbzx */
case 119: /* lbzux */
accessSize = 8;
signExtend = false;
break;
case 279: /* lhzx */
case 311: /* lhzux */
accessSize = 16;
signExtend = false;
break;
case 343: /* lhax */
case 375: /* lhaux */
accessSize = 16;
signExtend = true;
break;
default:
PanicAlert("Invalid instruction");
}
break;
default:
PanicAlert("Invalid instruction");
} }
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
MEMCHECK_START
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
void Jit64::lhax(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
// Some homebrew actually loads from a hw reg with this instruction
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
MEMCHECK_START
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
void Jit64::lwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
MEMCHECK_START
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
void Jit64::lXz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
{ Default(inst); return; }
int d = inst.RD;
int a = inst.RA;
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
// Will give nice boost to dual core mode // Will give nice boost to dual core mode
// (mb2): I agree, // (mb2): I agree,
@ -144,23 +132,24 @@ void Jit64::lXz(UGeckoInstruction inst)
// do our job at first // do our job at first
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d); gpr.Lock(d);
MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
gpr.KillImmediate(d, false, true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
gpr.Flush(FLUSH_ALL);
// if it's still 0, we can wait until the next event // if it's still 0, we can wait until the next event
CMP(32, R(RAX), Imm32(0)); CMP(32, R(RAX), Imm32(0));
FixupBranch noIdle = J_CC(CC_NE); FixupBranch noIdle = J_CC(CC_NE);
// We are about to jump to the exception handler, save and flush regs
RegCacheState regCacheStateGPR;
RegCacheState regCacheStateFPR;
gpr.SaveState(regCacheStateGPR);
fpr.SaveState(regCacheStateFPR);
gpr.Flush(FLUSH_ALL); gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL);
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
@ -169,112 +158,99 @@ void Jit64::lXz(UGeckoInstruction inst)
SetJumpTarget(noIdle); SetJumpTarget(noIdle);
// Restore registers states so that the next instructions could still use the cached values
gpr.LoadState(regCacheStateGPR);
fpr.LoadState(regCacheStateFPR);
//js.compilerPC += 8; //js.compilerPC += 8;
return; return;
} }
else if ((inst.OPCD == 32 /* lwz */) && accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
s32 offset = (s32)(s16)inst.SIMM_16;
if (!a)
{
Default(inst);
return;
}
int accessSize;
switch (inst.OPCD)
{
case 32:
accessSize = 32;
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
break; //lwz
case 40: accessSize = 16; break; //lhz
case 34: accessSize = 8; break; //lbz
default:
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
PanicAlert("lXz: invalid access size");
return;
}
if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
{ {
// Fast and daring // Fast and daring
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(a, true, false); gpr.BindToRegister(a, true, false);
gpr.BindToRegister(d, a == d, true); gpr.BindToRegister(d, a == d, true);
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
BSWAP(32, gpr.R(d).GetSimpleReg()); BSWAP(32, gpr.R(d).GetSimpleReg());
gpr.UnlockAll(); gpr.UnlockAll();
return;
}
// Determine whether this instruction updates inst.RA
bool update;
if (inst.OPCD == 31)
update = ((inst.SUBOP10 & 0x20) != 0);
else
update = ((inst.OPCD & 1) != 0);
// Prepare address operand
Gen::OpArg opAddress;
if (!update && !a)
{
if (inst.OPCD == 31)
{
gpr.Lock(b);
opAddress = gpr.R(b);
}
else
{
opAddress = Imm32((u32)(s32)inst.SIMM_16);
}
}
else if (update && ((a == 0) || (d == a)))
{
PanicAlert("Invalid instruction");
} }
else else
{ {
gpr.FlushLockX(ABI_PARAM1); if ((inst.OPCD != 31) && gpr.R(a).IsImm())
gpr.Lock(a); {
gpr.BindToRegister(a, true, false); opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16);
MOV(32, R(ABI_PARAM1), gpr.R(a)); }
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
MEMCHECK_START opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset);
}
gpr.KillImmediate(d, false, true); else
MOV(32, gpr.R(d), R(EAX)); {
gpr.FlushLockX(ABI_PARAM1);
MEMCHECK_END opAddress = R(ABI_PARAM1);
MOV(32, opAddress, gpr.R(a));
gpr.UnlockAll();
gpr.UnlockAllX(); if (inst.OPCD == 31)
ADD(32, opAddress, gpr.R(b));
else
ADD(32, opAddress, Imm32((u32)(s32)inst.SIMM_16));
}
} }
}
void Jit64::lha(UGeckoInstruction inst) SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
{
INSTRUCTION_START
JITDISABLE(LoadStore)
int d = inst.RD; // We must flush immediate values from the following registers because
int a = inst.RA; // they may change at runtime if no MMU exception has been raised
s32 offset = (s32)(s16)inst.SIMM_16; gpr.KillImmediate(d, true, true);
// Safe and boring if (update)
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
MEMCHECK_START
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
void Jit64::lwzux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (!a || a == d || a == b)
{ {
Default(inst); gpr.Lock(a);
return; gpr.BindToRegister(a, true, true);
} }
gpr.Lock(a);
gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EAX), gpr.R(a));
SafeLoadRegToEAX(EAX, 32, 0, false);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d, false, true); if (update)
{
if (inst.OPCD == 31)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
}
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -312,7 +288,7 @@ void Jit64::stX(UGeckoInstruction inst)
bool update = inst.OPCD & 1; bool update = inst.OPCD & 1;
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
if (a || update) if (a || !update)
{ {
int accessSize; int accessSize;
switch (inst.OPCD & ~1) switch (inst.OPCD & ~1)
@ -323,18 +299,18 @@ void Jit64::stX(UGeckoInstruction inst)
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
} }
if (gpr.R(a).IsImm()) if ((a == 0) || gpr.R(a).IsImm())
{ {
// If we already know the address through constant folding, we can do some // If we already know the address through constant folding, we can do some
// fun tricks... // fun tricks...
u32 addr = (u32)gpr.R(a).offset; u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset);
addr += offset; addr += offset;
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
{ {
if (offset && update)
gpr.SetImmediate32(a, addr);
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(s)); MOV(32, R(ABI_PARAM1), gpr.R(s));
if (update)
gpr.SetImmediate32(a, addr);
switch (accessSize) switch (accessSize)
{ {
// No need to protect these, they don't touch any state // No need to protect these, they don't touch any state
@ -347,16 +323,27 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.UnlockAllX(); gpr.UnlockAllX();
return; return;
} }
else if (Memory::IsRAMAddress(addr) && accessSize == 32) else if (Memory::IsRAMAddress(addr))
{ {
if (offset && update) MOV(32, R(EAX), gpr.R(s));
gpr.SetImmediate32(a, addr);
MOV(accessSize, R(EAX), gpr.R(s));
BSWAP(accessSize, EAX); BSWAP(accessSize, EAX);
WriteToConstRamAddress(accessSize, R(EAX), addr); WriteToConstRamAddress(accessSize, R(EAX), addr);
if (update)
gpr.SetImmediate32(a, addr);
return;
}
else
{
switch (accessSize)
{
case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break;
case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break;
case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break;
}
if (update)
gpr.SetImmediate32(a, addr);
return; return;
} }
// Other IO not worth the trouble.
} }
// Optimized stack access? // Optimized stack access?
@ -372,7 +359,7 @@ void Jit64::stX(UGeckoInstruction inst)
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
#endif #endif
if (update) if (update && offset)
{ {
gpr.Lock(a); gpr.Lock(a);
gpr.KillImmediate(a, true, true); gpr.KillImmediate(a, true, true);
@ -406,9 +393,9 @@ void Jit64::stX(UGeckoInstruction inst)
if (update && offset) if (update && offset)
{ {
gpr.KillImmediate(a, true, true);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a, true, true);
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END MEMCHECK_END
@ -419,7 +406,7 @@ void Jit64::stX(UGeckoInstruction inst)
} }
else else
{ {
Default(inst); PanicAlert("Invalid stX");
} }
} }

View file

@ -62,15 +62,13 @@ void Jit64::lfs(UGeckoInstruction inst)
return; return;
} }
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
if (jo.assumeFPLoadFromMem) if (jo.assumeFPLoadFromMem)
{ {
UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false); UnsafeLoadToEAX(gpr.R(a), 32, offset, false);
} }
else else
{ {
SafeLoadRegToEAX(ABI_PARAM1, 32, offset); SafeLoadToEAX(gpr.R(a), 32, offset, false);
} }
MEMCHECK_START MEMCHECK_START
@ -83,7 +81,6 @@ void Jit64::lfs(UGeckoInstruction inst)
MEMCHECK_END MEMCHECK_END
gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -299,9 +296,12 @@ void Jit64::stfs(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32(offset)); ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset) if (update && offset)
{ {
// We must flush immediate values from the following register because
// it may take another value at runtime if no MMU exception has been raised
gpr.KillImmediate(a, true, true);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a, false, true);
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END MEMCHECK_END

View file

@ -123,8 +123,6 @@ void Jit64::mtmsr(UGeckoInstruction inst)
} }
MOV(32, M(&MSR), gpr.R(inst.RS)); MOV(32, M(&MSR), gpr.R(inst.RS));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExit(js.compilerPC + 4, 0); WriteExit(js.compilerPC + 4, 0);
} }
// ============== // ==============

View file

@ -71,6 +71,56 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
#endif #endif
} }
// Emits host code that loads `accessSize` bits from the emulated address
// (opAddress + offset) into EAX. No check of the address is emitted here.
//   opAddress  - address operand; may be an immediate, a register or another operand.
//   accessSize - width of the load in bits; 32, 16 and 8 are the cases handled below.
//   offset     - signed displacement added to opAddress.
//   signExtend - for 16- and 8-bit loads, sign-extend the result to 32 bits
//                instead of zero-extending it.
// 32- and 16-bit results are byte-swapped after the load.
void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
{
if (opAddress.IsImm())
{
// The address is known at emit time, so it is folded into an absolute operand.
#ifdef _M_IX86
// 32-bit build: the summed address is masked with MEMVIEW32_MASK before use.
MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
#else
// Other builds: the summed address is added to Memory::base without masking.
MOVZX(32, accessSize, EAX, M(Memory::base + (u32)opAddress.offset + offset));
#endif
}
else
{
#ifdef _M_IX86
// 32-bit build: compute the address in EAX, mask it, then load relative to Memory::base.
MOV(32, R(EAX), opAddress);
if (offset)
ADD(32, R(EAX), Imm32(offset));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base));
#else
// NOTE(review): RBX is used as the base register of the access; presumably it
// holds the emulated memory base at this point - confirm against the JIT prologue.
if (opAddress.IsSimpleReg())
{
// The address is already in a register: use it directly as the index.
MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
}
else
{
// Otherwise copy the address into EAX first and index with EAX.
MOV(32, R(EAX), opAddress);
MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
}
#endif
}
if (accessSize == 32)
{
BSWAP(32, EAX);
}
else if (accessSize == 16)
{
// After the 32-bit swap the two loaded bytes sit in the upper half of EAX;
// shifting right by 16 moves them back down, arithmetically (sign-extending)
// or logically (zero-extending) depending on signExtend.
BSWAP(32, EAX);
if (signExtend)
SAR(32, R(EAX), Imm8(16));
else
SHR(32, R(EAX), Imm8(16));
}
else if (signExtend)
{
// Remaining case (8-bit in practice): sign-extend the loaded value in EAX.
// TODO: bake 8-bit into the original load.
MOVSX(32, accessSize, EAX, R(EAX));
}
}
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend) void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
{ {
if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU) if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
@ -112,6 +162,95 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
} }
} }
// Emits host code that loads `accessSize` bits from the emulated address
// (opAddress + offset) into EAX, choosing between the direct load emitted by
// UnsafeLoadToEAX and a call to one of the Memory::Read_U* functions.
//   opAddress  - address operand; an immediate is resolved at emit time,
//                anything else is tested by the emitted code at run time.
//   accessSize - width of the load in bits (32, 16 or 8).
//   offset     - signed displacement added to opAddress.
//   signExtend - sign-extend 16- and 8-bit results to 32 bits.
void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
{
if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
{
// With bUseFastMem set and bMMU clear, 32- and 8-bit loads are emitted as a
// direct load with no address test.
// FIXME: accessSize == 16 does not work. Breaks mkdd
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
}
else
{
// Any address bit that is set in mem_mask selects the Memory::Read_U* call path.
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
// With bMMU or iTLBHack enabled the ADDR_MASK_MEM1 bits are tested as well.
mem_mask |= Memory::ADDR_MASK_MEM1;
}
if (opAddress.IsImm())
{
// Constant address: choose the path now instead of emitting a run-time test.
u32 address = (u32)opAddress.offset + offset;
if ((address & mem_mask) == 0)
{
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
}
else
{
// NOTE(review): thunks.ProtectFunction presumably wraps the callee so that
// host registers survive the call - confirm in the thunk manager.
switch (accessSize)
{
case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break;
case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break;
case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break;
}
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
}
}
}
else
{
if (offset)
{
// Build the effective address in EAX; it is used both for the mask test
// and as the argument of the Read_U* call.
MOV(32, R(EAX), opAddress);
ADD(32, R(EAX), Imm32(offset));
TEST(32, R(EAX), Imm32(mem_mask));
// No mask bit set: jump to the direct load emitted after `fast`.
FixupBranch fast = J_CC(CC_Z);
switch (accessSize)
{
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break;
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break;
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break;
}
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
}
// Skip over the direct load once the call path has produced the value.
FixupBranch exit = J();
SetJumpTarget(fast);
// The direct load is given opAddress and offset again rather than the sum in EAX.
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
SetJumpTarget(exit);
}
else
{
// No displacement: test the address operand in place and pass it to the
// call as-is.
TEST(32, opAddress, Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
switch (accessSize)
{
case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break;
case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break;
case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break;
}
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
}
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
SetJumpTarget(exit);
}
}
}
}
void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{ {
if (accessSize == 8 && reg_value >= 4) { if (accessSize == 8 && reg_value >= 4) {

View file

@ -27,8 +27,10 @@ public:
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
// Trashes both inputs and EAX. // Trashes both inputs and EAX.
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr); void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);