Reimplements fastmem for ARMv7 floating point loadstores.

This implements a new system for fastmem backpatching on ARMv7 that is less of a mindfsck to deal with.
This also implements stfs under the default loadstore path; not sure why it was implemented by itself in the first place.

I'll be moving the rest of the loadstore methods over to this new way in a few days.
Ryan Houdek 2014-11-15 08:16:36 +00:00
parent e47bfc2788
commit 181f16c5f0
4 changed files with 149 additions and 138 deletions
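
For readers skimming the diff, the shape of the new system is worth sketching up front: each float loadstore now emits its slow path and its fast path back to back, with an unconditional branch that skips the slow path. Backpatching then reduces to turning that one branch into a NOP. Below is a toy model of that layout in plain C++; this is not Dolphin's ARMXEmitter, the instruction words are standard ARM encodings, and the slow/fast path bodies are made-up placeholders.

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for the JIT's code buffer, just enough to show the layout.
struct ToyEmitter
{
    std::vector<uint32_t> code;

    size_t B() // unconditional B, offset filled in by SetJumpTarget
    {
        code.push_back(0xEA000000);
        return code.size() - 1;
    }

    void SetJumpTarget(size_t branch) // aim the earlier B at "here"
    {
        int32_t words = (int32_t)(code.size() - branch) - 2; // ARM PC reads +8
        code[branch] |= (uint32_t)words & 0x00FFFFFF;
    }

    void Word(uint32_t w) { code.push_back(w); }
};

int main()
{
    ToyEmitter emit;

    size_t fast_path = emit.B(); // taken while the fast path is trusted
    emit.Word(0xE92D000F);       // PUSH {r0-r3}: slow path body starts
    emit.Word(0xE8BD000F);       // POP {r0-r3}: slow path body ends
    size_t slow_out = emit.B();  // slow path jumps over the fast path
    emit.SetJumpTarget(fast_path);
    emit.Word(0xF4A00C0F);       // placeholder for the fast VLD1/VST1 access
    emit.SetJumpTarget(slow_out);

    for (size_t i = 0; i < emit.code.size(); ++i)
        printf("+%02zx: %08X\n", i * 4, emit.code[i]);
    return 0;
}

NOPing the first B makes execution fall straight into the slow path, which then branches over the fast access; nothing else has to be rewritten.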

View file

@@ -205,7 +205,6 @@ public:
// Floating point loadStore
void lfXX(UGeckoInstruction _inst);
void stfXX(UGeckoInstruction _inst);
void stfs(UGeckoInstruction _inst);
// Paired Singles
void ps_add(UGeckoInstruction _inst);

View file

@@ -17,7 +17,7 @@ using namespace ArmGen;
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store)
static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system)
{
u8 op = (inst >> 20) & 0xFF;
rD = (ARMReg)((inst >> 12) & 0xF);
@@ -60,9 +60,24 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
accessSize = 16;
}
break;
default:
{
// Could be a floating point loadstore
u8 op2 = (inst >> 24) & 0xF;
switch (op2)
{
case 0xD: // VLDR/VSTR
*new_system = true;
break;
case 0x4: // VST1/VLD1
*new_system = true;
break;
default:
printf("Op is 0x%02x\n", op);
return false;
break;
}
}
}
return true;
}
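
The new default case recognizes the VFP/NEON accesses by bits 27..24 of the faulting instruction word. A minimal standalone version of that decode, with hand-assembled example encodings (treat those as illustrative, not verified against an assembler):

#include <cstdint>
#include <cstdio>

// Mirrors the new default case in DisamLoadStore: bits 27..24 pick out the
// VFP/NEON loadstores that are handled by the new backpatch scheme.
static bool IsNewSystemOp(uint32_t inst)
{
    switch ((inst >> 24) & 0xF)
    {
    case 0xD: // VLDR/VSTR (VFP load/store)
    case 0x4: // VLD1/VST1 (NEON element/structure, top nibble 0xF)
        return true;
    default:
        return false;
    }
}

int main()
{
    printf("%d\n", IsNewSystemOp(0xED900A00)); // VLDR s0, [r0]  -> 1
    printf("%d\n", IsNewSystemOp(0xF4A00C0F)); // VLD1-type word -> 1
    printf("%d\n", IsNewSystemOp(0xE5900000)); // LDR r0, [r0]   -> 0
    return 0;
}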
@@ -70,10 +85,7 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
{
if (access_address < (uintptr_t)Memory::base)
{
PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
access_address >> 32, access_address);
}
PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address);
return BackPatch(ctx);
}
@@ -87,13 +99,36 @@ bool JitArm::BackPatch(SContext* ctx)
ARMReg rD;
u8 accessSize;
bool Store;
bool new_system = false;
if (!DisamLoadStore(Value, rD, accessSize, Store))
if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system))
{
printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value);
exit(0);
}
if (new_system)
{
// The new system is a lot easier to backpatch than the old crap.
// Instead of backpatching over code and making sure we NOP pad and other crap
// We emit both the slow and fast path and branch over the slow path each time
// We search backwards until we find the second branch instruction
// Then proceed to replace it with a NOP and set that to the new PC.
// This ensures that we run the slow path and then branch over the fast path.
// Run backwards until we find the branch we want to NOP
for (int branches = 2; branches > 0; ctx->CTX_PC -= 4)
if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B
--branches;
ctx->CTX_PC += 4;
ARMXEmitter emitter((u8*)ctx->CTX_PC);
emitter.NOP(1);
emitter.FlushIcache();
return true;
}
else
{
if (Store)
{
const u32 ARMREGOFFSET = 4 * 5;
@@ -148,6 +183,7 @@ bool JitArm::BackPatch(SContext* ctx)
emitter.FlushIcache();
return true;
}
}
return 0;
}
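
That backward scan is now the entire backpatch. Here is a self-contained mirror of it over a plain word buffer, for convincing yourself the pointer arithmetic lands on the right instruction; the NOP written here is MOV r0, r0, an assumption, since emitter.NOP(1) may emit a different encoding.

#include <cstdint>
#include <cstdio>

// Mirror of the new-system path in JitArm::BackPatch: from the faulting
// instruction, walk backwards to the second B (the 0x0A000000 pattern,
// condition field aside), overwrite it with a NOP, and resume there so
// the slow path runs and then branches over the fast path.
static uint32_t* NopSecondBranchBack(uint32_t* pc)
{
    for (int branches = 2; branches > 0; --pc)
        if ((*pc & 0x0F000000) == 0x0A000000) // a B with any condition
            --branches;
    ++pc;             // the loop steps once past the final match
    *pc = 0xE1A00000; // MOV r0, r0: classic ARM NOP (assumed encoding)
    return pc;        // the new PC to stuff back into the context
}

int main()
{
    uint32_t stream[] = {
        0xEA000002, // B fast_path        <- this word gets NOPed
        0xE92D000F, // PUSH {r0-r3}       (slow path)
        0xE8BD000F, // POP {r0-r3}
        0xEA000000, // B slow_out
        0xF4A00C0F, // fast access        <- pretend this one faulted
        0xE1A00000, // slow_out: whatever comes next
    };
    uint32_t* new_pc = NopSecondBranchBack(&stream[4]);
    printf("patched word %td: %08X\n", new_pc - stream, *new_pc);
    return 0;
}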

View file

@@ -77,9 +77,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
break;
}
ARMReg v0 = fpr.R0(inst.FD), v1;
ARMReg v0 = fpr.R0(inst.FD, false), v1;
if (single)
v1 = fpr.R1(inst.FD);
v1 = fpr.R1(inst.FD, false);
if (update)
{
@@ -134,28 +134,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
if (update)
MOV(RA, rB);
if (false)
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VLDR(S0, rB, 0);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
VLDR(v0, rB, 0);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
}
else
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path = B();
FixupBranch slow_out;
{
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
@@ -163,9 +144,7 @@ void JitArm::lfXX(UGeckoInstruction inst)
{
MOVI2R(rA, (u32)&Memory::Read_U32);
BL(rA);
VMOV(S0, R0);
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
@@ -181,7 +160,34 @@
#endif
}
POP(4, R0, R1, R2, R3);
slow_out = B();
}
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
nemit.VLD1(F_32, D0, rC);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
nemit.VLD1(I_64, v0, rC);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
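
In scalar terms, the fast path that replaces the old if (false) block computes the following for an lfs; kMemMask stands in for Memory::MEMVIEW32_MASK (its value here is an assumption) and __builtin_bswap32 for the VREV32 byte swap.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar equivalent of the single-precision fast path: BIC/ADD form the
// host address, VLD1 loads, VREV32 byteswaps, VCVT widens to double (the
// JIT then duplicates the result into both paired-single halves).
static double FastLfs(const uint8_t* memory_base, uint32_t guest_addr)
{
    const uint32_t kMemMask = 0x3FFFFFFF; // assumed MEMVIEW32_MASK value

    uint32_t word;
    std::memcpy(&word, memory_base + (guest_addr & kMemMask), sizeof(word));
    word = __builtin_bswap32(word); // guest memory is big-endian

    float single;
    std::memcpy(&single, &word, sizeof(single));
    return (double)single;
}

int main()
{
    uint8_t ram[8] = {0x3F, 0x80, 0x00, 0x00}; // big-endian 1.0f at offset 0
    printf("%f\n", FastLfs(ram, 0x80000000));  // cached mirror masks to 0
    return 0;
}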
@@ -302,36 +308,17 @@ void JitArm::stfXX(UGeckoInstruction inst)
SetCC();
}
if (false)
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rB, 0);
}
else
{
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rB, 0);
}
}
else
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path = B();
FixupBranch slow_out;
{
PUSH(4, R0, R1, R2, R3);
if (single)
{
MOVI2R(rA, (u32)&Memory::Write_U32);
MOV(R1, rB);
VCVT(S0, v0, 0);
VMOV(R0, S0);
MOV(R1, rB);
MOVI2R(rA, (u32)&Memory::Write_U32);
BL(rA);
}
else
@@ -347,43 +334,32 @@ void JitArm::stfXX(UGeckoInstruction inst)
BL(rA);
}
POP(4, R0, R1, R2, R3);
slow_out = B();
}
gpr.Unlock(rA, rB);
}
// Some games use stfs as a way to quickly write to the gatherpipe and other hardware areas.
// Keep it as a safe store until this can get optimized.
// Look at the JIT64 implementation to see how it is done
void JitArm::stfs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
VCVT(S0, v0, 0);
if (inst.RA)
SetJumpTarget(fast_path);
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rC, 0);
}
else
{
MOVI2R(rB, (u32)inst.SIMM_16);
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rC, 0);
}
gpr.Unlock(rC);
}
MOVI2R(rA, (u32)&Memory::Write_U32);
PUSH(4, R0, R1, R2, R3);
VMOV(R0, S0);
MOV(R1, rB);
BL(rA);
POP(4, R0, R1, R2, R3);
SetJumpTarget(slow_out);
gpr.Unlock(rA, rB);
}
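
The store direction is symmetric: the PPC double is narrowed to single, byteswapped, and written through the masked host pointer; same assumptions as the load sketch above. With stfs folded into this path, the tables change below is all that remains of the old dedicated implementation.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar equivalent of the stfs fast path: VCVT narrows the PPC double,
// VREV32 byteswaps, and the store goes through the masked host pointer.
static void FastStfs(uint8_t* memory_base, uint32_t guest_addr, double value)
{
    const uint32_t kMemMask = 0x3FFFFFFF; // assumed MEMVIEW32_MASK value
    float single = (float)value;          // VCVT f64 -> f32

    uint32_t word;
    std::memcpy(&word, &single, sizeof(word));
    word = __builtin_bswap32(word);       // store big-endian
    std::memcpy(memory_base + (guest_addr & kMemMask), &word, sizeof(word));
}

int main()
{
    uint8_t ram[8] = {};
    FastStfs(ram, 0x80000000, 1.0);
    printf("%02X %02X %02X %02X\n", ram[0], ram[1], ram[2], ram[3]); // 3F 80 00 00
    return 0;
}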

View file

@@ -89,7 +89,7 @@ static GekkoOPTemplate primarytable[] =
{50, &JitArm::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{52, &JitArm::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},