Reimplements fastmem for ARMv7 floating point loadstores.

This implements a new system for fastmem backpatching on ARMv7 that is less of a mindfsck to deal with.
This also implements stfs under the default loadstore path; I'm not sure why it had its own separate implementation in the first place.

I'll be moving the rest of the loadstore methods over to this new system in a few days.
This commit is contained in:
Ryan Houdek 2014-11-15 08:16:36 +00:00
parent e47bfc2788
commit 181f16c5f0
4 changed files with 149 additions and 138 deletions

View file

@ -205,7 +205,6 @@ public:
// Floating point loadStore // Floating point loadStore
void lfXX(UGeckoInstruction _inst); void lfXX(UGeckoInstruction _inst);
void stfXX(UGeckoInstruction _inst); void stfXX(UGeckoInstruction _inst);
void stfs(UGeckoInstruction _inst);
// Paired Singles // Paired Singles
void ps_add(UGeckoInstruction _inst); void ps_add(UGeckoInstruction _inst);

View file

@ -17,7 +17,7 @@ using namespace ArmGen;
// 1) It's really necessary. We don't know anything about the context. // 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be // 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game. // that many of them in a typical program/game.
static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store) static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system)
{ {
u8 op = (inst >> 20) & 0xFF; u8 op = (inst >> 20) & 0xFF;
rD = (ARMReg)((inst >> 12) & 0xF); rD = (ARMReg)((inst >> 12) & 0xF);
@ -60,9 +60,24 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
accessSize = 16; accessSize = 16;
} }
break; break;
default:
{
// Could be a floating point loadstore
u8 op2 = (inst >> 24) & 0xF;
switch (op2)
{
case 0xD: // VLDR/VSTR
*new_system = true;
break;
case 0x4: // VST1/VLD1
*new_system = true;
break;
default: default:
printf("Op is 0x%02x\n", op); printf("Op is 0x%02x\n", op);
return false; return false;
break;
}
}
} }
return true; return true;
} }
@ -70,10 +85,7 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx) bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
{ {
if (access_address < (uintptr_t)Memory::base) if (access_address < (uintptr_t)Memory::base)
{ PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address);
PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
access_address >> 32, access_address);
}
return BackPatch(ctx); return BackPatch(ctx);
} }
@ -87,13 +99,36 @@ bool JitArm::BackPatch(SContext* ctx)
ARMReg rD; ARMReg rD;
u8 accessSize; u8 accessSize;
bool Store; bool Store;
bool new_system = false;
if (!DisamLoadStore(Value, rD, accessSize, Store)) if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system))
{ {
printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value); printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value);
exit(0); exit(0);
} }
if (new_system)
{
// The new system is a lot easier to backpatch than the old crap.
// Instead of backpatching over code and making sure we NOP pad and other crap
// We emit both the slow and fast path and branch over the slow path each time
// We search backwards until we find the second branch instruction
// Then proceed to replace it with a NOP and set that to the new PC.
// This ensures that we run the slow path and then branch over the fast path.
// Run backwards until we find the branch we want to NOP
for (int branches = 2; branches > 0; ctx->CTX_PC -= 4)
if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B
--branches;
ctx->CTX_PC += 4;
ARMXEmitter emitter((u8*)ctx->CTX_PC);
emitter.NOP(1);
emitter.FlushIcache();
return true;
}
else
{
if (Store) if (Store)
{ {
const u32 ARMREGOFFSET = 4 * 5; const u32 ARMREGOFFSET = 4 * 5;
@ -148,6 +183,7 @@ bool JitArm::BackPatch(SContext* ctx)
emitter.FlushIcache(); emitter.FlushIcache();
return true; return true;
} }
}
return 0; return 0;
} }

View file

@ -77,9 +77,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
break; break;
} }
ARMReg v0 = fpr.R0(inst.FD), v1; ARMReg v0 = fpr.R0(inst.FD, false), v1;
if (single) if (single)
v1 = fpr.R1(inst.FD); v1 = fpr.R1(inst.FD, false);
if (update) if (update)
{ {
@ -134,28 +134,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
if (update) if (update)
MOV(RA, rB); MOV(RA, rB);
if (false) // This branch gets changed to a NOP when the fastpath fails
{ FixupBranch fast_path = B();
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) FixupBranch slow_out;
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VLDR(S0, rB, 0);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
VLDR(v0, rB, 0);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
}
else
{ {
PUSH(4, R0, R1, R2, R3); PUSH(4, R0, R1, R2, R3);
MOV(R0, rB); MOV(R0, rB);
@ -163,9 +144,7 @@ void JitArm::lfXX(UGeckoInstruction inst)
{ {
MOVI2R(rA, (u32)&Memory::Read_U32); MOVI2R(rA, (u32)&Memory::Read_U32);
BL(rA); BL(rA);
VMOV(S0, R0); VMOV(S0, R0);
VCVT(v0, S0, 0); VCVT(v0, S0, 0);
VCVT(v1, S0, 0); VCVT(v1, S0, 0);
} }
@ -181,7 +160,34 @@ void JitArm::lfXX(UGeckoInstruction inst)
#endif #endif
} }
POP(4, R0, R1, R2, R3); POP(4, R0, R1, R2, R3);
slow_out = B();
} }
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
nemit.VLD1(F_32, D0, rC);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
nemit.VLD1(I_64, v0, rC);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
gpr.Unlock(rA, rB); gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad); SetJumpTarget(DoNotLoad);
} }
@ -302,36 +308,17 @@ void JitArm::stfXX(UGeckoInstruction inst)
SetCC(); SetCC();
} }
if (false) // This branch gets changed to a NOP when the fastpath fails
{ FixupBranch fast_path = B();
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) FixupBranch slow_out;
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rB, 0);
}
else
{
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rB, 0);
}
}
else
{ {
PUSH(4, R0, R1, R2, R3); PUSH(4, R0, R1, R2, R3);
if (single) if (single)
{ {
MOVI2R(rA, (u32)&Memory::Write_U32); MOV(R1, rB);
VCVT(S0, v0, 0); VCVT(S0, v0, 0);
VMOV(R0, S0); VMOV(R0, S0);
MOV(R1, rB); MOVI2R(rA, (u32)&Memory::Write_U32);
BL(rA); BL(rA);
} }
else else
@ -347,43 +334,32 @@ void JitArm::stfXX(UGeckoInstruction inst)
BL(rA); BL(rA);
} }
POP(4, R0, R1, R2, R3); POP(4, R0, R1, R2, R3);
slow_out = B();
} }
gpr.Unlock(rA, rB); SetJumpTarget(fast_path);
}
// Some games use stfs as a way to quickly write to the gatherpipe and other hardware areas.
// Keep it as a safe store until this can get optimized.
// Look at the JIT64 implementation to see how it is done
void JitArm::stfs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
VCVT(S0, v0, 0);
if (inst.RA)
{ {
MOVI2R(rB, inst.SIMM_16); Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg RA = gpr.R(inst.RA); ARMReg rC = gpr.GetReg();
ADD(rB, rB, RA); BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rC, 0);
} }
else else
{ {
MOVI2R(rB, (u32)inst.SIMM_16); nemit.VREV64(I_8, D0, v0);
VSTR(D0, rC, 0);
}
gpr.Unlock(rC);
} }
MOVI2R(rA, (u32)&Memory::Write_U32); SetJumpTarget(slow_out);
PUSH(4, R0, R1, R2, R3);
VMOV(R0, S0);
MOV(R1, rB);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB); gpr.Unlock(rA, rB);
} }

View file

@ -89,7 +89,7 @@ static GekkoOPTemplate primarytable[] =
{50, &JitArm::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, {50, &JitArm::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, {51, &JitArm::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, {52, &JitArm::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {53, &JitArm::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, {54, &JitArm::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {55, &JitArm::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},