MMU: simplify code to restore original data register after failed load

Instead of passing the value around constantly, just store it in the regcache,
note where it is, and restore it on the exception path.

This saves a whole bunch of pushing and popping and gives a ~5% speed boost
in Rebel Strike. It's a bit ugly, but it simplifies a lot of code and is
faster, too.
This commit is contained in:
Fiora 2015-01-02 18:34:10 -08:00
parent 53b44ccb3a
commit 8903df7300
10 changed files with 89 additions and 102 deletions

View file

@ -96,13 +96,6 @@ u16 Read_U16(const u32 address);
u32 Read_U32(const u32 address);
u64 Read_U64(const u32 address);
u32 Read_S8_Val(const u32 address, u32 var);
u32 Read_U8_Val(const u32 address, u32 var);
u32 Read_S16_Val(const u32 address, u32 var);
u32 Read_U16_Val(const u32 address, u32 var);
u32 Read_U32_Val(const u32 address, u32 var);
u64 Read_U64_Val(const u32 address, u64 var);
// Useful helper functions, used by ARM JIT
float Read_F32(const u32 address);
double Read_F64(const u32 address);

View file

@ -92,8 +92,8 @@ static u32 EFB_Read(const u32 addr)
static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite);
template <XCheckTLBFlag flag, typename T, typename U>
__forceinline void ReadFromHardware(U &_var, const u32 em_address)
template <XCheckTLBFlag flag, typename T>
__forceinline T ReadFromHardware(const u32 em_address)
{
int segment = em_address >> 28;
// Quick check for an address that can't meet any of the following conditions,
@ -104,33 +104,28 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address)
if ((em_address & 0xC8000000) == 0xC8000000)
{
if (em_address < 0xcc000000)
_var = EFB_Read(em_address);
return EFB_Read(em_address);
else
_var = (T)mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
return;
return (T)mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
}
else if (segment == 0x8 || segment == 0xC || segment == 0x0)
{
_var = bswap((*(const T*)&m_pRAM[em_address & RAM_MASK]));
return;
return bswap((*(const T*)&m_pRAM[em_address & RAM_MASK]));
}
else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1))
{
_var = bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK]));
return;
return bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK]));
}
else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE)))
{
_var = bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK]));
return;
return bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK]));
}
}
if (bFakeVMEM && (segment == 0x7 || segment == 0x4))
{
// fake VMEM
_var = bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK]));
return;
return bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK]));
}
// MMU: Do page table translation
@ -139,7 +134,7 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address)
{
if (flag == FLAG_READ)
GenerateDSIException(em_address, false);
return;
return 0;
}
// Handle loads that cross page boundaries (ewwww)
@ -157,20 +152,20 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address)
{
if (flag == FLAG_READ)
GenerateDSIException(em_address_next_page, false);
return;
return 0;
}
_var = 0;
T var = 0;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page;
_var = (_var << 8) | Memory::base[tlb_addr];
var = (var << 8) | Memory::base[tlb_addr];
}
return;
return var;
}
// The easy case!
_var = bswap(*(const T*)&Memory::base[tlb_addr]);
return bswap(*(const T*)&Memory::base[tlb_addr]);
}
@ -331,32 +326,28 @@ static __forceinline void Memcheck(u32 address, u32 var, bool write, int size)
u8 Read_U8(const u32 address)
{
u8 var = 0;
ReadFromHardware<FLAG_READ, u8>(var, address);
u8 var = ReadFromHardware<FLAG_READ, u8>(address);
Memcheck(address, var, false, 1);
return (u8)var;
}
u16 Read_U16(const u32 address)
{
u16 var = 0;
ReadFromHardware<FLAG_READ, u16>(var, address);
u16 var = ReadFromHardware<FLAG_READ, u16>(address);
Memcheck(address, var, false, 2);
return (u16)var;
}
u32 Read_U32(const u32 address)
{
u32 var = 0;
ReadFromHardware<FLAG_READ, u32>(var, address);
u32 var = ReadFromHardware<FLAG_READ, u32>(address);
Memcheck(address, var, false, 4);
return var;
}
u64 Read_U64(const u32 address)
{
u64 var = 0;
ReadFromHardware<FLAG_READ, u64>(var, address);
u64 var = ReadFromHardware<FLAG_READ, u64>(address);
Memcheck(address, (u32)var, false, 8);
return var;
}
@ -385,48 +376,6 @@ float Read_F32(const u32 address)
return cvt.d;
}
u32 Read_U8_Val(const u32 address, u32 var)
{
ReadFromHardware<FLAG_READ, u8>(var, address);
Memcheck(address, var, false, 1);
return var;
}
u32 Read_S8_Val(const u32 address, u32 var)
{
ReadFromHardware<FLAG_READ, s8>(var, address);
Memcheck(address, var, false, 1);
return var;
}
u32 Read_U16_Val(const u32 address, u32 var)
{
ReadFromHardware<FLAG_READ, u16>(var, address);
Memcheck(address, var, false, 2);
return var;
}
u32 Read_S16_Val(const u32 address, u32 var)
{
ReadFromHardware<FLAG_READ, s16>(var, address);
Memcheck(address, var, false, 2);
return var;
}
u32 Read_U32_Val(const u32 address, u32 var)
{
ReadFromHardware<FLAG_READ, u32>(var, address);
Memcheck(address, var, false, 4);
return var;
}
u64 Read_U64_Val(const u32 address, u64 var)
{
ReadFromHardware<FLAG_READ, u64>(var, address);
Memcheck(address, (u32)var, false, 8);
return var;
}
u32 Read_U8_ZX(const u32 address)
{
return (u32)Read_U8(address);
@ -489,16 +438,14 @@ void Write_F64(const double var, const u32 address)
}
u8 ReadUnchecked_U8(const u32 address)
{
u8 var = 0;
ReadFromHardware<FLAG_NO_EXCEPTION, u8>(var, address);
u8 var = ReadFromHardware<FLAG_NO_EXCEPTION, u8>(address);
return var;
}
u32 ReadUnchecked_U32(const u32 address)
{
u32 var = 0;
ReadFromHardware<FLAG_NO_EXCEPTION, u32>(var, address);
u32 var = ReadFromHardware<FLAG_NO_EXCEPTION, u32>(address);
return var;
}

View file

@ -615,6 +615,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += opinfo->numCycles;
js.fastmemLoadStore = NULL;
js.fixupExceptionHandler = false;
js.revertGprLoad = -1;
js.revertFprLoad = -1;
if (i == (code_block.m_num_instructions - 1))
{
@ -787,8 +789,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr();
}
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
BitSet32 gprToFlush = BitSet32::AllTrue(32);
BitSet32 fprToFlush = BitSet32::AllTrue(32);
if (js.revertGprLoad >= 0)
gprToFlush[js.revertGprLoad] = false;
if (js.revertFprLoad >= 0)
fprToFlush[js.revertFprLoad] = false;
gpr.Flush(FLUSH_MAINTAIN_STATE, gprToFlush);
fpr.Flush(FLUSH_MAINTAIN_STATE, fprToFlush);
// If a memory exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.

View file

@ -401,7 +401,7 @@ void FPURegCache::StoreRegister(size_t preg, OpArg newLoc)
emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
}
void RegCache::Flush(FlushMode mode)
void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
{
for (unsigned int i = 0; i < xregs.size(); i++)
{
@ -409,7 +409,7 @@ void RegCache::Flush(FlushMode mode)
PanicAlert("Someone forgot to unlock X64 reg %u", i);
}
for (unsigned int i = 0; i < regs.size(); i++)
for (unsigned int i : regsToFlush)
{
if (regs[i].locked)
{

View file

@ -81,7 +81,7 @@ public:
LockX(reg1); LockX(reg2);
}
void Flush(FlushMode mode = FLUSH_ALL);
void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32));
void Flush(PPCAnalyst::CodeOp *op) {Flush();}
int SanityCheck() const;
void KillImmediate(size_t preg, bool doLoad, bool makeDirty);

View file

@ -246,9 +246,23 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
gpr.Lock(a, b, d);
if (update && storeAddress)
gpr.BindToRegister(a, true, true);
gpr.BindToRegister(d, js.memcheck, true);
// A bit of an evil hack here. We need to retain the original value of this register for the
// exception path, but we'd rather not needlessly pass it around if we don't have to, since
// the exception path is very rare. So we store the value in the regcache, let the load path
// clobber it, then restore the value in the exception path.
// TODO: apart from the single-precision path of lfXXX (which uses js.revertFprLoad the same way),
// no other load has to do this at the moment, since no other loads go directly to the target
// registers, but if that ever changes, we need to do it there too.
if (js.memcheck)
{
gpr.StoreFromRegister(d);
js.revertGprLoad = d;
}
gpr.BindToRegister(d, false, true);
BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update && storeAddress)

View file

@ -66,7 +66,12 @@ void Jit64::lfXXX(UGeckoInstruction inst)
}
fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck || !single);
if (js.memcheck && single)
{
fpr.StoreFromRegister(d);
js.revertFprLoad = d;
}
fpr.BindToRegister(d, !single);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && js.memcheck)
registersInUse[RSCRATCH2] = true;

View file

@ -79,6 +79,10 @@ protected:
// so just fixup that branch instead of testing for a DSI again.
bool fixupExceptionHandler;
Gen::FixupBranch exceptionHandler;
// If these are non-negative, each holds the index of a register whose old value was stored back
// before a load was allowed to clobber it, which lets us revert it on the exception path.
// A value of -1 means no register needs reverting.
int revertGprLoad;
int revertFprLoad;
bool firstFPInstructionFound;
bool isLastInstruction;

View file

@ -302,10 +302,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, B
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
{
if (!jit->js.memcheck)
{
registersInUse[reg_value] = false;
}
registersInUse[reg_value] = false;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
!opAddress.IsImm() &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))

View file

@ -42,39 +42,58 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
const u8* trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg;
registersInUse[addrReg] = true;
registersInUse[dataReg] = false;
int stack_offset = 0;
bool push_param1 = registersInUse[ABI_PARAM1];
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
if (push_param1)
{
PUSH(ABI_PARAM1);
stack_offset = 8;
registersInUse[ABI_PARAM1] = 0;
}
int dataRegSize = info.operandSize == 8 ? 64 : 32;
MOVTwo(dataRegSize, ABI_PARAM1, addrReg, info.displacement, ABI_PARAM2, dataReg);
if (addrReg != ABI_PARAM1 && info.displacement)
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
switch (info.operandSize)
{
case 8:
CALL((void *)&Memory::Read_U64_Val);
CALL((void *)&Memory::Read_U64);
break;
case 4:
CALL((void *)&Memory::Read_U32_Val);
CALL((void *)&Memory::Read_U32);
break;
case 2:
CALL(info.signExtend ? (void *)&Memory::Read_S16_Val : (void *)&Memory::Read_U16_Val);
CALL((void *)&Memory::Read_U16);
break;
case 1:
CALL(info.signExtend ? (void *)&Memory::Read_S8_Val : (void *)&Memory::Read_U8_Val);
CALL((void *)&Memory::Read_U8);
break;
}
if (dataReg != ABI_RETURN)
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
if (push_param1)
POP(ABI_PARAM1);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
if (info.signExtend)
MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
else if (dataReg != ABI_RETURN || info.operandSize < 4)
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
JMP(returnPtr, true);
return trampoline;
}