Rewrites ARMv7 fastmem entirely.

This is a fairly lengthy change that can't be split into multiple commits well, since fastmem is by nature a bit of an entangled mess.
This makes maintaining fastmem on ARMv7 much easier for me because I no longer have to do any terrible instruction counting and NOP padding. It really
makes my brain stop hurting when working with it.
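
Concretely, the handler side of the new scheme boils down to something like the sketch below (condensed from the BackPatch() changes further down in this
commit, not a verbatim copy): look up the sizes that InitBackpatch() recorded for this access's flags, rewind from the faulting instruction by the recorded
trouble offset, and re-emit the padded slow path over the fast path.

    // Condensed sketch of the new fault handling; see BackPatch() in the diff below.
    u32 flags = 0;
    ARMReg rD = INVALID_REG, V1 = INVALID_REG;
    if (!DisasmLoadStore(codePtr, &flags, &rD, &V1))
        return false;

    // Routine sizes and trouble offsets were measured once in InitBackpatch(),
    // so no hand counting of instructions is needed here.
    BackPatchInfo& info = m_backpatch_info[flags];
    ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4);
    u32 new_pc = (u32)emitter.GetCodePtr();
    EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1); // slow path, padded to the fast path's size
    emitter.FlushIcache();
    ctx->CTX_PC = new_pc;
    return true;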

This enables fastmem for a whole bunch of new instructions, which basically means that all instructions now have fastmem working for them. It also
rewrites the floating point loadstores yet again, because the last implementation was pretty crap performance-wise, even if it was the cleanest
implementation from my point of view.
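
For reference, the fastmem path for float loads now reads roughly like the snippet below (condensed from EmitBackpatchRoutine() in the diff; temp holds the
already-translated host address, RS/V1 the destination registers): a NEON load straight out of the memory view and a byte swap in registers, plus a
single-to-double convert for the 32-bit case, instead of a call out to the Memory:: helpers.

    NEONXEmitter nemit(emit);
    if (flags & BackPatchInfo::FLAG_SIZE_F32)
    {
        nemit.VLD1(F_32, D0, temp);   // load the guest single from the memory view
        nemit.VREV32(I_8, D0, D0);    // byte swap big-endian guest data
        emit->VCVT(RS, S0, 0);        // PPC FPRs hold doubles, so convert up
        emit->VCVT(V1, S0, 0);
    }
    else
    {
        nemit.VLD1(I_64, RS, temp);
        nemit.VREV64(I_8, RS, RS);
    }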

This initially started with me rewriting the fastmem routines to work just like the previous implementation of the floating point loadstores. That was
when I noticed that the performance tanked, so I decided to rewrite all of it.

This also happens to implement gatherpipe optimizations alongside constant address optimization.
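
The constant address handling in SafeStoreFromReg() ends up looking roughly like the sketch below (condensed; bFastmem stands in for the SConfig fastmem
setting): when the effective address is known at compile time, a store into the 0xCC008000 range goes straight to the gather pipe, a known RAM address
keeps the fastmem path without padding, and anything else takes only the slow path.

    if (is_immediate)
    {
        if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
        {
            // Write directly into GPFifo::m_gatherPipe and bump the count.
        }
        else if (Memory::IsRAMAddress(imm_addr))
        {
            MOVI2R(rA, imm_addr);
            EmitBackpatchRoutine(this, flags, bFastmem, false, RS); // fastmem, no padding needed
        }
        else
        {
            MOVI2R(rA, imm_addr);
            EmitBackpatchRoutine(this, flags, false, false, RS);    // MMIO etc.: slow path only
        }
    }
    else
    {
        EmitBackpatchRoutine(this, flags, bFastmem, true, RS);      // unknown address: pad for backpatching
    }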

Overall this commit brings a fairly large speed boost when using fastmem.
Ryan Houdek 2014-11-20 13:56:50 +00:00
parent da962a3d2b
commit bfbbddd76f
5 changed files with 1065 additions and 520 deletions


@ -40,6 +40,7 @@ void JitArm::Init()
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
InitBackpatch();
}
void JitArm::ClearCache()


@ -48,6 +48,26 @@ private:
ArmFPRCache fpr;
PPCAnalyst::CodeBuffer code_buffer;
struct BackPatchInfo
{
enum
{
FLAG_STORE = (1 << 0),
FLAG_LOAD = (1 << 1),
FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5),
FLAG_SIZE_F64 = (1 << 6),
FLAG_REVERSE = (1 << 7),
};
u32 m_fastmem_size;
u32 m_fastmem_trouble_inst_offset;
u32 m_slowmem_size;
};
// The key is the flags
std::map<u32, BackPatchInfo> m_backpatch_info;
void DoDownCount();
@ -57,11 +77,19 @@ private:
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
bool BackPatch(SContext* ctx);
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
bool BackPatch(SContext* ctx);
bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1);
// Initializes the information that backpatching needs
// This is required so we know the backpatch routine sizes and trouble offsets
void InitBackpatch();
// Returns the trouble instruction offset
// Zero if it isn't a fastmem routine
u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG);
public:
JitArm() : code_buffer(32000) {}
~JitArm() {}
@ -118,13 +146,8 @@ public:
void GetCarryAndClear(ArmGen::ARMReg reg);
void FinalizeCarry(ArmGen::ARMReg reg);
// TODO: This shouldn't be here
void UnsafeStoreFromReg(ArmGen::ARMReg dest, ArmGen::ARMReg value, int accessSize, s32 offset);
void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void UnsafeLoadToReg(ArmGen::ARMReg dest, ArmGen::ARMReg addr, int accessSize, s32 offsetReg, s32 offset);
void SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse);
void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update);
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);


@ -16,47 +16,65 @@ using namespace ArmGen;
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system)
bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1)
{
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
u8 op = (inst >> 20) & 0xFF;
rD = (ARMReg)((inst >> 12) & 0xF);
*rD = (ARMReg)((inst >> 12) & 0xF);
switch (op)
{
case 0x58: // STR
{
Store = true;
accessSize = 32;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
case 0x59: // LDR
{
Store = false;
accessSize = 32;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
// REV
if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0x1D: // LDRH
{
Store = false;
accessSize = 16;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
// REV16
if((next_inst & 0x0FFF0FF0) != 0x06BF0FB0)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0x45 + 0x18: // LDRB
{
Store = false;
accessSize = 8;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
}
break;
case 0x5C: // STRB
{
Store = true;
accessSize = 8;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
*rD = (ARMReg)((inst >> 12) & 0xF);
}
break;
case 0x1C: // STRH
{
Store = true;
accessSize = 16;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
default:
@ -66,10 +84,92 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
switch (op2)
{
case 0xD: // VLDR/VSTR
*new_system = true;
{
bool load = (inst >> 20) & 1;
bool single = !((inst >> 8) & 1);
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (single)
*flags |= BackPatchInfo::FLAG_SIZE_F32;
else
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (single)
{
if (!load)
{
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
}
}
break;
case 0x4: // VST1/VLD1
*new_system = true;
{
u32 size = (inst >> 6) & 0x3;
bool load = (inst >> 21) & 1;
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 2) // 32bit
{
if (load)
{
// For 32bit loads we are loading to a temporary
// So we need to read PC+8,PC+12 to get the two destination registers
u32 vcvt_1 = *(u32*)(ptr + 8);
u32 vcvt_2 = *(u32*)(ptr + 12);
u32 dest_register_1 = (vcvt_1 >> 12) & 0xF;
dest_register_1 |= (vcvt_1 >> 18) & 0x10;
u32 dest_register_2 = (vcvt_2 >> 12) & 0xF;
dest_register_2 |= (vcvt_2 >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register_1 + D0);
*V1 = (ARMReg)(dest_register_2 + D0);
}
else
{
// For 32bit stores we are storing from a temporary
// So we need to check the VCVT at PC-8 for the source register
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
else if (size == 3) // 64bit
{
if (load)
{
// For 64bit loads we load directly in to the VFP register
u32 dest_register = (inst >> 12) & 0xF;
dest_register |= (inst >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register + D0);
}
else
{
// For 64bit stores we are storing from a temporary
// Check the previous VREV64 instruction for the real register
u32 src_register = prev_inst & 0xF;
src_register |= (prev_inst >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F64;
}
}
break;
default:
printf("Op is 0x%02x\n", op);
@ -95,94 +195,484 @@ bool JitArm::BackPatch(SContext* ctx)
// We need to get the destination register before we start
u8* codePtr = (u8*)ctx->CTX_PC;
u32 Value = *(u32*)codePtr;
ARMReg rD;
u8 accessSize;
bool Store;
bool new_system = false;
ARMReg rD = INVALID_REG;
ARMReg V1 = INVALID_REG;
u32 flags = 0;
if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system))
if (!DisasmLoadStore(codePtr, &flags, &rD, &V1))
{
printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value);
exit(0);
}
if (new_system)
{
// The new system is a lot easier to backpatch than the old crap.
// Instead of backpatching over code and making sure we NOP pad and other crap
// We emit both the slow and fast path and branch over the slow path each time
// We search backwards until we find the second branch instruction
// Then proceed to replace it with a NOP and set that to the new PC.
// This ensures that we run the slow path and then branch over the fast path.
BackPatchInfo& info = m_backpatch_info[flags];
ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4);
u32 new_pc = (u32)emitter.GetCodePtr();
EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1);
emitter.FlushIcache();
ctx->CTX_PC = new_pc;
return true;
}
// Run backwards until we find the branch we want to NOP
for (int branches = 2; branches > 0; ctx->CTX_PC -= 4)
if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B
--branches;
u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1)
{
ARMReg addr = R12;
ARMReg temp = R11;
u32 trouble_offset = 0;
const u8* code_base = emit->GetCodePtr();
ctx->CTX_PC += 4;
ARMXEmitter emitter((u8*)ctx->CTX_PC);
emitter.NOP(1);
emitter.FlushIcache();
return true;
}
else
if (fastmem)
{
if (Store)
ARMReg temp2 = R10;
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
emit->BIC(temp, addr, mask); // 1
emit->MOVI2R(temp2, (u32)Memory::base); // 2-3
emit->ADD(temp, temp, temp2); // 4
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
const u32 ARMREGOFFSET = 4 * 5;
ARMXEmitter emitter(codePtr - ARMREGOFFSET);
switch (accessSize)
NEONXEmitter nemit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
case 8: // 8bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2
return 0;
break;
case 16: // 16bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2
return 0;
break;
case 32: // 32bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2
break;
emit->VCVT(S0, RS, 0);
nemit.VREV32(I_8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
emit->VSTR(S0, temp, 0);
}
emitter.PUSH(4, R0, R1, R2, R3); // 3
emitter.MOV(R0, rD); // Value - 4
emitter.MOV(R1, R10); // Addr- 5
emitter.BL(R14); // 6
emitter.POP(4, R0, R1, R2, R3); // 7
u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
ctx->CTX_PC = newPC;
emitter.FlushIcache();
return true;
else
{
nemit.VREV64(I_8, D0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
nemit.VST1(I_64, D0, temp);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
NEONXEmitter nemit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
nemit.VLD1(F_32, D0, temp);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
nemit.VLD1(I_64, RS, temp);
nemit.VREV64(I_8, RS, RS); // Byte swap to result
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(temp2, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(temp2, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->STR(temp2, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(temp2, temp);
else
emit->STRB(RS, temp);
}
else
{
const u32 ARMREGOFFSET = 4 * 4;
ARMXEmitter emitter(codePtr - ARMREGOFFSET);
switch (accessSize)
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->LDR(RS, temp); // 5
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->LDRH(RS, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->LDRB(RS, temp);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
case 8: // 8bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2
break;
case 16: // 16bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2
break;
case 32: // 32bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2
break;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, RS); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, RS);
}
emitter.PUSH(4, R0, R1, R2, R3); // 3
emitter.MOV(R0, R10); // 4
emitter.BL(R14); // 5
emitter.MOV(R14, R0); // 6
emitter.POP(4, R0, R1, R2, R3); // 7
emitter.MOV(rD, R14); // 8
ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
emitter.FlushIcache();
return true;
}
}
return 0;
else
{
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOV(R1, addr);
emit->VCVT(S0, RS, 0);
emit->VMOV(R0, S0);
emit->MOVI2R(temp, (u32)&Memory::Write_U32);
emit->BL(temp);
}
else
{
emit->MOVI2R(temp, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(R0, RS);
emit->MOV(R2, addr);
#else
emit->VMOV(D0, RS);
emit->MOV(R0, addr);
#endif
emit->BL(temp);
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOVI2R(temp, (u32)&Memory::Read_U32);
emit->BL(temp);
emit->VMOV(S0, R0);
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
emit->MOVI2R(temp, (u32)&Memory::Read_F64);
emit->BL(temp);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(RS, R0);
#else
emit->VMOV(RS, D0);
#endif
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, RS);
emit->MOV(R1, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&Memory::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&Memory::Write_U16);
else
emit->MOVI2R(temp, (u32)&Memory::Write_U8);
emit->BL(temp);
emit->POP(4, R0, R1, R2, R3);
}
else
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&Memory::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&Memory::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->MOVI2R(temp, (u32)&Memory::Read_U8);
emit->BL(temp);
emit->MOV(temp, R0);
emit->POP(4, R0, R1, R2, R3);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
emit->MOV(RS, temp);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, temp); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, temp);
}
}
}
if (do_padding)
{
BackPatchInfo& info = m_backpatch_info[flags];
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
u32 code_size = emit->GetCodePtr() - code_base;
code_size /= 4;
emit->NOP(num_insts_max - code_size);
}
return trouble_offset;
}
void JitArm::InitBackpatch()
{
u32 flags = 0;
BackPatchInfo info;
u8* code_base = GetWritableCodePtr();
u8* code_end;
// Writes
{
// 8bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
// Loads
{
// 8bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
}


@ -18,114 +18,149 @@
using namespace ArmGen;
void JitArm::UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset)
void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
// All this gets replaced on backpatch
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(dest, dest, mask); // 1
MOVI2R(R14, (u32)Memory::base, false); // 2-3
ADD(dest, dest, R14); // 4
switch (accessSize)
{
case 32:
REV(value, value); // 5
break;
case 16:
REV16(value, value);
break;
case 8:
NOP(1);
break;
}
switch (accessSize)
{
case 32:
STR(value, dest); // 6
break;
case 16:
STRH(value, dest);
break;
case 8:
STRB(value, dest);
break;
}
NOP(1); // 7
}
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
void JitArm::SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem)
{
ARMReg RA;
ARMReg RB;
ARMReg RS = gpr.R(value);
u32 imm_addr = 0;
bool is_immediate = false;
if (dest != -1)
RA = gpr.R(dest);
if (regOffset != -1)
{
RB = gpr.R(regOffset);
MOV(R10, RB);
NOP(1);
}
else
{
MOVI2R(R10, (u32)offset, false);
}
if (dest != -1)
ADD(R10, R10, RA);
else
NOP(1);
MOV(R12, RS);
UnsafeStoreFromReg(R10, R12, accessSize, 0);
return;
}
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg rC = gpr.GetReg();
ARMReg RA = INVALID_REG;
ARMReg RB = INVALID_REG;
if (dest != -1)
RA = gpr.R(dest);
if (regOffset != -1)
RB = gpr.R(regOffset);
ARMReg RS = gpr.R(value);
switch (accessSize)
{
case 32:
MOVI2R(rA, (u32)&Memory::Write_U32);
break;
case 16:
MOVI2R(rA, (u32)&Memory::Write_U16);
break;
case 8:
MOVI2R(rA, (u32)&Memory::Write_U8);
break;
}
MOV(rB, RS);
if (regOffset == -1)
{
MOVI2R(rC, offset);
if (dest != -1)
ADD(rC, rC, RA);
{
if (gpr.IsImm(dest))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + offset;
}
else
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(dest));
}
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (dest != -1)
ADD(rC, RA, RB);
{
if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
}
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(dest), off))
{
ADD(rA, gpr.R(regOffset), off);
}
else
{
MOVI2R(rA, gpr.GetImm(dest));
ADD(rA, rA, gpr.R(regOffset));
}
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(regOffset), off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, gpr.GetImm(regOffset));
ADD(rA, rA, gpr.R(dest));
}
}
else
{
ADD(rA, gpr.R(dest), gpr.R(regOffset));
}
}
else
MOV(rC, RB);
{
if (gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(regOffset);
}
else
{
MOV(rA, gpr.R(regOffset));
}
}
}
ARMReg RS = gpr.R(value);
u32 flags = BackPatchInfo::FLAG_STORE;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (is_immediate)
{
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
if (accessSize == 32)
{
REV(RS, RS);
STR(RS, R10, R11);
REV(RS, RS);
}
else if (accessSize == 16)
{
REV16(RS, RS);
STRH(RS, R10, R11);
REV16(RS, RS);
}
else
{
STRB(RS, R10, R11);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (Memory::IsRAMAddress(imm_addr))
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, RS);
}
else
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, RS);
}
}
else
{
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, RS);
}
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
MOV(R1, rC);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB, rC);
}
void JitArm::stX(UGeckoInstruction inst)
@ -138,7 +173,6 @@ void JitArm::stX(UGeckoInstruction inst)
u32 accessSize = 0;
s32 regOffset = -1;
bool update = false;
bool fastmem = false;
switch (inst.OPCD)
{
case 45: // sthu
@ -152,7 +186,6 @@ void JitArm::stX(UGeckoInstruction inst)
case 183: // stwux
update = true;
case 151: // stwx
fastmem = true;
accessSize = 32;
regOffset = b;
break;
@ -173,7 +206,6 @@ void JitArm::stX(UGeckoInstruction inst)
case 37: // stwu
update = true;
case 36: // stw
fastmem = true;
accessSize = 32;
break;
case 39: // stbu
@ -182,7 +214,9 @@ void JitArm::stX(UGeckoInstruction inst)
accessSize = 8;
break;
}
SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
if (update)
{
ARMReg rA = gpr.GetReg();
@ -193,143 +227,135 @@ void JitArm::stX(UGeckoInstruction inst)
// Check for DSI exception prior to writing back address
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch DoNotWrite = B_CC(CC_NEQ);
if (a)
SetCC(CC_EQ);
if (regOffset == -1)
{
if (regOffset == -1)
{
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, RB);
}
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
if (regOffset == -1)
MOVI2R(RA, (u32)offset);
else
MOV(RA, RB);
ADD(RA, RA, RB);
}
SetJumpTarget(DoNotWrite);
SetCC();
gpr.Unlock(rA);
}
}
void JitArm::UnsafeLoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offsetReg, s32 offset)
void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update)
{
ARMReg rA = gpr.GetReg();
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (offsetReg == -1)
{
MOVI2R(rA, offset, false); // -3
ADD(addr, addr, rA); // - 1
}
else
{
NOP(2); // -3, -2
// offsetReg is preloaded here
ADD(addr, addr, gpr.R(offsetReg)); // -1
}
// All this gets replaced on backpatch
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(addr, addr, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(addr, addr, rA); // 4
switch (accessSize)
{
case 32:
LDR(dest, addr); // 5
break;
case 16:
LDRH(dest, addr);
break;
case 8:
LDRB(dest, addr);
break;
}
switch (accessSize)
{
case 32:
REV(dest, dest); // 6
break;
case 16:
REV16(dest, dest);
break;
case 8:
NOP(1);
break;
}
NOP(2); // 7-8
gpr.Unlock(rA);
}
void JitArm::SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse)
{
ARMReg RD = gpr.R(dest);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem)
{
// Preload for fastmem
if (offsetReg != -1)
gpr.R(offsetReg);
if (addr != -1)
MOV(R10, gpr.R(addr));
{
if (gpr.IsImm(addr))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + offset;
}
else
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(addr));
}
}
}
else
MOV(R10, 0);
UnsafeLoadToReg(RD, R10, accessSize, offsetReg, offset);
return;
}
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
if (offsetReg == -1)
{
MOVI2R(rA, offset);
if (addr != -1)
ADD(rA, rA, gpr.R(addr));
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (addr != -1)
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
{
if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
}
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(addr), off))
{
ADD(rA, gpr.R(offsetReg), off);
}
else
{
MOVI2R(rA, gpr.GetImm(addr));
ADD(rA, rA, gpr.R(offsetReg));
}
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(offsetReg), off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, gpr.GetImm(offsetReg));
ADD(rA, rA, gpr.R(addr));
}
}
else
{
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
}
}
else
MOV(rA, gpr.R(offsetReg));
{
if (gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else
{
MOV(rA, gpr.R(offsetReg));
}
}
}
switch (accessSize)
{
case 8:
MOVI2R(rB, (u32)&Memory::Read_U8);
break;
case 16:
MOVI2R(rB, (u32)&Memory::Read_U16);
break;
case 32:
MOVI2R(rB, (u32)&Memory::Read_U32);
break;
}
PUSH(4, R0, R1, R2, R3);
MOV(R0, rA);
BL(rB);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
if (signExtend) // Only on 16 loads
SXTH(RD, RD);
if (is_immediate)
MOVI2R(rA, imm_addr);
u32 flags = BackPatchInfo::FLAG_LOAD;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (reverse)
{
if (accessSize == 32)
REV(RD, RD);
else if (accessSize == 16)
REV16(RD, RD);
}
gpr.Unlock(rA, rB);
flags |= BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
!(is_immediate && Memory::IsRAMAddress(imm_addr)), dest);
if (signExtend) // Only on 16 loads
SXTH(dest, dest);
if (update)
MOV(gpr.R(addr), rA);
}
void JitArm::lXX(UGeckoInstruction inst)
@ -344,7 +370,6 @@ void JitArm::lXX(UGeckoInstruction inst)
bool update = false;
bool signExtend = false;
bool reverse = false;
bool fastmem = false;
switch (inst.OPCD)
{
@ -354,21 +379,18 @@ void JitArm::lXX(UGeckoInstruction inst)
case 55: // lwzux
update = true;
case 23: // lwzx
fastmem = true;
accessSize = 32;
offsetReg = b;
break;
case 119: //lbzux
update = true;
case 87: // lbzx
fastmem = true;
accessSize = 8;
offsetReg = b;
break;
case 311: // lhzux
update = true;
case 279: // lhzx
fastmem = true;
accessSize = 16;
offsetReg = b;
break;
@ -392,19 +414,16 @@ void JitArm::lXX(UGeckoInstruction inst)
case 33: // lwzu
update = true;
case 32: // lwz
fastmem = true;
accessSize = 32;
break;
case 35: // lbzu
update = true;
case 34: // lbz
fastmem = true;
accessSize = 8;
break;
case 41: // lhzu
update = true;
case 40: // lhz
fastmem = true;
accessSize = 16;
break;
case 43: // lhau
@ -417,27 +436,13 @@ void JitArm::lXX(UGeckoInstruction inst)
// Check for exception before loading
ARMReg rA = gpr.GetReg(false);
ARMReg RD = gpr.R(d);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_NEQ);
SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);
if (update)
{
ARMReg RA = gpr.R(a);
if (offsetReg == -1)
{
rA = gpr.GetReg(false);
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, gpr.R(offsetReg));
}
}
SafeLoadToReg(RD, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse, update);
SetJumpTarget(DoNotLoad);
@ -449,8 +454,6 @@ void JitArm::lXX(UGeckoInstruction inst)
(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
ARMReg RD = gpr.R(d);
// if it's still 0, we can wait until the next event
TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ);


@ -24,16 +24,13 @@ void JitArm::lfXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
bool single = false;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
@ -42,157 +39,152 @@ void JitArm::lfXX(UGeckoInstruction inst)
switch (inst.SUBOP10)
{
case 567: // lfsux
single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offsetReg = b;
break;
case 535: // lfsx
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offsetReg = b;
break;
case 599: // lfdx
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
offsetReg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
single = true;
break;
case 48: // lfs
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
ARMReg v0 = fpr.R0(inst.FD, false), v1;
if (single)
ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
v1 = fpr.R1(inst.FD, false);
ARMReg rA = R11;
ARMReg addr = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
// Always uses RA
if (gpr.IsImm(a) && offsetReg == -1)
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else
{
ADD(rB, gpr.R(offsetReg), RA);
}
}
else
{
if (zeroA)
{
if (offsetReg == -1)
{
if (a)
Operand2 off;
if (TryMakeOperand2(offset, off))
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(rB, (u32)offset);
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
{
MOV(rB, RB);
}
ADD(addr, gpr.R(offsetReg), gpr.R(a));
}
}
}
else
{
if (offsetReg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else if (!a && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else if (a)
{
ADD(addr, gpr.R(a), gpr.R(offsetReg));
}
else
{
MOV(addr, gpr.R(offsetReg));
}
}
}
if (update)
RA = gpr.R(a);
if (is_immediate)
MOVI2R(addr, imm_addr);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
if (update)
MOV(RA, rB);
MOV(RA, addr);
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
fast_path = B();
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
!(is_immediate && Memory::IsRAMAddress(imm_addr)), v0, v1);
{
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
if (single)
{
MOVI2R(rA, (u32)&Memory::Read_U32);
BL(rA);
VMOV(S0, R0);
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
MOVI2R(rA, (u32)&Memory::Read_F64);
BL(rA);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
}
POP(4, R0, R1, R2, R3);
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
{
FixupBranch slow_out = B();
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
nemit.VLD1(F_32, D0, rC);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
nemit.VLD1(I_64, v0, rC);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
}
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
@ -201,16 +193,13 @@ void JitArm::stfXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
bool single = false;
u32 flags = BackPatchInfo::FLAG_STORE;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
@ -219,157 +208,196 @@ void JitArm::stfXX(UGeckoInstruction inst)
switch (inst.SUBOP10)
{
case 663: // stfsx
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 695: // stfsux
single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 727: // stfdx
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
offsetReg = b;
break;
case 759: // stfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offsetReg = b;
break;
}
break;
case 53: // stfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
single = true;
break;
case 52: // stfs
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 55: // stfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 54: // stfd
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
ARMReg v0 = fpr.R0(inst.FS);
ARMReg rA = R11;
ARMReg addr = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
// Always uses RA
if (gpr.IsImm(a) && offsetReg == -1)
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else
{
ADD(rB, gpr.R(offsetReg), RA);
if (offsetReg == -1)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ADD(addr, gpr.R(offsetReg), gpr.R(a));
}
}
}
else
{
if (zeroA)
if (offsetReg == -1)
{
if (offsetReg == -1)
if (a && gpr.IsImm(a))
{
if (a)
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(rB, (u32)offset);
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
{
MOV(rB, RB);
}
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else if (!a && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else if (a)
{
ADD(addr, gpr.R(a), gpr.R(offsetReg));
}
else
{
MOV(addr, gpr.R(offsetReg));
}
}
}
if (is_immediate)
MOVI2R(addr, imm_addr);
if (update)
{
RA = gpr.R(a);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
MOV(RA, addr);
SetCC();
}
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
fast_path = B();
if (is_immediate)
{
PUSH(4, R0, R1, R2, R3);
if (single)
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
MOV(R1, rB);
VCVT(S0, v0, 0);
VMOV(R0, S0);
MOVI2R(rA, (u32)&Memory::Write_U32);
BL(rA);
}
else
{
MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
}
POP(4, R0, R1, R2, R3);
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
{
FixupBranch slow_out = B();
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
accessSize = 64;
else
accessSize = 32;
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
ADD(R10, R10, R11);
NEONXEmitter nemit(this);
if (single)
if (accessSize == 64)
{
PUSH(2, R0, R1);
nemit.VREV64(I_8, D0, v0);
VMOV(R0, D0);
STR(R0, R10, 0);
STR(R1, R10, 4);
POP(2, R0, R1);
}
else if (accessSize == 32)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rC, 0);
VMOV(addr, S0);
STR(addr, R10);
}
else
{
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rC, 0);
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
gpr.Unlock(rA, rB);
}
else if (Memory::IsRAMAddress(imm_addr))
{
MOVI2R(addr, imm_addr);
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, v0);
}
else
{
MOVI2R(addr, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, v0);
}
}
else
{
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, v0);
}
}