diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index cb6162ba87..f42f2c059c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -124,6 +124,7 @@ public:
 	void stX(UGeckoInstruction inst);
 	void lmw(UGeckoInstruction inst);
 	void stmw(UGeckoInstruction inst);
+	void dcbx(UGeckoInstruction inst); // shared handler for dcbst, dcbf and dcbi (see table31)
 	void dcbt(UGeckoInstruction inst);
 	void dcbz(UGeckoInstruction inst);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index d122170b85..be6e931381 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -9,9 +9,11 @@
 #include "Core/ConfigManager.h"
 #include "Core/Core.h"
 #include "Core/CoreTiming.h"
+#include "Core/HW/DSP.h"
 #include "Core/HW/GPFifo.h"
 #include "Core/HW/Memmap.h"
 #include "Core/HW/MMIO.h"
+#include "Core/PowerPC/JitInterface.h"
 #include "Core/PowerPC/PowerPC.h"
 #include "Core/PowerPC/PPCTables.h"
 #include "Core/PowerPC/JitArm64/Jit.h"
@@ -694,6 +696,90 @@ void JitArm64::stmw(UGeckoInstruction inst)
 	gpr.Unlock(WA, WB);
 }
 
+void JitArm64::dcbx(UGeckoInstruction inst) // Emits code for dcbst/dcbf/dcbi: calls JitInterface::InvalidateICache when the block bitset has the addressed line's bit set; dcbi (SUBOP10 470) additionally may call DSP::FlushInstantDMA.
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreOff);
+
+	gpr.Lock(W30); // reserve W30; it is used as the scratch register WA below
+
+	ARM64Reg addr = gpr.GetReg(); // holds the effective address, later the address >> 5
+	ARM64Reg value = gpr.GetReg(); // holds the bitset word index, then the loaded bitset word
+	ARM64Reg WA = W30;
+
+	u32 a = inst.RA, b = inst.RB;
+
+	if (a) // addr = rA + rB, or rB alone when the RA field is 0
+		ADD(addr, gpr.R(a), gpr.R(b));
+	else
+		MOV(addr, gpr.R(b));
+
+	// Check whether a JIT cache line needs to be invalidated.
+	AND(value, addr, 32 - 10, 28 - 10); // upper three bits and last 10 bit are masked for the bitset of cachelines, 0x1ffffc00
+	LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
+	MOVI2R(EncodeRegTo64(WA), (u64)jit->GetBlockCache()->GetBlockBitSet()); // 64-bit view of WA = pointer returned by GetBlockBitSet()
+	LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true)); // value = bitset word at index `value`; NOTE(review): the `true` flag presumably scales the index by the access size - confirm
+
+	LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
+
+	LSR(value, value, addr); // move current bit to bit 0
+
+	FixupBranch bit_not_set = TBZ(value, 0); // bit 0 clear: skip the InvalidateICache call
+	FixupBranch far = B(); // otherwise branch to the slow path, which is emitted after SwitchToFarCode()
+	SwitchToFarCode();
+	SetJumpTarget(far);
+
+	BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); // register sets saved around the calls; reused by the dcbi path below
+	BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
+
+	ABI_PushRegisters(gprs_to_push);
+	m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
+
+	LSL(W0, addr, 5); // W0 = original address with its low 5 bits cleared (addr was shifted right by 5 above)
+	MOVI2R(X1, 32); // X1 = 32; NOTE(review): presumably the size in bytes of one cache line - confirm against InvalidateICache's signature
+	MOVI2R(X2, 0); // X2 = 0; NOTE(review): meaning of this argument is not visible here - confirm
+	MOVI2R(X3, (u64)(void*)JitInterface::InvalidateICache);
+	BLR(X3); // indirect call through X3
+
+	m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
+	ABI_PopRegisters(gprs_to_push);
+
+	FixupBranch near = B(); // slow path branches back to the near code
+	SwitchToNearCode();
+	SetJumpTarget(bit_not_set); // both the skip branch and the slow-path return land here
+	SetJumpTarget(near);
+
+	// dcbi
+	if (inst.SUBOP10 == 470)
+	{
+		// Flush DSP DMA if DMAState bit is set
+		MOVI2R(EncodeRegTo64(WA), (u64)&DSP::g_dspState);
+		LDRH(INDEX_UNSIGNED, WA, EncodeRegTo64(WA), 0); // WA = 16-bit value at offset 0 of DSP::g_dspState
+
+		bit_not_set = TBZ(WA, 9); // bit 9 clear: skip the FlushInstantDMA call
+		far = B();
+		SwitchToFarCode();
+		SetJumpTarget(far);
+
+		ABI_PushRegisters(gprs_to_push);
+		m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
+
+		LSL(W0, addr, 5); // W0 = address with its low 5 bits cleared, as for the call above
+		MOVI2R(X1, (u64)DSP::FlushInstantDMA); // X1 holds the call target here, not an argument
+		BLR(X1);
+
+		m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
+		ABI_PopRegisters(gprs_to_push);
+
+		near = B();
+		SwitchToNearCode();
+		SetJumpTarget(near);
+		SetJumpTarget(bit_not_set);
+	}
+
+	gpr.Unlock(addr, value, W30); // release the two temporaries and W30
+}
+
 void JitArm64::dcbt(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 09fdd271ce..cfe4ec9d99 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -219,11 +219,11 @@ static GekkoOPTemplate table31[] =
 	{824, &JitArm64::srawix}, // srawix
 	{24, &JitArm64::slwx}, // slwx
 
-	{54, &JitArm64::FallBackToInterpreter}, // dcbst
-	{86, &JitArm64::FallBackToInterpreter}, // dcbf
+	{54, &JitArm64::dcbx}, // dcbst
+	{86, &JitArm64::dcbx}, // dcbf
 	{246, &JitArm64::dcbt}, // dcbtst
 	{278, &JitArm64::dcbt}, // dcbt
-	{470, &JitArm64::FallBackToInterpreter}, // dcbi
+	{470, &JitArm64::dcbx}, // dcbi
 	{758, &JitArm64::DoNothing}, // dcba
 	{1014, &JitArm64::dcbz}, // dcbz
 