JitArm64: Implement breakpoints

Plus two miscellaneous debugger features that I found along the way when
reading Jit64's code for comparison: bJITNoBlockLinking and tracing.

Fixes https://bugs.dolphin-emu.org/issues/13127.
This commit is contained in:
JosJuice 2022-12-24 13:00:45 +01:00
parent 0659827485
commit c744ff4934
3 changed files with 117 additions and 11 deletions

View file

@ -17,12 +17,14 @@
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HLE/HLE.h"
#include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PatchEngine.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/System.h"
@ -57,9 +59,10 @@ void JitArm64::Init()
auto& memory = system.GetMemory();
jo.fastmem_arena = m_fastmem_enabled && memory.InitFastmemArena();
jo.enableBlocklink = true;
jo.optimizeGatherPipe = true;
UpdateMemoryAndExceptionOptions();
SetBlockLinkingEnabled(true);
SetOptimizationEnabled(true);
gpr.Init(this);
fpr.Init(this);
blocks.Init();
@ -67,9 +70,6 @@ void JitArm64::Init()
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging;
m_cleanup_after_stackfault = false;
@ -80,6 +80,27 @@ void JitArm64::Init()
ResetFreeMemoryRanges();
}
// Turns block linking on or off. A request to enable it is only honored when
// the bJITNoBlockLinking setting is not set; a request to disable it always wins.
void JitArm64::SetBlockLinkingEnabled(bool enabled)
{
  if (!enabled)
  {
    jo.enableBlocklink = false;
    return;
  }

  // The caller wants linking: allow it unless the setting forbids it.
  jo.enableBlocklink = !SConfig::GetInstance().bJITNoBlockLinking;
}
void JitArm64::SetOptimizationEnabled(bool enabled)
{
if (enabled)
{
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
}
else
{
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
}
}
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{
// Ifdef this since the exception handler runs on a separate thread on macOS (ARM)
@ -661,6 +682,31 @@ void JitArm64::SingleStep()
pExecAddr();
}
// Writes one DYNA_REC debug log line with PC, SRR0, SRR1, FPSCR, MSR and
// spr[8] (printed under the "LR" label). General-purpose and floating-point
// register dumps are appended only when JIT_LOG_GPR / JIT_LOG_FPR are defined;
// otherwise the two trailing fields are empty strings.
void JitArm64::Trace()
{
  std::string gpr_dump;
  std::string fpr_dump;

#ifdef JIT_LOG_GPR
  const size_t gpr_count = std::size(PowerPC::ppcState.gpr);
  for (size_t reg = 0; reg != gpr_count; ++reg)
    gpr_dump.append(fmt::format("r{:02d}: {:08x} ", reg, PowerPC::ppcState.gpr[reg]));
#endif

#ifdef JIT_LOG_FPR
  const size_t fpr_count = std::size(PowerPC::ppcState.ps);
  for (size_t reg = 0; reg != fpr_count; ++reg)
    fpr_dump.append(fmt::format("f{:02d}: {:016x} ", reg, PowerPC::ppcState.ps[reg].PS0AsU64()));
#endif

  DEBUG_LOG_FMT(DYNA_REC,
                "JitArm64 PC: {:08x} SRR0: {:08x} SRR1: {:08x} FPSCR: {:08x} "
                "MSR: {:08x} LR: {:08x} {} {}",
                PC, SRR0, SRR1, FPSCR.Hex, MSR.Hex, PowerPC::ppcState.spr[8], gpr_dump,
                fpr_dump);
}
void JitArm64::Jit(u32 em_address)
{
Jit(em_address, true);
@ -706,8 +752,22 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
if (m_enable_debugging)
{
// Comment out the following to disable breakpoints (speed-up)
block_size = 1;
// We can link blocks as long as we are not single stepping
SetBlockLinkingEnabled(true);
SetOptimizationEnabled(true);
if (!jo.profile_blocks)
{
if (CPU::IsStepping())
{
block_size = 1;
// Do not link this block to other blocks while single stepping
SetBlockLinkingEnabled(false);
SetOptimizationEnabled(false);
}
Trace();
}
}
// Analyze the block, collect all instructions it is made of (including inlining,
@ -1006,11 +1066,38 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.firstFPInstructionFound = true;
}
if (bJITRegisterCacheOff)
if (m_enable_debugging && PowerPC::breakpoints.IsAddressBreakPoint(op.address) &&
!CPU::IsStepping())
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
MOVI2R(DISPATCHER_PC, op.address);
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVP2R(ARM64Reg::X0, &PowerPC::CheckBreakPoints);
BLR(ARM64Reg::X0);
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
Cleanup();
EndTimeProfile(js.curBlock);
DoDownCount();
B(dispatcher_exit);
SetJumpTarget(no_breakpoint);
}
if (bJITRegisterCacheOff)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
}
CompileInstruction(op);

View file

@ -190,6 +190,9 @@ protected:
const u8* slowmem_code;
};
// Enables or disables block linking (jo.enableBlocklink). Linking stays off
// whenever the bJITNoBlockLinking setting is set, even if `enabled` is true.
void SetBlockLinkingEnabled(bool enabled);
// Sets or clears the analyzer's OPTION_CONDITIONAL_CONTINUE, OPTION_CARRY_MERGE
// and OPTION_BRANCH_FOLLOW options together.
void SetOptimizationEnabled(bool enabled);
void CompileInstruction(PPCAnalyst::CodeOp& op);
bool HandleFunctionHooking(u32 address);
@ -276,6 +279,8 @@ protected:
bool DoJit(u32 em_address, JitBlock* b, u32 nextPC);
// Logs PC, SRR0, SRR1, FPSCR, MSR and LR at debug level on the DYNA_REC log;
// GPR/FPR contents are included only when JIT_LOG_GPR / JIT_LOG_FPR are defined.
void Trace();
// Finds a free memory region and sets the near and far code emitters to point at that region.
// Returns false if no free memory region can be found for either of the two.
bool SetEmitterStateToFreeCodeRegion();

View file

@ -8,10 +8,12 @@
#include "Common/Arm64Emitter.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#include "Common/Config/Config.h"
#include "Common/FloatUtils.h"
#include "Common/JitRegister.h"
#include "Common/MathUtil.h"
#include "Core/Config/MainSettings.h"
#include "Core/CoreTiming.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
@ -28,6 +30,8 @@ void JitArm64::GenerateAsm()
{
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
const bool enable_debugging = Config::Get(Config::MAIN_ENABLE_DEBUGGING);
// This value is all of the callee saved registers that we are required to save.
// According to the AAPCS64 we need to save R19 ~ R30 and Q8 ~ Q15.
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
@ -85,6 +89,15 @@ void JitArm64::GenerateAsm()
FixupBranch bail = B(CC_LE);
dispatcher_no_timing_check = GetCodePtr();
FixupBranch debug_exit;
if (enable_debugging)
{
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
debug_exit = CBNZ(ARM64Reg::W0);
}
dispatcher_no_check = GetCodePtr();
bool assembly_dispatcher = true;
@ -174,9 +187,7 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
CMP(ARM64Reg::W0, 0);
FixupBranch Exit = B(CC_NEQ);
FixupBranch exit = CBNZ(ARM64Reg::W0);
SetJumpTarget(to_start_of_timing_slice);
MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance);
@ -188,7 +199,10 @@ void JitArm64::GenerateAsm()
// We can safely assume that downcount >= 1
B(dispatcher_no_check);
SetJumpTarget(Exit);
dispatcher_exit = GetCodePtr();
SetJumpTarget(exit);
if (enable_debugging)
SetJumpTarget(debug_exit);
// Reset the stack pointer, as the BLR optimization has touched it.
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,