From f248fefe06c30fdbdfce66eb8359fa4f6de123d8 Mon Sep 17 00:00:00 2001
From: Wunk
Date: Thu, 15 Aug 2024 01:45:20 -0700
Subject: [PATCH] shader_jit_a64: Optimize conditional tests (#229)

These conditional tests are a 1:1 translation from the x64 code but do
not have to be. Reference-values are known at emit-time and can be
embedded as an immediate into an `EOR` instruction rather than moved
into a register. The `TST` instruction can be utilized to more optimally
test and update the `EQ`/`NE` status flags.

An `EOR` immediate of zero cannot be encoded, so that case is emitted as
a plain register `MOV`. The `JustX`/`JustY` cases compare against the
inverted reference value so that the `EQ`/`NE` polarity observed by the
callers is the same as before this change.
---
 .../shader/shader_jit_a64_compiler.cpp        | 47 +++++++++++-------
 .../shader/shader_jit_a64_compiler.h          |  3 ++
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/video_core/shader/shader_jit_a64_compiler.cpp b/src/video_core/shader/shader_jit_a64_compiler.cpp
index 78afda84e..4e55b7b86 100644
--- a/src/video_core/shader/shader_jit_a64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_a64_compiler.cpp
@@ -386,35 +386,46 @@ void JitShader::Compile_SanitizedMul(QReg src1, QReg src2, QReg scratch0) {
 }
 
 void JitShader::Compile_EvaluateCondition(Instruction instr) {
-    // Note: NXOR is used below to check for equality
+    const u8 refx = instr.flow_control.refx.Value();
+    const u8 refy = instr.flow_control.refy.Value();
+
+    // Emits dst = cond ^ (ref ^ 1), the NXOR of a condition register with its reference bit.
+    // A64 logical (bitmask) immediates cannot encode 0, so when ref ^ 1 == 0 the XOR is an
+    // identity and is emitted as a register MOV instead of an unencodable EOR.
+    const auto emit_nxor = [this](const auto& dst, const auto& cond, u8 ref) {
+        if ((ref ^ 1) != 0) {
+            EOR(dst, cond, ref ^ 1);
+        } else {
+            MOV(dst, cond);
+        }
+    };
+
     switch (instr.flow_control.op) {
+    // Note: NXOR is used below to check for equality
     case Instruction::FlowControlType::Or:
-        MOV(XSCRATCH0, (instr.flow_control.refx.Value() ^ 1));
-        MOV(XSCRATCH1, (instr.flow_control.refy.Value() ^ 1));
-        EOR(XSCRATCH0, XSCRATCH0, COND0);
-        EOR(XSCRATCH1, XSCRATCH1, COND1);
+        emit_nxor(XSCRATCH0, COND0, refx);
+        emit_nxor(XSCRATCH1, COND1, refy);
         ORR(XSCRATCH0, XSCRATCH0, XSCRATCH1);
+        CMP(XSCRATCH0, 0);
         break;
-
+    // Note: TST will AND two registers and set the EQ/NE flags on the result
     case Instruction::FlowControlType::And:
-        MOV(XSCRATCH0, (instr.flow_control.refx.Value() ^ 1));
-        MOV(XSCRATCH1, (instr.flow_control.refy.Value() ^ 1));
-        EOR(XSCRATCH0, XSCRATCH0, COND0);
-        EOR(XSCRATCH1, XSCRATCH1, COND1);
-        AND(XSCRATCH0, XSCRATCH0, XSCRATCH1);
+        emit_nxor(XSCRATCH0, COND0, refx);
+        emit_nxor(XSCRATCH1, COND1, refy);
+        TST(XSCRATCH0, XSCRATCH1);
         break;
-
+    // Note: The previous code raised EQ when COND == (ref ^ 1), i.e. (ref ^ 1) ^ COND == 0.
+    // Comparing directly against ref ^ 1 keeps that polarity without a scratch register.
     case Instruction::FlowControlType::JustX:
-        MOV(XSCRATCH0, (instr.flow_control.refx.Value() ^ 1));
-        EOR(XSCRATCH0, XSCRATCH0, COND0);
+        CMP(COND0, refx ^ 1);
         break;
-
     case Instruction::FlowControlType::JustY:
-        MOV(XSCRATCH0, (instr.flow_control.refy.Value() ^ 1));
-        EOR(XSCRATCH0, XSCRATCH0, COND1);
+        CMP(COND1, refy ^ 1);
+        break;
+    default:
+        UNREACHABLE();
         break;
     }
-    CMP(XSCRATCH0, 0);
 }
 
 void JitShader::Compile_UniformCondition(Instruction instr) {
diff --git a/src/video_core/shader/shader_jit_a64_compiler.h b/src/video_core/shader/shader_jit_a64_compiler.h
index bfdbb558d..9819b6b7e 100644
--- a/src/video_core/shader/shader_jit_a64_compiler.h
+++ b/src/video_core/shader/shader_jit_a64_compiler.h
@@ -94,6 +94,9 @@ private:
      */
     void Compile_SanitizedMul(oaknut::QReg src1, oaknut::QReg src2, oaknut::QReg scratch0);
 
+    /**
+     * Emits the code to evaluate a conditional instruction and update the host's EQ/NE status-flags
+     */
     void Compile_EvaluateCondition(Instruction instr);
     void Compile_UniformCondition(Instruction instr);
 