diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index a78ee6896a..d6f4568100 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -105,41 +105,49 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 op5 = inst.SUBOP5; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VA = reg_encoder(fpr.R(a, type)); + ARM64Reg VB = reg_encoder(fpr.R(b, type)); + ARM64Reg VC = reg_encoder(fpr.R(c, type)); + ARM64Reg VD = reg_encoder(fpr.RW(d, type)); + ARM64Reg V0Q = fpr.GetReg(); + ARM64Reg V0 = reg_encoder(V0Q); + + // TODO: Do FMUL and FADD/FSUB in *one* host call to save accuracy. switch (op5) { case 14: // ps_madds0 - m_float_emit.DUP(64, V0, VC, 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.DUP(size, V0, VC, 0); + m_float_emit.FMUL(size, V0, V0, VA); + m_float_emit.FADD(size, VD, V0, VB); break; case 15: // ps_madds1 - m_float_emit.DUP(64, V0, VC, 1); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.DUP(size, V0, VC, 1); + m_float_emit.FMUL(size, V0, V0, VA); + m_float_emit.FADD(size, VD, V0, VB); break; case 28: // ps_msub - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FSUB(size, VD, V0, VB); break; case 29: // ps_madd - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FADD(size, VD, V0, VB); break; case 30: // ps_nmsub - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FSUB(size, VD, V0, VB); + m_float_emit.FNEG(size, VD, VD); break; case 31: // ps_nmadd - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FADD(size, VD, V0, VB); + m_float_emit.FNEG(size, VD, VD); break; default: _assert_msg_(DYNA_REC, 0, "ps_madd - invalid op"); @@ -147,7 +155,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) } fpr.FixSinglePrecision(d); - fpr.Unlock(V0); + fpr.Unlock(V0Q); } void JitArm64::ps_res(UGeckoInstruction inst)