From f103234e2b02e90933dae3b4ce505fb9fc1b36df Mon Sep 17 00:00:00 2001 From: Fiora Date: Sat, 13 Sep 2014 05:34:38 -0700 Subject: [PATCH] JIT: flush a register if it won't be used for the rest of the block This should dramatically reduce code size in the case of blocks with lots of branches, and certainly doesn't hurt elsewhere either. This can probably be improved a good bit through smarter tracking of register usage, e.g. discarding registers that are going to be overwritten, but this is a good start and should help reduce code size and register pressure. Unlike that sort of change, this is a "safe" patch; it only flushes registers, which can't affect correctness, unlike actually discarding data. As part of this, refactor PPCAnalyst to support distinguishing between float and integer registers (to properly handle instructions that access both, like floating-point loads and stores). Also update every instruction in the interpreter flags table I could find that didn't have all the correct flags. --- .../Interpreter/Interpreter_Tables.cpp | 244 +++++++++--------- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 9 + Source/Core/Core/PowerPC/PPCAnalyst.cpp | 43 ++- Source/Core/Core/PowerPC/PPCAnalyst.h | 5 +- Source/Core/Core/PowerPC/PPCTables.h | 17 +- 5 files changed, 191 insertions(+), 127 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 3dfc55ce23..6d60ab2a64 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -40,7 +40,7 @@ static GekkoOPTemplate primarytable[] = {20, Interpreter::rlwimix, {"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {21, Interpreter::rlwinmx, {"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, - {23, Interpreter::rlwnmx, {"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT, 1, 0, 0, 0}}, + {23, Interpreter::rlwnmx, {"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT, 1, 0, 0, 0}}, {24, Interpreter::ori, {"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S, 1, 0, 0, 0}}, {25, Interpreter::oris, {"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S, 1, 0, 0, 0}}, @@ -69,20 +69,20 @@ static GekkoOPTemplate primarytable[] = {46, Interpreter::lmw, {"lmw", OPTYPE_SYSTEM, FL_EVIL | FL_LOADSTORE, 11, 0, 0, 0}}, {47, Interpreter::stmw, {"stmw", OPTYPE_SYSTEM, FL_EVIL | FL_LOADSTORE, 11, 0, 0, 0}}, - {48, Interpreter::lfs, {"lfs", OPTYPE_LOADFP, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {49, Interpreter::lfsu, {"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {50, Interpreter::lfd, {"lfd", OPTYPE_LOADFP, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {51, Interpreter::lfdu, {"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {48, Interpreter::lfs, {"lfs", OPTYPE_LOADFP, FL_OUT_FLOAT_D | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {49, Interpreter::lfsu, {"lfsu", OPTYPE_LOADFP, FL_OUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {50, Interpreter::lfd, {"lfd", OPTYPE_LOADFP, FL_INOUT_FLOAT_D | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {51, Interpreter::lfdu, {"lfdu", OPTYPE_LOADFP, FL_INOUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {52, Interpreter::stfs, {"stfs", OPTYPE_STOREFP, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {53, Interpreter::stfsu, {"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {54, Interpreter::stfd, {"stfd", OPTYPE_STOREFP, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {55, Interpreter::stfdu, {"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {52, Interpreter::stfs, {"stfs", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {53, Interpreter::stfsu, {"stfsu", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {54, Interpreter::stfd, {"stfd", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {55, Interpreter::stfdu, {"stfdu", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {56, Interpreter::psq_l, {"psq_l", OPTYPE_PS, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {57, Interpreter::psq_lu, {"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {60, Interpreter::psq_st, {"psq_st", OPTYPE_PS, FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {61, Interpreter::psq_stu, {"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {56, Interpreter::psq_l, {"psq_l", OPTYPE_PS, FL_OUT_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {57, Interpreter::psq_lu, {"psq_lu", OPTYPE_PS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {60, Interpreter::psq_st, {"psq_st", OPTYPE_PS, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {61, Interpreter::psq_stu, {"psq_stu", OPTYPE_PS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, //missing: 0, 5, 6, 9, 22, 30, 62, 58 {0, Interpreter::unknown_instruction, {"unknown_instruction", OPTYPE_UNKNOWN, 0, 0, 0, 0, 0}}, @@ -97,50 +97,50 @@ static GekkoOPTemplate primarytable[] = static GekkoOPTemplate table4[] = { //SUBOP10 - {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {528, Interpreter::ps_merge00, {"ps_merge00", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {624, Interpreter::ps_merge11, {"ps_merge11", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_IN_FLOAT_AB | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_IN_FLOAT_AB | FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {528, Interpreter::ps_merge00, { "ps_merge00", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, + {624, Interpreter::ps_merge11, {"ps_merge11", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}}, - {1014, Interpreter::dcbz_l, {"dcbz_l", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, + {1014, Interpreter::dcbz_l, {"dcbz_l", OPTYPE_SYSTEM, FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, }; static GekkoOPTemplate table4_2[] = { - {10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, - {20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}}, - {24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}}, - {28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, + {20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU, 1, 0, 0, 0}}, + {24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}}, + {28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, }; static GekkoOPTemplate table4_3[] = { - {6, Interpreter::psq_lx, {"psq_lx", OPTYPE_PS, FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {7, Interpreter::psq_stx, {"psq_stx", OPTYPE_PS, FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {38, Interpreter::psq_lux, {"psq_lux", OPTYPE_PS, FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {39, Interpreter::psq_stux, {"psq_stux", OPTYPE_PS, FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {6, Interpreter::psq_lx, {"psq_lx", OPTYPE_PS, FL_OUT_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {7, Interpreter::psq_stx, {"psq_stx", OPTYPE_PS, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {38, Interpreter::psq_lux, {"psq_lux", OPTYPE_PS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {39, Interpreter::psq_stux, {"psq_stux", OPTYPE_PS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, }; static GekkoOPTemplate table19[] = @@ -157,7 +157,7 @@ static GekkoOPTemplate table19[] = {193, Interpreter::crxor, {"crxor", OPTYPE_CR, FL_EVIL, 1, 0, 0, 0}}, {150, Interpreter::isync, {"isync", OPTYPE_ICACHE, FL_EVIL, 1, 0, 0, 0}}, - {0, Interpreter::mcrf, {"mcrf", OPTYPE_SYSTEM, FL_EVIL, 1, 0, 0, 0}}, + {0, Interpreter::mcrf, {"mcrf", OPTYPE_SYSTEM, FL_EVIL | FL_SET_CRn, 1, 0, 0, 0}}, {50, Interpreter::rfi, {"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 2, 0, 0, 0}}, {18, Interpreter::rfid, {"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1, 0, 0, 0}} @@ -179,10 +179,10 @@ static GekkoOPTemplate table31[] = {26, Interpreter::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, - {536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, - {792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, - {824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, - {24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, + {536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT, 1, 0, 0, 0}}, + {792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, + {824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, + {24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT, 1, 0, 0, 0}}, {54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}}, {86, Interpreter::dcbf, {"dcbf", OPTYPE_DCACHE, 0, 5, 0, 0, 0}}, @@ -193,76 +193,76 @@ static GekkoOPTemplate table31[] = {1014, Interpreter::dcbz, {"dcbz", OPTYPE_DCACHE, FL_LOADSTORE, 5, 0, 0, 0}}, //load word - {23, Interpreter::lwzx, {"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {55, Interpreter::lwzux, {"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {23, Interpreter::lwzx, {"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {55, Interpreter::lwzux, {"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //load halfword - {279, Interpreter::lhzx, {"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {311, Interpreter::lhzux, {"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {279, Interpreter::lhzx, {"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {311, Interpreter::lhzux, {"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //load halfword signextend - {343, Interpreter::lhax, {"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {375, Interpreter::lhaux, {"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {343, Interpreter::lhax, {"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {375, Interpreter::lhaux, {"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //load byte - {87, Interpreter::lbzx, {"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {119, Interpreter::lbzux, {"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {87, Interpreter::lbzx, {"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {119, Interpreter::lbzux, {"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //load byte reverse - {534, Interpreter::lwbrx, {"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {790, Interpreter::lhbrx, {"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {534, Interpreter::lwbrx, {"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {790, Interpreter::lhbrx, {"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, // Conditional load/store (Wii SMP) - {150, Interpreter::stwcxd, {"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0 | FL_LOADSTORE, 1, 0, 0, 0}}, + {150, Interpreter::stwcxd, {"stwcxd", OPTYPE_STORE, FL_EVIL | FL_IN_S | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE, 1, 0, 0, 0}}, {20, Interpreter::lwarx, {"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE, 1, 0, 0, 0}}, //load string (Inst these) - {533, Interpreter::lswx, {"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}}, - {597, Interpreter::lswi, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}}, + {533, Interpreter::lswx, {"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A0B | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}}, + {597, Interpreter::lswi, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}}, //store word - {151, Interpreter::stwx, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {183, Interpreter::stwux, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {151, Interpreter::stwx, {"stwx", OPTYPE_STORE, FL_IN_S | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {183, Interpreter::stwux, {"stwux", OPTYPE_STORE, FL_IN_S | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //store halfword - {407, Interpreter::sthx, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {439, Interpreter::sthux, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {407, Interpreter::sthx, {"sthx", OPTYPE_STORE, FL_IN_S | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {439, Interpreter::sthux, {"sthux", OPTYPE_STORE, FL_IN_S | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //store byte - {215, Interpreter::stbx, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {247, Interpreter::stbux, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {215, Interpreter::stbx, {"stbx", OPTYPE_STORE, FL_IN_S | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {247, Interpreter::stbux, {"stbux", OPTYPE_STORE, FL_IN_S | FL_OUT_A | FL_IN_AB | FL_LOADSTORE, 1, 0, 0, 0}}, //store bytereverse - {662, Interpreter::stwbrx, {"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, - {918, Interpreter::sthbrx, {"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B | FL_LOADSTORE, 1, 0, 0, 0}}, + {662, Interpreter::stwbrx, {"stwbrx", OPTYPE_STORE, FL_IN_S | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {918, Interpreter::sthbrx, {"sthbrx", OPTYPE_STORE, FL_IN_S | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, - {661, Interpreter::stswx, {"stswx", OPTYPE_STORE, FL_EVIL | FL_LOADSTORE, 1, 0, 0, 0}}, - {725, Interpreter::stswi, {"stswi", OPTYPE_STORE, FL_EVIL | FL_LOADSTORE, 1, 0, 0, 0}}, + {661, Interpreter::stswx, {"stswx", OPTYPE_STORE, FL_EVIL | FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {725, Interpreter::stswi, {"stswi", OPTYPE_STORE, FL_EVIL | FL_IN_A | FL_LOADSTORE, 1, 0, 0, 0}}, // fp load/store - {535, Interpreter::lfsx, {"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {567, Interpreter::lfsux, {"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {599, Interpreter::lfdx, {"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {631, Interpreter::lfdux, {"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {535, Interpreter::lfsx, {"lfsx", OPTYPE_LOADFP, FL_OUT_FLOAT_D | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {567, Interpreter::lfsux, {"lfsux", OPTYPE_LOADFP, FL_OUT_FLOAT_D | FL_IN_AB | FL_OUT_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {599, Interpreter::lfdx, {"lfdx", OPTYPE_LOADFP, FL_INOUT_FLOAT_D | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {631, Interpreter::lfdux, {"lfdux", OPTYPE_LOADFP, FL_INOUT_FLOAT_D | FL_IN_AB | FL_OUT_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {663, Interpreter::stfsx, {"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {695, Interpreter::stfsux, {"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {727, Interpreter::stfdx, {"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {759, Interpreter::stfdux, {"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {983, Interpreter::stfiwx, {"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {663, Interpreter::stfsx, {"stfsx", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {695, Interpreter::stfsux, {"stfsux", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_AB | FL_OUT_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {727, Interpreter::stfdx, {"stfdx", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {759, Interpreter::stfdux, {"stfdux", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_AB | FL_OUT_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {983, Interpreter::stfiwx, {"stfiwx", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, {19, Interpreter::mfcr, {"mfcr", OPTYPE_SYSTEM, FL_OUT_D, 1, 0, 0, 0}}, {83, Interpreter::mfmsr, {"mfmsr", OPTYPE_SYSTEM, FL_OUT_D, 1, 0, 0, 0}}, - {144, Interpreter::mtcrf, {"mtcrf", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, + {144, Interpreter::mtcrf, {"mtcrf", OPTYPE_SYSTEM, FL_IN_S | FL_SET_CRn, 1, 0, 0, 0}}, {146, Interpreter::mtmsr, {"mtmsr", OPTYPE_SYSTEM, FL_IN_S | FL_ENDBLOCK, 1, 0, 0, 0}}, - {210, Interpreter::mtsr, {"mtsr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, - {242, Interpreter::mtsrin, {"mtsrin", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, + {210, Interpreter::mtsr, {"mtsr", OPTYPE_SYSTEM, FL_IN_S, 1, 0, 0, 0}}, + {242, Interpreter::mtsrin, {"mtsrin", OPTYPE_SYSTEM, FL_IN_SB, 1, 0, 0, 0}}, {339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}}, - {467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}}, + {467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, FL_IN_S, 2, 0, 0, 0}}, {371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}}, {512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}}, {595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, - {659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, + {659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D | FL_IN_B, 3, 0, 0, 0}}, {4, Interpreter::tw, {"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 2, 0, 0, 0}}, {598, Interpreter::sync, {"sync", OPTYPE_SYSTEM, 0, 3, 0, 0, 0}}, @@ -307,51 +307,51 @@ static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table59[] = { - {18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO - {20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - //{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, // Not implemented on gekko - {24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {18, Interpreter::fdivsx, {"fdivsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO + {20, Interpreter::fsubsx, {"fsubsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {21, Interpreter::faddsx, {"faddsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + //{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, // Not implemented on gekko + {24, Interpreter::fresx, {"fresx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {25, Interpreter::fmulsx, {"fmulsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_SINGLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, }; static GekkoOPTemplate table63[] = { - {264, Interpreter::fabsx, {"fabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {14, Interpreter::fctiwx, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {72, Interpreter::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {136, Interpreter::fnabsx, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {40, Interpreter::fnegx, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {264, Interpreter::fabsx, {"fabsx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {32, Interpreter::fcmpo, {"fcmpo", OPTYPE_DOUBLEFP, FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {0, Interpreter::fcmpu, {"fcmpu", OPTYPE_DOUBLEFP, FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {14, Interpreter::fctiwx, {"fctiwx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {72, Interpreter::fmrx, {"fmrx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {136, Interpreter::fnabsx, {"fnabsx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {40, Interpreter::fnegx, {"fnegx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {12, Interpreter::frspx, {"frspx", OPTYPE_DOUBLEFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, - {583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, + {64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, + {583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_OUT_D | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, {70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}}, {38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}}, {134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}}, - {711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}}, + {711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_IN_B | FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}}, }; static GekkoOPTemplate table63_2[] = { - {18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}}, - {20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {23, Interpreter::fselx, {"fselx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {18, Interpreter::fdivx, {"fdivx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}}, + {20, Interpreter::fsubx, {"fsubx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {21, Interpreter::faddx, {"faddx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {23, Interpreter::fselx, {"fselx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {25, Interpreter::fmulx, {"fmulx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {28, Interpreter::fmsubx, {"fmsubx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {29, Interpreter::fmaddx, {"fmaddx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_DOUBLEFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, }; namespace InterpreterTables { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index f191f2efa5..9b88538935 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -741,6 +741,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc Jit64Tables::CompileInstruction(ops[i]); + // If we have a register that will never be used again, flush it. + for (int j = 0; j < 32; j++) + { + if (!(ops[i].gprInUse & (1 << j))) + gpr.StoreFromRegister(j); + if (!(ops[i].fprInUse & (1 << j))) + fpr.StoreFromRegister(j); + } + if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index d5eb128418..6ec760c77e 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -527,6 +527,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf int numOut = 0; int numIn = 0; + int numFloatIn = 0; if (opinfo->flags & FL_OUT_A) { code->regsOut[numOut++] = code->inst.RA; @@ -563,14 +564,29 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf block->m_gpa->SetInputRegister(code->inst.RS, index); } + code->fregOut = -1; + if (opinfo->flags & FL_OUT_FLOAT_D) + code->fregOut = code->inst.FD; + else if (opinfo->flags & FL_OUT_FLOAT_S) + code->fregOut = code->inst.FS; + if (opinfo->flags & FL_IN_FLOAT_A) + code->fregsIn[numFloatIn++] = code->inst.FA; + if (opinfo->flags & FL_IN_FLOAT_B) + code->fregsIn[numFloatIn++] = code->inst.FB; + if (opinfo->flags & FL_IN_FLOAT_C) + code->fregsIn[numFloatIn++] = code->inst.FC; + if (opinfo->flags & FL_IN_FLOAT_D) + code->fregsIn[numFloatIn++] = code->inst.FD; + if (opinfo->flags & FL_IN_FLOAT_S) + code->fregsIn[numFloatIn++] = code->inst.FS; + // Set remaining register slots as unused (-1) for (int j = numIn; j < 3; j++) code->regsIn[j] = -1; for (int j = numOut; j < 2; j++) code->regsOut[j] = -1; - for (int j = 0; j < 3; j++) + for (int j = numFloatIn; j < 4; j++) code->fregsIn[j] = -1; - code->fregOut = -1; switch (opinfo->type) { @@ -580,7 +596,8 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf case OPTYPE_LOADFP: case OPTYPE_STOREFP: break; - case OPTYPE_FPU: + case OPTYPE_SINGLEFP: + case OPTYPE_DOUBLEFP: break; case OPTYPE_BRANCH: if (code->inst.hex == 0x4e800020) @@ -788,6 +805,8 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 bool wantsCR1 = true; bool wantsFPRF = true; bool wantsCA = true; + u32 fregInUse = 0; + u32 regInUse = 0; for (int i = block->m_num_instructions - 1; i >= 0; i--) { bool opWantsCR0 = code[i].wantsCR0; @@ -806,6 +825,24 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 wantsCR1 &= !code[i].outputCR1 || opWantsCR1; wantsFPRF &= !code[i].outputFPRF || opWantsFPRF; wantsCA &= !code[i].outputCA || opWantsCA; + code[i].gprInUse = regInUse; + code[i].fprInUse = fregInUse; + // TODO: if there's no possible endblocks or exceptions in between, tell the regcache + // we can throw away a register if it's going to be overwritten later. + for (int j = 0; j < 3; j++) + if (code[i].regsIn[j] >= 0) + regInUse |= 1 << code[i].regsIn[j]; + for (int j = 0; j < 4; j++) + if (code[i].fregsIn[j] >= 0) + fregInUse |= 1 << code[i].fregsIn[j]; + // For now, we need to count output registers as "used" though; otherwise the flush + // will result in a redundant store (e.g. store to regcache, then store again to + // the same location later). + for (int j = 0; j < 2; j++) + if (code[i].regsOut[j] >= 0) + regInUse |= 1 << code[i].regsOut[j]; + if (code[i].fregOut >= 0) + fregInUse |= 1 << code[i].fregOut; } return address; } diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 2238337710..4936f854c5 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -29,7 +29,7 @@ struct CodeOp //16B s8 regsOut[2]; s8 regsIn[3]; s8 fregOut; - s8 fregsIn[3]; + s8 fregsIn[4]; bool isBranchTarget; bool wantsCR0; bool wantsCR1; @@ -42,6 +42,9 @@ struct CodeOp //16B bool outputCA; bool canEndBlock; bool skip; // followed BL-s for example + // which registers are still needed after this instruction in this block + u32 gprInUse; + u32 fprInUse; }; struct BlockStats diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h index a3bb892d45..bfc5c5303a 100644 --- a/Source/Core/Core/PowerPC/PPCTables.h +++ b/Source/Core/Core/PowerPC/PPCTables.h @@ -24,6 +24,8 @@ enum FL_IN_C = (1<<11), FL_IN_S = (1<<12), FL_IN_AB = FL_IN_A | FL_IN_B, + FL_IN_AC = FL_IN_A | FL_IN_C, + FL_IN_ABC = FL_IN_A | FL_IN_B | FL_IN_C, FL_IN_SB = FL_IN_S | FL_IN_B, FL_IN_A0B = FL_IN_A0 | FL_IN_B, FL_IN_A0BC = FL_IN_A0 | FL_IN_B | FL_IN_C, @@ -39,6 +41,18 @@ enum FL_SET_FPRF = (1<<20), FL_READ_FPRF = (1<<21), FL_SET_OE = (1<<22), + FL_IN_FLOAT_A = (1<<23), + FL_IN_FLOAT_B = (1<<24), + FL_IN_FLOAT_C = (1<<25), + FL_IN_FLOAT_S = (1<<26), + FL_IN_FLOAT_D = (1<<27), + FL_IN_FLOAT_AB = FL_IN_FLOAT_A | FL_IN_FLOAT_B, + FL_IN_FLOAT_AC = FL_IN_FLOAT_A | FL_IN_FLOAT_C, + FL_IN_FLOAT_ABC = FL_IN_FLOAT_A | FL_IN_FLOAT_B | FL_IN_FLOAT_C, + FL_OUT_FLOAT_D = (1<<28), + FL_OUT_FLOAT_S = (1<<29), + // Used in the case of double ops (they don't modify the top half of the output) + FL_INOUT_FLOAT_D = FL_IN_FLOAT_D | FL_OUT_FLOAT_D, }; enum @@ -54,7 +68,8 @@ enum OPTYPE_STORE , OPTYPE_LOADFP , OPTYPE_STOREFP , - OPTYPE_FPU , + OPTYPE_DOUBLEFP, + OPTYPE_SINGLEFP, OPTYPE_PS , OPTYPE_DCACHE , OPTYPE_ICACHE ,