Reorganize carry to store flags separately instead of as part of XER

Also correct behavior with regard to which bits in XER are treated as zero,
based on a hardware test (probably doesn't affect any real games, but might as
well be correct).
This commit is contained in:
Fiora 2014-09-12 13:19:50 -07:00
parent 788a719718
commit 5fce109ce1
10 changed files with 97 additions and 68 deletions

View file

@ -335,16 +335,17 @@ union UFPR
// Bit positions of the CA, OV and SO flags within the 32-bit XER value.
#define XER_CA_SHIFT 29
#define XER_OV_SHIFT 30
#define XER_SO_SHIFT 31
// Single-bit masks for those flags at their positions in the 32-bit XER value.
#define XER_CA_MASK (1U << XER_CA_SHIFT)
#define XER_OV_MASK (1U << XER_OV_SHIFT)
#define XER_SO_MASK (1U << XER_SO_SHIFT)
// NOTE(review): XER_OV_MASK and XER_SO_MASK are defined a second time below with
// different values. 1 and 2 match the `(SO << 1) | OV` byte layout described on
// PowerPCState::xer_so_ov. This listing appears to show the pre-change and
// post-change definitions together -- confirm which set is live before relying
// on either.
#define XER_OV_MASK 1
#define XER_SO_MASK 2
// XER
union UReg_XER
{
struct
{
u32 BYTE_COUNT : 7;
u32 : 22;
u32 : 1;
u32 BYTE_CMP : 8;
u32 : 13;
u32 CA : 1;
u32 OV : 1;
u32 SO : 1;

View file

@ -499,7 +499,7 @@ void Interpreter::lhzx(UGeckoInstruction _inst)
void Interpreter::lswx(UGeckoInstruction _inst)
{
u32 EA = Helper_Get_EA_X(_inst);
u32 n = rSPR(SPR_XER) & 0x7F;
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
int r = _inst.RD;
int i = 0;
@ -727,7 +727,7 @@ void Interpreter::stswi(UGeckoInstruction _inst)
void Interpreter::stswx(UGeckoInstruction _inst)
{
u32 EA = Helper_Get_EA_X(_inst);
u32 n = rSPR(SPR_XER) & 0x7F;
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
int r = _inst.RS;
int i = 0;

View file

@ -112,9 +112,9 @@ void Interpreter::mtfsfx(UGeckoInstruction _inst)
// mcrxr: copies the top four bits of XER into CR field CRFD, then clears those
// XER bits.
// NOTE(review): this listing shows both the spr[SPR_XER]-based statements and
// the GetXER()/xer_ca/xer_so_ov-based statements; it looks like removed and
// added diff lines rendered together -- confirm which pair is live.
void Interpreter::mcrxr(UGeckoInstruction _inst)
{
// USES_XER
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
// Same operation against the split representation: rebuild the 32-bit XER
// value, take its top nibble, then zero the separately stored flag bytes.
SetCRField(_inst.CRFD, GetXER().Hex >> 28);
PowerPC::ppcState.xer_ca = 0;
PowerPC::ppcState.xer_so_ov = 0;
}
void Interpreter::mfcr(UGeckoInstruction _inst)
@ -235,6 +235,9 @@ void Interpreter::mfspr(UGeckoInstruction _inst)
rSPR(iIndex) &= ~1;
}
break;
case SPR_XER:
rSPR(iIndex) = GetXER().Hex;
break;
}
m_GPR[_inst.RD] = rSPR(iIndex);
}
@ -350,6 +353,10 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
case SPR_SDR:
Memory::SDRUpdated();
break;
case SPR_XER:
SetXER(rSPR(iIndex));
break;
}
}

View file

@ -22,12 +22,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
if (overflow)
{
//XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
}
else
{
//XER[OV] = 0
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
}
}
@ -36,11 +36,11 @@ void Jit64::GenerateOverflow()
{
// Emits code that updates the guest overflow flags from the host overflow
// condition: one path sets OV and SO, the other clears OV only.
// NOTE(review): each path shows both a 32-bit op on spr[SPR_XER] and an 8-bit op
// on xer_so_ov; this looks like removed and added diff lines rendered together
// -- confirm which is live.
// Skip the "set" path when the CC_NO condition holds.
FixupBranch jno = J_CC(CC_NO);
//XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
// The byte store overwrites xer_so_ov with both bits set.
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
FixupBranch exit = J();
SetJumpTarget(jno);
//XER[OV] = 0
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
// Only the OV bit is masked off here; the SO bit is left as it was.
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
SetJumpTarget(exit);
}
@ -60,7 +60,7 @@ void Jit64::FinalizeCarry(CCFlags cond)
{
// convert the condition to a carry flag (is there a better way?)
SETcc(cond, R(RSCRATCH));
BT(8, R(RSCRATCH), Imm8(0));
SHR(8, R(RSCRATCH), Imm8(1));
}
js.carryFlagSet = true;
}
@ -92,23 +92,22 @@ void Jit64::FinalizeCarry(bool ca)
}
else
{
JitClearCAOV(true, false);
JitClearCA();
}
}
}
// Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
{
// USES_XER
if (oe)
{
// Make sure not to lose the carry flags (not a big deal, this path is rare).
PUSHF();
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
//XER[OV] = 0
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
FixupBranch jno = J_CC(CC_NO);
//XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_SO_MASK | XER_OV_MASK));
SetJumpTarget(jno);
POPF();
}
@ -1792,27 +1791,15 @@ void Jit64::srawix(UGeckoInstruction inst)
if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops
if (amount == 31)
if (amount == 1)
{
JitSetCA();
SAR(32, gpr.R(a), Imm8(31));
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(true, false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
}
else
{
JitClearCAOV(true, false);
SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a));

View file

@ -140,7 +140,6 @@ void Jit64::mtspr(UGeckoInstruction inst)
case SPR_LR:
case SPR_CTR:
case SPR_XER:
// These are safe to do the easy way, see the bottom of this function.
break;
@ -155,6 +154,24 @@ void Jit64::mtspr(UGeckoInstruction inst)
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
MOV(32, R(RSCRATCH), gpr.R(d));
AND(32, R(RSCRATCH), Imm32(0xff7f));
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(8, R(RSCRATCH), Imm8(1));
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
gpr.UnlockAll();
return;
default:
FALLBACK_IF(true);
}
@ -238,6 +255,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
gpr.UnlockAllX();
break;
}
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
break;
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
@ -422,17 +451,20 @@ void Jit64::mcrxr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// USES_CR
// Copy XER[0-3] into CR[inst.CRFD]
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
SHR(32, R(RSCRATCH), Imm8(28));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
MOVZX(32, 8, RSCRATCH2, PPCSTATE(xer_so_ov));
// [0 SO OV CA]
LEA(32, RSCRATCH, MComplex(RSCRATCH, RSCRATCH2, SCALE_2, 0));
// [SO OV CA 0] << 3
SHL(32, R(RSCRATCH), Imm8(4));
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
// Clear XER[0-3]
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF));
MOV(8, PPCSTATE(xer_ca), Imm8(0));
MOV(8, PPCSTATE(xer_so_ov), Imm8(0));
}
void Jit64::crXXX(UGeckoInstruction inst)

View file

@ -1027,9 +1027,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break;
X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER]));
Jit->SHR(32, R(reg), Imm8(29));
Jit->AND(32, R(reg), Imm8(1));
Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca));
RI.regs[reg] = I;
break;
}
@ -1107,7 +1105,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA();
FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry);
Jit->JitClearCAOV(true, false);
Jit->JitClearCA();
Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I);
break;

View file

@ -896,30 +896,23 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
// Emits code that moves the guest carry (XER.CA) into the host carry flag and
// clears the stored copy. When `oe` is true, XER.OV is cleared first.
// NOTE(review): this listing shows both the spr[SPR_XER]-based statements and
// the xer_so_ov/xer_ca-based statements; it looks like removed and added diff
// lines rendered together -- confirm which pair is live.
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
{
if (oe)
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0
BTR(32, PPCSTATE(spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0
// 8-bit operation, so the immediate is an Imm8, matching every other
// AND(8, PPCSTATE(xer_so_ov), ...) in the JIT (previously Imm32 here).
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK)); //XER.OV = 0
// Assuming xer_ca only ever holds 0 or 1, shifting right by one moves that bit
// into the host carry flag and leaves the stored byte zero.
SHR(8, PPCSTATE(xer_ca), Imm8(1)); //carry = XER.CA, XER.CA = 0
}
// Emits code that sets the guest carry flag (XER.CA) to 1.
// NOTE(review): both the OR into spr[SPR_XER] and the byte store to xer_ca are
// shown; this looks like removed and added diff lines rendered together --
// confirm which is live.
void EmuCodeBlock::JitSetCA()
{
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
MOV(8, PPCSTATE(xer_ca), Imm8(1)); //XER.CA = 1
}
// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
// branchless calculation of CA is probably faster in general.
// Emits branch-free code that sets XER.CA to 1 when `conditionCode` holds on the
// host flags and to 0 otherwise.
// NOTE(review): two implementations are shown back to back (a five-instruction
// sequence on spr[SPR_XER] and a single SETcc into xer_ca); this looks like
// removed and added diff lines rendered together -- confirm which is live.
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
{
// spr[SPR_XER] form: materialise the condition as 0/1, move it to the CA bit
// position, clear the old CA bit, then merge the new one in.
SETcc(conditionCode, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
// xer_ca form: the condition result is written straight into the carry byte.
SETcc(conditionCode, PPCSTATE(xer_ca));
}
// Emits code that clears guest carry state.
// NOTE(review): two signatures and two bodies are shown together. The
// JitClearCAOV(ca, oe) form masks the selected CA/OV bits out of spr[SPR_XER]
// and emits nothing when neither flag is requested. The JitClearCA() form
// stores 0 to xer_ca. This looks like removed and added diff lines rendered
// together -- confirm which is live.
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
void EmuCodeBlock::JitClearCA()
{
// Build an AND mask that drops only the requested flag bits.
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
// Nothing to clear: emit no code.
if (mask == 0xFFFFFFFF)
return;
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
MOV(8, PPCSTATE(xer_ca), Imm8(0));
}

View file

@ -121,7 +121,7 @@ public:
void JitGetAndClearCAOV(bool oe);
void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool ca, bool oe);
void JitClearCA();
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);

View file

@ -61,6 +61,12 @@ struct GC_ALIGNED64(PowerPCState)
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
int downcount;
// XER, reformatted into byte fields for easier access.
u8 xer_ca;
u8 xer_so_ov; // format: (SO << 1) | OV
// The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support lscbx
u16 xer_stringctrl;
#if _M_X86_64
// This member exists for the purpose of an assertion in x86 JitBase.cpp
// that its offset <= 0x100. To minimize code size on x86, we want as much
@ -252,35 +258,40 @@ inline u32 GetCR()
return PowerPC::CompactCR();
}
// SetCarry/GetCarry may speed up soon.
// Stores `ca` as the guest carry flag (XER.CA).
// NOTE(review): both the UReg_XER bitfield write on spr[SPR_XER] and the direct
// store to xer_ca are shown; this looks like removed and added diff lines
// rendered together -- confirm which is live. The xer_ca store writes `ca`
// unmodified, so callers are presumably expected to pass 0 or 1 -- verify.
inline void SetCarry(int ca)
{
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
PowerPC::ppcState.xer_ca = ca;
}
// Returns the guest carry flag (XER.CA).
// NOTE(review): two return statements are shown (bitfield read from
// spr[SPR_XER], then a read of xer_ca); this looks like removed and added diff
// lines rendered together -- as written, only the first would execute.
inline int GetCarry()
{
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
return PowerPC::ppcState.xer_ca;
}
// Returns the guest XER register value.
// NOTE(review): a direct return of spr[SPR_XER] is followed by code that
// reassembles the value from the split fields; this looks like removed and
// added diff lines rendered together -- as written, only the first return would
// execute.
inline UReg_XER GetXER()
{
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]);
// Split-field form: start from the string-control bits, then OR in CA at
// XER_CA_SHIFT and the packed SO/OV pair at XER_OV_SHIFT.
u32 xer = 0;
xer |= PowerPC::ppcState.xer_stringctrl;
xer |= PowerPC::ppcState.xer_ca << XER_CA_SHIFT;
xer |= PowerPC::ppcState.xer_so_ov << XER_OV_SHIFT;
return xer;
}
// Writes `new_xer` into the guest XER state.
// NOTE(review): both a whole-register store to spr[SPR_XER] and stores to the
// split fields are shown; this looks like removed and added diff lines rendered
// together -- confirm which is live.
inline void SetXER(UReg_XER new_xer)
{
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer;
// Split-field form: BYTE_COUNT goes in the low bits and BYTE_CMP is shifted up
// by 8 within xer_stringctrl; CA is stored on its own; SO and OV are packed as
// (SO << 1) + OV.
PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8);
PowerPC::ppcState.xer_ca = new_xer.CA;
PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV;
}
// Returns the guest summary-overflow flag (XER.SO).
// NOTE(review): two return statements are shown (bitfield read from
// spr[SPR_XER], then xer_so_ov >> 1, which drops the OV bit and leaves SO); this
// looks like removed and added diff lines rendered together -- as written, only
// the first would execute.
inline int GetXER_SO()
{
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
return PowerPC::ppcState.xer_so_ov >> 1;
}
// Updates the guest summary-overflow flag (XER.SO) from `value`.
// NOTE(review): both a bitfield assignment on spr[SPR_XER] and an OR into
// xer_so_ov are shown; this looks like removed and added diff lines rendered
// together -- confirm which is live.
// NOTE(review): the two forms are not equivalent. The bitfield assignment stores
// `value` directly, while the xer_so_ov form only ORs the bit in, so passing 0
// leaves SO unchanged. Confirm that this set-only behaviour is intended.
inline void SetXER_SO(int value)
{
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
PowerPC::ppcState.xer_so_ov |= value << 1;
}
void UpdateFPRF(double dvalue);

View file

@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 33;
static const u32 STATE_VERSION = 34;
enum
{