A bit more WIP JIT work; primary change is psq_st implementation.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1758 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-01-04 08:28:45 +00:00
parent a72da4e76a
commit b4d78829c3
10 changed files with 203 additions and 19 deletions

View file

@ -153,7 +153,7 @@ InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
return curIndex;
}
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
InstLoc curIndex = &InstList[InstList.size()];
unsigned backOp1 = curIndex - 1 - Op1;
if (backOp1 >= 255) {
@ -168,7 +168,7 @@ InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
backOp1++;
curIndex++;
}
InstList.push_back(Opcode | backOp1 << 8 | backOp2 << 16);
InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (extra << 24));
return curIndex;
}
@ -451,7 +451,7 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
return EmitBiOp(InterpreterFallback, Op1, Op2);
}
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
switch (Opcode) {
case Add: return FoldAdd(Op1, Op2);
case And: return FoldAnd(Op1, Op2);
@ -462,7 +462,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
case Rol: return FoldRol(Op1, Op2);
case BranchCond: return FoldBranchCond(Op1, Op2);
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
default: return EmitBiOp(Opcode, Op1, Op2);
default: return EmitBiOp(Opcode, Op1, Op2, extra);
}
}
@ -1019,6 +1019,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
case DupSingleToMReg:
case DoubleToSingle:
case ExpandPackedToMReg:
case CompactMRegToPacked:
if (thisUsed)
regMarkUse(RI, I, getOp1(I), 1);
break;
@ -1075,6 +1076,10 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
regMarkUse(RI, I, getOp1(I), 1);
regMarkMemAddress(RI, I, getOp2(I), 2);
break;
case StorePaired:
regMarkUse(RI, I, getOp1(I), 1);
regMarkUse(RI, I, getOp2(I), 2);
break;
case BranchUncond:
if (!isImm(*getOp1(I)))
regMarkUse(RI, I, getOp1(I), 1);
@ -1390,6 +1395,23 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
regNormalRegClear(RI, I);
break;
}
// StorePaired: emit an indirect call to one of the quantized paired-store
// routines in asm_routines.pairedStoreQuantized. Operand 1 is the FP value,
// operand 2 is the address; the quantizer register index comes from the top
// byte of the IR instruction word.
case StorePaired: {
// EAX and EDX are overwritten below, so spill whatever they currently hold.
regSpill(RI, EAX);
regSpill(RI, EDX);
unsigned quantreg = *I >> 24;
// EAX = low 16 bits of the selected GQR, zero-extended.
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
// EDX = low byte of that value; it selects the entry in the routine table.
Jit->MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
// Until then the index is scaled by 4 with an explicit shift.
// NOTE(review): the *4 scale and the (u32) cast of the table address below
// look 32-bit specific -- confirm before relying on this for x64.
Jit->SHL(32, R(EDX), Imm8(2));
// Address goes in ECX and the value in XMM0 before the call.
// NOTE(review): ECX is not spilled here; presumably it is never handed out
// by the register allocator -- confirm.
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->CALLptr(MDisp(EDX, (u32)asm_routines.pairedStoreQuantized));
// Release the operands' registers when the per-instruction info flags say so
// (presumably bits 4 and 8 mark the last use of operand 1 and 2 -- confirm).
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
regClearInst(RI, getOp2(I));
break;
}
case DupSingleToMReg: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);
@ -1417,6 +1439,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
fregNormalRegClear(RI, I);
break;
}
// CompactMRegToPacked: convert operand 1 with CVTPD2PS into a newly
// allocated FP register, which then holds this instruction's result.
case CompactMRegToPacked: {
// The result is never read, so no code needs to be emitted.
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);
Jit->CVTPD2PS(reg, fregLocForInst(RI, getOp1(I)));
// Record that `reg` now holds the value of instruction I.
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
break;
}
case LoadFReg: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);

View file

@ -146,10 +146,12 @@ namespace IREmitter {
LoadSingle,
LoadDouble,
LoadPaired, // This handles quantizers itself
StorePaired,
DoubleToSingle,
DupSingleToMReg,
InsertDoubleInMReg,
ExpandPackedToMReg,
CompactMRegToPacked,
LoadFReg,
StoreFReg,
FSMul,
@ -232,7 +234,8 @@ namespace IREmitter {
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
unsigned extra = 0);
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
unsigned extra = 0);
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
@ -248,7 +251,8 @@ namespace IREmitter {
InstLoc FoldZeroOp(unsigned Opcode, unsigned extra);
InstLoc FoldUOp(unsigned OpCode, InstLoc Op1,
unsigned extra = 0);
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
unsigned extra = 0);
unsigned ComputeKnownZeroBits(InstLoc I);
@ -389,6 +393,9 @@ namespace IREmitter {
InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) {
return FoldUOp(LoadPaired, addr, quantReg);
}
// Emits a StorePaired instruction storing `value` at `addr`; `quantReg` is
// passed through as the instruction's extra field.
InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) {
	InstLoc const storeInst = FoldBiOp(StorePaired, value, addr, quantReg);
	return storeInst;
}
InstLoc EmitLoadFReg(unsigned freg) {
return FoldZeroOp(LoadFReg, freg);
}
@ -404,6 +411,9 @@ namespace IREmitter {
InstLoc EmitExpandPackedToMReg(InstLoc val) {
return FoldUOp(ExpandPackedToMReg, val);
}
// Emits a CompactMRegToPacked instruction whose single operand is `val`.
InstLoc EmitCompactMRegToPacked(InstLoc val) {
	InstLoc const packedInst = FoldUOp(CompactMRegToPacked, val);
	return packedInst;
}
InstLoc EmitFSMul(InstLoc op1, InstLoc op2) {
return FoldBiOp(FSMul, op1, op2);
}

View file

@ -58,6 +58,9 @@ struct CONTEXT
#endif
// #define INSTRUCTION_START Default(inst); return;
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
#define INSTRUCTION_START
class TrampolineCache : public Gen::XCodeBlock
{

View file

@ -28,6 +28,7 @@
#include "ABI.h"
#include "Jit.h"
#include "JitCache.h"
#include "Thunk.h"
#include "../../HW/CPUCompare.h"
#include "../../HW/GPFifo.h"
@ -213,6 +214,145 @@ const float m_dequantizeTableS[] =
float psTemp[2];
// Generates the quantized paired-store routines and fills in
// pairedStoreQuantized[] with their entry points. Entries 1-3 point at a
// UD2 stub.
//
// Register contract on entry to each routine, as set up by the StorePaired
// code generator:
//   XMM0 = the two single-precision values to store
//   ECX  = target address
//   EAX  = low 16 bits of the selected GQR
// The integer routines shift EAX right by 6 and use the result as a byte
// offset into m_quantizeTableS to fetch the scale factor.
void AsmRoutineManager::GenQuantizedStores() {
	// Stub for table entries that have no store routine.
	const u8* storePairedIllegal = AlignCode4();
	UD2();

	// Float store: byte-swap both 32-bit values and write 8 bytes.
	const u8* storePairedFloat = AlignCode4();
	if (cpu_info.bSSSE3) {
		PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
#ifdef _M_X64
		MOVQ_xmm(MComplex(RBX, RCX, 1, 0), XMM0);
#else
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ECX, (u32)Memory::base), XMM0);
#endif
	} else {
#ifdef _M_X64
		// Do the swap in RAX rather than RCX: RCX is the address used by the
		// store below and must survive. RAX only carries the GQR value, which
		// this routine does not read.
		MOVQ_xmm(R(RAX), XMM0);
		ROL(64, R(RAX), Imm8(32));
		BSWAP(64, RAX);
		MOV(64, MComplex(RBX, RCX, 1, 0), R(RAX));
#else
#if 0
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
		PXOR(XMM1, R(XMM1));
		PSHUFLW(XMM0, R(XMM0), 0xB1);
		MOVAPD(XMM1, R(XMM0));
		PSRLW(XMM0, 8);
		PSLLW(XMM1, 8);
		POR(XMM0, R(XMM1));
#else
		// Spill both values to psTemp and write them one 32-bit word at a
		// time through Memory::Write_U32.
		MOVQ_xmm(M(&psTemp[0]), XMM0);
#if 0
		TEST(32, R(ECX), Imm32(0x0C000000));
		FixupBranch argh = J_CC(CC_NZ);
		MOV(32, R(EAX), M(&psTemp));
		BSWAP(32, EAX);
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
		MOV(32, R(EAX), M(((char*)&psTemp) + 4));
		BSWAP(32, EAX);
		MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
		FixupBranch arg2 = J();
		SetJumpTarget(argh);
#endif
		MOV(32, R(EAX), M(((char*)&psTemp)));
		ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
		MOV(32, R(EAX), M(((char*)&psTemp)+4));
		ADD(32, R(ECX), Imm32(4));
		ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
#if 0
		SetJumpTarget(arg2);
#endif
#endif
#endif
	}
	RET();

	// U8 store: scale, convert to integers, pack with unsigned byte
	// saturation, and write the two resulting bytes.
	const u8* storePairedU8 = AlignCode4();
	SHR(32, R(EAX), Imm8(6));
	MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
	PUNPCKLDQ(XMM1, R(XMM1));
	MULPS(XMM0, R(XMM1));
	CVTPS2DQ(XMM0, R(XMM0));
	PACKSSDW(XMM0, R(XMM0));
	PACKUSWB(XMM0, R(XMM0));
	MOVD_xmm(R(EAX), XMM0);
#ifdef _M_X64
	MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
#else
	AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
#endif
	RET();

	// S8 store: same as U8 but packs with signed byte saturation.
	const u8* storePairedS8 = AlignCode4();
	SHR(32, R(EAX), Imm8(6));
	MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
	PUNPCKLDQ(XMM1, R(XMM1));
	MULPS(XMM0, R(XMM1));
	CVTPS2DQ(XMM0, R(XMM0));
	PACKSSDW(XMM0, R(XMM0));
	PACKSSWB(XMM0, R(XMM0));
	MOVD_xmm(R(EAX), XMM0);
#ifdef _M_X64
	MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
#else
	AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
#endif
	RET();

	// U16 store: scale, convert, clamp negatives to zero, pack to 16 bits,
	// then swap bytes within each halfword before the 32-bit write.
	const u8* storePairedU16 = AlignCode4();
	SHR(32, R(EAX), Imm8(6));
	MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
	PUNPCKLDQ(XMM1, R(XMM1));
	MULPS(XMM0, R(XMM1));
	CVTPS2DQ(XMM0, R(XMM0));
	// XMM1 = all-ones in every lane whose value is negative (0 > value).
	PXOR(XMM1, R(XMM1));
	PCMPGTD(XMM1, R(XMM0));
	// PANDN computes (~dest & src), so the mask must be the destination:
	// XMM1 = ~negMask & value, i.e. negative lanes become 0 and the others
	// pass through unchanged.
	PANDN(XMM1, R(XMM0));
	// FIXME: Wrong! PACKSSDW saturates as signed, so results above 0x7FFF are
	// capped at 0x7FFF; an unsigned pack (PACKUSDW) is what is wanted here.
	PACKSSDW(XMM1, R(XMM1)); //PACKUSDW(XMM1, R(XMM1));
	MOVD_xmm(R(EAX), XMM1);
	BSWAP(32, EAX);
	ROL(32, R(EAX), Imm8(16));
#ifdef _M_X64
	MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
#else
	AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
#endif
	RET();

	// S16 store: scale, convert, pack with signed 16-bit saturation, then
	// swap bytes within each halfword before the 32-bit write.
	const u8* storePairedS16 = AlignCode4();
	SHR(32, R(EAX), Imm8(6));
	MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
	PUNPCKLDQ(XMM1, R(XMM1));
	MULPS(XMM0, R(XMM1));
	CVTPS2DQ(XMM0, R(XMM0));
	PACKSSDW(XMM0, R(XMM0));
	MOVD_xmm(R(EAX), XMM0);
	BSWAP(32, EAX);
	ROL(32, R(EAX), Imm8(16));
#ifdef _M_X64
	MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
#else
	AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
#endif
	RET();

	// Dispatch table used by the StorePaired code generator.
	pairedStoreQuantized[0] = storePairedFloat;
	pairedStoreQuantized[1] = storePairedIllegal;
	pairedStoreQuantized[2] = storePairedIllegal;
	pairedStoreQuantized[3] = storePairedIllegal;
	pairedStoreQuantized[4] = storePairedU8;
	pairedStoreQuantized[5] = storePairedU16;
	pairedStoreQuantized[6] = storePairedS8;
	pairedStoreQuantized[7] = storePairedS16;
}
void AsmRoutineManager::GenQuantizedLoads() {
const u8* loadPairedIllegal = AlignCode4();
UD2();
@ -429,6 +569,7 @@ void AsmRoutineManager::GenerateCommon()
JMP(dispatcher, true);
GenQuantizedLoads();
GenQuantizedStores();
computeRcFp = AlignCode16();
//CMPSD(R(XMM0), M(&zero),

View file

@ -43,6 +43,7 @@ private:
void GenFifoFloatWrite();
void GenFifoXmm64Write();
void GenQuantizedLoads();
void GenQuantizedStores();
public:
void Init() {
@ -82,6 +83,7 @@ public:
const u8 *doReJit;
const u8 *pairedLoadQuantized[8];
const u8 *pairedStoreQuantized[8];
bool compareEnabled;
};

View file

@ -57,6 +57,8 @@ using namespace Gen;
void Jit64::bx(UGeckoInstruction inst)
{
NORMALBRANCH_START
INSTRUCTION_START;
if (inst.LK)
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

View file

@ -26,9 +26,6 @@
#include "JitCache.h"
#include "JitRegCache.h"
#define INSTRUCTION_START
// #define INSTRUCTION_START Default(inst); return;
void Jit64::fp_arith_s(UGeckoInstruction inst)
{
if (inst.Rc || inst.OPCD != 59 || inst.SUBOP5 != 25) {

View file

@ -36,9 +36,6 @@
#include "JitAsm.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
// pshufb todo: MOVQ
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};

View file

@ -37,14 +37,19 @@
#include "JitAsm.h"
#include "JitRegCache.h"
#define INSTRUCTION_START
// #define INSTRUCTION_START Default(inst); return;
// The big problem is likely instructions that set the quantizers in the same block.
// We will have to break block after quantizers are written to.
// Builds IR for a paired-single quantized store: computes the address,
// optionally writes it back to RA, compacts the source FP register and emits
// a StorePaired for quantizer register inst.I.
void Jit64::psq_st(UGeckoInstruction inst)
{
// NOTE(review): this unconditional fallback returns before any of the IR
// below is built, leaving the rest of the function unreachable. Confirm
// whether it is a deliberate WIP disable or a leftover that should be removed.
Default(inst); return;
// The W form is not implemented here; fall back to Default.
if (inst.W) {Default(inst); return;}
// addr = SIMM_12, plus GPR[RA] when RA is non-zero.
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
// Opcode 61 additionally stores the computed address into RA.
if (inst.OPCD == 61)
ibuild.EmitStoreGReg(addr, inst.RA);
// Load FPR[RS], compact it to packed form, and store it via quantizer inst.I.
val = ibuild.EmitLoadFReg(inst.RS);
val = ibuild.EmitCompactMRegToPacked(val);
ibuild.EmitStorePaired(val, addr, inst.I);
}
void Jit64::psq_l(UGeckoInstruction inst)

View file

@ -35,9 +35,6 @@
// cmppd, andpd, andnpd, or
// lfsx, ps_merge01 etc
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};