For unknown reasons, this patch fixes Beyond Good and Evil and Metroid intro in 32-bit mode only. Yeah, I have some work to do on the JIT.

Also adds some minor stuff like memory card write notification, plus some minor SSSE3 optimizations.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@179 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-11 19:35:38 +00:00
parent 93429219ab
commit 29102ecbc6
15 changed files with 183 additions and 50 deletions

View file

@ -2,3 +2,18 @@
GFXPlugin = Plugins\Plugin_VideoOGL.dll GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP_NULL.dll DSPPlugin = Plugins\Plugin_DSP_NULL.dll
PadPlugin = Plugins\Plugin_PadSimple.dll PadPlugin = Plugins\Plugin_PadSimple.dll
[General]
LastFilename =
GCMPathes = 1
GCMPath0 = E:\GCM
[Core]
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
HLEBios = True
UseDynarec = True
UseDualCore = True
Throttle = False
LockThreads = True
DefaultGCM =
OptimizeQuantizers = True

View file

@ -2,3 +2,18 @@
GFXPlugin = Plugins\Plugin_VideoOGL.dll GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP_NULL.dll DSPPlugin = Plugins\Plugin_DSP_NULL.dll
PadPlugin = Plugins\Plugin_PadSimple.dll PadPlugin = Plugins\Plugin_PadSimple.dll
[General]
LastFilename =
GCMPathes = 1
GCMPath0 = E:\GCM
[Core]
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
HLEBios = True
UseDynarec = True
UseDualCore = False
Throttle = False
LockThreads = True
DefaultGCM =
OptimizeQuantizers = True

View file

@ -71,6 +71,7 @@ typedef signed __int16 s16;
typedef signed __int8 s8; typedef signed __int8 s8;
#define GC_ALIGNED16(x) __declspec(align(16)) x #define GC_ALIGNED16(x) __declspec(align(16)) x
// Aligns the declaration x on a 32-byte boundary (MSVC __declspec syntax).
#define GC_ALIGNED32(x) __declspec(align(32)) x
#define GC_ALIGNED64(x) __declspec(align(64)) x #define GC_ALIGNED64(x) __declspec(align(64)) x
#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x #define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x #define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
@ -101,6 +102,7 @@ typedef union _LARGE_INTEGER
#endif #endif
#define GC_ALIGNED16(x) __attribute((aligned(16))) x #define GC_ALIGNED16(x) __attribute((aligned(16))) x
// Aligns the declaration x on a 32-byte boundary (GCC-style attribute syntax).
// The requested alignment must match the macro name: 32, not 16.
#define GC_ALIGNED32(x) __attribute((aligned(32))) x
#define GC_ALIGNED64(x) __attribute((aligned(64))) x #define GC_ALIGNED64(x) __attribute((aligned(64))) x
#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x #define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x #define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x

View file

@ -974,8 +974,7 @@ namespace Gen
void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
void MOVQ_xmm(X64Reg dest, OpArg arg) { void MOVQ_xmm(X64Reg dest, OpArg arg) {
if (dest > 7) #ifdef _M_X64
{
// Alternate encoding // Alternate encoding
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
arg.operandReg = dest; arg.operandReg = dest;
@ -984,14 +983,13 @@ namespace Gen
Write8(0x0f); Write8(0x0f);
Write8(0x6E); Write8(0x6E);
arg.WriteRest(0); arg.WriteRest(0);
} else { #else
arg.operandReg = dest; arg.operandReg = dest;
arg.WriteRex(false);
Write8(0xF3); Write8(0xF3);
Write8(0x0f); Write8(0x0f);
Write8(0x7E); Write8(0x7E);
arg.WriteRest(0); arg.WriteRest(0);
} #endif
} }
void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}

View file

@ -93,7 +93,7 @@ Common::Event emuThreadGoing;
bool PanicAlertToVideo(const char* text, bool yes_no) bool PanicAlertToVideo(const char* text, bool yes_no)
{ {
PluginVideo::Video_AddMessage(text,3000); DisplayMessage(text, 3000);
return true; return true;
} }
@ -140,13 +140,24 @@ bool Init(const SCoreStartupParameter _CoreParameter)
// all right ... here we go // all right ... here we go
Host_SetWaitCursor(false); Host_SetWaitCursor(false);
PluginVideo::Video_AddMessage("Emulation started.",3000); DisplayMessage("Emulation started.", 3000);
//RegisterPanicAlertHandler(PanicAlertToVideo); //RegisterPanicAlertHandler(PanicAlertToVideo);
return true; return true;
} }
// Displays a message in a user-visible way by forwarding the text and the
// display duration (in milliseconds) to PluginVideo::Video_AddMessage.
void DisplayMessage(const char *message, int time_in_ms)
{
	PluginVideo::Video_AddMessage(message, time_in_ms);
}

// std::string convenience overload: passes the underlying C string on to the
// const char* overload defined just above.
void DisplayMessage(const std::string &message, int time_in_ms)
{
	DisplayMessage(message.c_str(), time_in_ms);
}
// Called from GUI thread or VI thread // Called from GUI thread or VI thread
void Stop() // - Hammertime! void Stop() // - Hammertime!
{ {

View file

@ -54,6 +54,8 @@ namespace Core
extern bool bWriteTrace; extern bool bWriteTrace;
void StartTrace(bool write); void StartTrace(bool write);
void DisplayMessage(const std::string &message, int time_in_ms); // This displays messages in a user-visible way.
void DisplayMessage(const char *message, int time_in_ms); // This displays messages in a user-visible way.
int SyncTrace(); int SyncTrace();
void SetBlockStart(u32 addr); void SetBlockStart(u32 addr);

View file

@ -84,6 +84,8 @@ void CEXIMemoryCard::Flush()
} }
fwrite(memory_card_content, memory_card_size, 1, pFile); fwrite(memory_card_content, memory_card_size, 1, pFile);
fclose(pFile); fclose(pFile);
Core::DisplayMessage(StringFromFormat("Wrote memory card contents to %s", m_strFilename.c_str()), 4000);
} }
void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate) void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate)

View file

@ -37,7 +37,7 @@ namespace GPFifo
// Both of these should actually work! Only problem is that we have to decide at run time, // Both of these should actually work! Only problem is that we have to decide at run time,
// the same function could use both methods. Compile 2 different versions of each such block? // the same function could use both methods. Compile 2 different versions of each such block?
u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
// pipe counter // pipe counter
u32 m_gatherPipeCount = 0; u32 m_gatherPipeCount = 0;

View file

@ -28,6 +28,11 @@ enum
GATHER_PIPE_SIZE = 32 GATHER_PIPE_SIZE = 32
}; };
extern u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
// pipe counter
extern u32 m_gatherPipeCount;
// Init // Init
void Init(); void Init();

View file

@ -183,6 +183,20 @@ void Generate()
SetJumpTarget(pLesser); SetJumpTarget(pLesser);
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0 OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
RET(); RET();
// Fast write routines - special case the most common hardware write
// TODO: use this.
// Even in x86, the param values will be in the right registers.
/*
const u8 *fastMemWrite8 = AlignCode16();
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
FixupBranch skip_fast_write = J_CC(CC_NE, false);
MOV(32, EAX, M(&m_gatherPipeCount));
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
ADD(32, 1, M(&m_gatherPipeCount));
RET();
SetJumpTarget(skip_fast_write);
CALL((void *)&Memory::Write_U8);*/
} }
#elif defined(_M_X64) #elif defined(_M_X64)

View file

@ -139,6 +139,15 @@ namespace Jit64
fpr.UnlockAll(); fpr.UnlockAll();
} }
// Handler for the fmrx instruction. After the INSTRUCTION_START prologue the
// instruction is handed straight to Default().
// NOTE(review): Default() is presumably the generic fallback path - confirm.
void fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
}
void fcmpx(UGeckoInstruction inst) void fcmpx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;

View file

@ -27,6 +27,7 @@
#include "../../HW/PixelEngine.h" #include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "../PPCTables.h" #include "../PPCTables.h"
#include "CPUDetect.h"
#include "x64Emitter.h" #include "x64Emitter.h"
#include "ABI.h" #include "ABI.h"
@ -51,6 +52,7 @@ namespace Jit64
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15}; const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
// Byte-shuffle mask (used as the PSHUFB operand in lfd): selects source bytes
// 7..0 for the low 8 result bytes and the same bytes 7..0 again for the high 8,
// i.e. byte-reverses the low 64 bits and duplicates them into both halves.
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
static u64 GC_ALIGNED16(temp64); static u64 GC_ALIGNED16(temp64);
@ -115,12 +117,18 @@ void lfd(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(a); gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
fpr.LoadToX64(d, false);
fpr.Lock(d);
if (cpu_info.bSSE3NewInstructions) {
X64Reg xd = fpr.RX(d);
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
} else {
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
BSWAP(64, EAX); BSWAP(64, EAX);
MOV(64, M(&temp64), R(EAX)); MOV(64, M(&temp64), R(EAX));
fpr.Lock(d);
fpr.LoadToX64(d, false);
MOVDDUP(fpr.RX(d), M(&temp64)); MOVDDUP(fpr.RX(d), M(&temp64));
}
gpr.UnlockAll(); gpr.UnlockAll();
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -128,7 +136,10 @@ void lfd(UGeckoInstruction inst)
void stfd(UGeckoInstruction inst) void stfd(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
if (!cpu_info.bSSSE3NewInstructions)
{
DISABLE_32BIT; DISABLE_32BIT;
}
int s = inst.RS; int s = inst.RS;
int a = inst.RA; int a = inst.RA;
if (!a) if (!a)
@ -140,12 +151,25 @@ void stfd(UGeckoInstruction inst)
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_IX86
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif
if (cpu_info.bSSSE3NewInstructions) {
MOVAPS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
#else
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base + offset), XMM0);
#endif
} else {
fpr.LoadToX64(s, true, false); fpr.LoadToX64(s, true, false);
MOVSD(M(&temp64), fpr.RX(s)); MOVSD(M(&temp64), fpr.RX(s));
MOV(32, R(ABI_PARAM1), gpr.R(a));
MOV(64, R(EAX), M(&temp64)); MOV(64, R(EAX), M(&temp64));
BSWAP(64, EAX); BSWAP(64, EAX);
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX)); MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
@ -154,6 +178,7 @@ void stfd(UGeckoInstruction inst)
void stfs(UGeckoInstruction inst) void stfs(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT;
bool update = inst.OPCD & 1; bool update = inst.OPCD & 1;
int s = inst.RS; int s = inst.RS;
int a = inst.RA; int a = inst.RA;
@ -192,10 +217,24 @@ void lfsx(UGeckoInstruction inst)
MOV(32, R(EAX), gpr.R(inst.RB)); MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
if (cpu_info.bSSSE3NewInstructions) {
// PanicAlert("SSE3 supported!");
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
#else
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
#endif
PSHUFB(r, M((void *)bswapShuffle1x4));
CVTSS2SD(r, R(r));
MOVDDUP(r, R(r));
} else {
UnsafeLoadRegToReg(EAX, EAX, 32, false); UnsafeLoadRegToReg(EAX, EAX, 32, false);
MOV(32, M(&temp32), R(EAX)); MOV(32, M(&temp32), R(EAX));
CVTSS2SD(XMM0, M(&temp32)); CVTSS2SD(XMM0, M(&temp32));
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0)); MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
}
fpr.UnlockAll(); fpr.UnlockAll();
} }

View file

@ -27,6 +27,7 @@
#include "../../HW/PixelEngine.h" #include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "../PPCTables.h" #include "../PPCTables.h"
#include "CPUDetect.h"
#include "x64Emitter.h" #include "x64Emitter.h"
#include "ABI.h" #include "ABI.h"
@ -225,6 +226,8 @@ void psq_st(UGeckoInstruction inst)
} }
} }
// Byte-shuffle mask (used as the PSHUFB operand in psq_l): byte-reverses each
// of the two low 32-bit lanes and leaves bytes 8..15 in place.
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
// Identity byte-shuffle mask: every byte maps to itself.
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
void psq_l(UGeckoInstruction inst) void psq_l(UGeckoInstruction inst)
{ {
@ -247,21 +250,38 @@ void psq_l(UGeckoInstruction inst)
} }
int offset = inst.SIMM_12; int offset = inst.SIMM_12;
switch (ldType) { switch (ldType) {
case QUANTIZE_FLOAT: case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
{ {
#ifdef _M_X64 #ifdef _M_X64
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
if (cpu_info.bSSSE3NewInstructions) {
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX); BSWAP(64, RAX);
MOV(64, M(&psTemp[0]), R(RAX)); MOV(64, M(&psTemp[0]), R(RAX));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0])); CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r), 1); SHUFPD(r, R(r), 1);
}
if (update) if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
break; break;
#else #else
if (cpu_info.bSSSE3NewInstructions) {
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOV(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
gpr.FlushR(ECX); gpr.FlushR(ECX);
gpr.LockX(ECX); gpr.LockX(ECX);
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA);
@ -277,9 +297,10 @@ void psq_l(UGeckoInstruction inst)
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0])); CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX();
}
if (update) if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
gpr.UnlockAllX();
break; break;
#endif #endif
} }

View file

@ -33,7 +33,7 @@ public:
{ {
TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; } TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; }
u32 texture; GLuint texture;
u32 addr; u32 addr;
u32 hash; u32 hash;
u32 paletteHash; u32 paletteHash;