VideoCommon: cleanup OpcodeDecoder

This commit is contained in:
degasus 2014-11-27 23:53:11 +01:00
parent 4b22885ed8
commit 21970c4a2a
8 changed files with 197 additions and 224 deletions

View file

@ -252,7 +252,7 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
} }
} }
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
s_video_buffer_pp_read_ptr = OpcodeDecoder_Preprocess(s_video_buffer_pp_read_ptr, write_ptr + len, false); s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
// This would have to be locked if the GPU thread didn't spin. // This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len; s_video_buffer_write_ptr = write_ptr + len;
} }
@ -294,7 +294,7 @@ void RunGpuLoop()
// See comment in SyncGPU // See comment in SyncGPU
if (write_ptr > seen_ptr) if (write_ptr > seen_ptr)
{ {
s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, write_ptr, nullptr, false); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
{ {
std::lock_guard<std::mutex> vblk(s_video_buffer_lock); std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
@ -330,7 +330,7 @@ void RunGpuLoop()
u8* write_ptr = s_video_buffer_write_ptr; u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, write_ptr, &cyclesExecuted, false); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
@ -403,7 +403,7 @@ void RunGpu()
FPURoundMode::SaveSIMDState(); FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState(); FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(fifo.CPReadPointer); ReadDataFromFifo(fifo.CPReadPointer);
s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, s_video_buffer_write_ptr, nullptr, false); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
FPURoundMode::LoadSIMDState(); FPURoundMode::LoadSIMDState();
} }

View file

@ -34,12 +34,8 @@
bool g_bRecordFifoData = false; bool g_bRecordFifoData = false;
u8* g_video_buffer_read_ptr;
static u8* s_video_buffer_pp_read_ptr;
static u32 InterpretDisplayList(u32 address, u32 size) static u32 InterpretDisplayList(u32 address, u32 size)
{ {
u8* old_pVideoData = g_video_buffer_read_ptr;
u8* startAddress; u8* startAddress;
if (g_use_deterministic_gpu_thread) if (g_use_deterministic_gpu_thread)
@ -55,32 +51,26 @@ static u32 InterpretDisplayList(u32 address, u32 size)
// temporarily swap dl and non-dl (small "hack" for the stats) // temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL(); Statistics::SwapDL();
OpcodeDecoder_Run(startAddress, startAddress + size, &cycles, true); OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), &cycles, true);
INCSTAT(stats.thisFrame.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled);
// un-swap // un-swap
Statistics::SwapDL(); Statistics::SwapDL();
} }
// reset to the old pointer
g_video_buffer_read_ptr = old_pVideoData;
return cycles; return cycles;
} }
// Preprocess-pass handler for a display-list call; the non-preprocess path uses
// InterpretDisplayList instead. (Side-by-side diff view: each line shows the
// pre-commit text followed by the post-commit text.)
// Looks up the pointer for `address` via Memory::GetPointer, passes the range to
// PushFifoAuxBuffer and, when the pointer is non-null, decodes `size` bytes in
// preprocess mode with in_display_list = true.
// NOTE(review): PushFifoAuxBuffer is called before the nullptr check below --
// confirm it tolerates a null startAddress.
static void InterpretDisplayListPreprocess(u32 address, u32 size) static void InterpretDisplayListPreprocess(u32 address, u32 size)
{ {
// Pre-commit side only: remember the global preprocess read pointer, because the
// old OpcodeDecoder_Preprocess overwrote it while decoding the display list.
u8* old_read_ptr = s_video_buffer_pp_read_ptr;
u8* startAddress = Memory::GetPointer(address); u8* startAddress = Memory::GetPointer(address);
PushFifoAuxBuffer(startAddress, size); PushFifoAuxBuffer(startAddress, size);
if (startAddress != nullptr) if (startAddress != nullptr)
{ {
// Post-commit side: the decoder reads from a local DataReader over
// [startAddress, startAddress + size); no cycle count is requested (nullptr).
OpcodeDecoder_Preprocess(startAddress, startAddress + size, true); OpcodeDecoder_Run<true>(DataReader(startAddress, startAddress + size), nullptr, true);
} }
// Pre-commit side only: restore the pointer saved above.
s_video_buffer_pp_read_ptr = old_read_ptr;
} }
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
@ -131,29 +121,42 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
} }
} }
template <bool is_preprocess, u8** bufp> void OpcodeDecoder_Init()
static u32 Decode(u8* end, bool in_display_list)
{ {
u8 *opcodeStart = *bufp; }
if (*bufp == end)
return 0;
u8 cmd_byte = DataRead<u8>(bufp);
u32 cycles; void OpcodeDecoder_Shutdown()
{
}
template <bool is_preprocess>
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list)
{
u32 totalCycles = 0;
u8* opcodeStart;
while (true)
{
src.WritePointer(&opcodeStart);
if (!src.size())
goto end;
u8 cmd_byte = src.Read<u8>();
int refarray; int refarray;
switch (cmd_byte) switch (cmd_byte)
{ {
case GX_NOP: case GX_NOP:
cycles = 6; // Hm, this means that we scan over nop streams pretty slowly... totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
break; break;
case GX_LOAD_CP_REG: //0x08 case GX_LOAD_CP_REG: //0x08
{ {
if (end - *bufp < 1 + 4) if (src.size() < 1 + 4)
return 0; goto end;
cycles = 12; totalCycles += 12;
u8 sub_cmd = DataRead<u8>(bufp); u8 sub_cmd = src.Read<u8>();
u32 value = DataRead<u32>(bufp); u32 value = src.Read<u32>();
LoadCPReg(sub_cmd, value, is_preprocess); LoadCPReg(sub_cmd, value, is_preprocess);
if (!is_preprocess) if (!is_preprocess)
INCSTAT(stats.thisFrame.numCPLoads); INCSTAT(stats.thisFrame.numCPLoads);
@ -162,24 +165,21 @@ static u32 Decode(u8* end, bool in_display_list)
case GX_LOAD_XF_REG: case GX_LOAD_XF_REG:
{ {
if (end - *bufp < 4) if (src.size() < 4)
return 0; goto end;
u32 Cmd2 = DataRead<u32>(bufp); u32 Cmd2 = src.Read<u32>();
int transfer_size = ((Cmd2 >> 16) & 15) + 1; int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32)) if (src.size() < transfer_size * sizeof(u32))
return 0; goto end;
cycles = 18 + 6 * transfer_size; totalCycles += 18 + 6 * transfer_size;
if (!is_preprocess) if (!is_preprocess)
{ {
u32 xf_address = Cmd2 & 0xFFFF; u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address); LoadXFReg(transfer_size, xf_address, src);
INCSTAT(stats.thisFrame.numXFLoads); INCSTAT(stats.thisFrame.numXFLoads);
} }
else src.Skip<u32>(transfer_size);
{
*bufp += transfer_size * sizeof(u32);
}
} }
break; break;
@ -196,25 +196,25 @@ static u32 Decode(u8* end, bool in_display_list)
refarray = 0xF; refarray = 0xF;
goto load_indx; goto load_indx;
load_indx: load_indx:
if (end - *bufp < 4) if (src.size() < 4)
return 0; goto end;
cycles = 6; totalCycles += 6;
if (is_preprocess) if (is_preprocess)
PreprocessIndexedXF(DataRead<u32>(bufp), refarray); PreprocessIndexedXF(src.Read<u32>(), refarray);
else else
LoadIndexedXF(DataRead<u32>(bufp), refarray); LoadIndexedXF(src.Read<u32>(), refarray);
break; break;
case GX_CMD_CALL_DL: case GX_CMD_CALL_DL:
{ {
if (end - *bufp < 8) if (src.size() < 8)
return 0; goto end;
u32 address = DataRead<u32>(bufp); u32 address = src.Read<u32>();
u32 count = DataRead<u32>(bufp); u32 count = src.Read<u32>();
if (in_display_list) if (in_display_list)
{ {
cycles = 6; totalCycles += 6;
WARN_LOG(VIDEO,"recursive display list detected"); WARN_LOG(VIDEO,"recursive display list detected");
} }
else else
@ -222,18 +222,18 @@ static u32 Decode(u8* end, bool in_display_list)
if (is_preprocess) if (is_preprocess)
InterpretDisplayListPreprocess(address, count); InterpretDisplayListPreprocess(address, count);
else else
cycles = 6 + InterpretDisplayList(address, count); totalCycles += 6 + InterpretDisplayList(address, count);
} }
} }
break; break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
cycles = 6; totalCycles += 6;
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
break; break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache case GX_CMD_INVL_VC: // Invalidate Vertex Cache
cycles = 6; totalCycles += 6;
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
break; break;
@ -241,10 +241,10 @@ static u32 Decode(u8* end, bool in_display_list)
// In skipped_frame case: We have to let BP writes through because they set // In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead. // tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{ {
if (end - *bufp < 4) if (src.size() < 4)
return 0; goto end;
cycles = 12; totalCycles += 12;
u32 bp_cmd = DataRead<u32>(bufp); u32 bp_cmd = src.Read<u32>();
if (is_preprocess) if (is_preprocess)
{ {
LoadBPRegPreprocess(bp_cmd); LoadBPRegPreprocess(bp_cmd);
@ -261,18 +261,17 @@ static u32 Decode(u8* end, bool in_display_list)
default: default:
if ((cmd_byte & 0xC0) == 0x80) if ((cmd_byte & 0xC0) == 0x80)
{ {
cycles = 1600;
// load vertices // load vertices
if (end - *bufp < 2) if (src.size() < 2)
return 0; goto end;
u16 num_vertices = DataRead<u16>(bufp); u16 num_vertices = src.Read<u16>();
if (is_preprocess) if (is_preprocess)
{ {
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess); size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
if ((size_t) (end - *bufp) < size) if (src.size() < size)
return 0; goto end;
*bufp += size; src.Skip(size);
} }
else else
{ {
@ -280,72 +279,36 @@ static u32 Decode(u8* end, bool in_display_list)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
num_vertices, num_vertices,
end - *bufp, src,
g_bSkipCurrentFrame)) g_bSkipCurrentFrame))
return 0; goto end;
} }
totalCycles += 1600;
} }
else else
{ {
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess); UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
cycles = 1; totalCycles += 1;
} }
break; break;
} }
// Display lists get added directly into the FIFO stream // Display lists get added directly into the FIFO stream
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
// In is_preprocess mode, we don't actually care about cycles, at least for
// now... make sure the compiler realizes that.
return is_preprocess ? 1 : cycles;
}
void OpcodeDecoder_Init()
{
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
}
void OpcodeDecoder_Shutdown()
{
}
u8* OpcodeDecoder_Run(u8* start, u8* end, u32* cycles, bool in_display_list)
{
g_video_buffer_read_ptr = start;
u32 totalCycles = 0;
while (true)
{ {
u8* old = g_video_buffer_read_ptr; u8* opcodeEnd;
u32 cycles_op = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end, in_display_list); src.WritePointer(&opcodeEnd);
if (cycles_op == 0) FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart));
{
g_video_buffer_read_ptr = old;
break;
} }
totalCycles += cycles_op;
} }
end:
if (cycles) if (cycles)
{ {
*cycles = totalCycles; *cycles = totalCycles;
} }
return g_video_buffer_read_ptr; return opcodeStart;
} }
u8* OpcodeDecoder_Preprocess(u8* start, u8 *end, bool in_display_list) template u8* OpcodeDecoder_Run<true>(DataReader src, u32* cycles, bool in_display_list);
{ template u8* OpcodeDecoder_Run<false>(DataReader src, u32* cycles, bool in_display_list);
s_video_buffer_pp_read_ptr = start;
while (true)
{
u8* old = s_video_buffer_pp_read_ptr;
u32 cycles = Decode</*is_preprocess*/ true, &s_video_buffer_pp_read_ptr>(end, in_display_list);
if (cycles == 0)
{
s_video_buffer_pp_read_ptr = old;
break;
}
}
return s_video_buffer_pp_read_ptr;
}

View file

@ -5,6 +5,7 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#define GX_NOP 0x00 #define GX_NOP 0x00
@ -40,5 +41,6 @@ extern bool g_bRecordFifoData;
void OpcodeDecoder_Init(); void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown(); void OpcodeDecoder_Shutdown();
// Side-by-side diff view: the two u8*-based declarations below are the removed
// pre-commit API; the templated OpcodeDecoder_Run on the post-commit side
// replaces both (OpcodeDecoder_Run<true> takes over from OpcodeDecoder_Preprocess).
//
// OpcodeDecoder_Run<is_preprocess>(src, cycles, in_display_list):
//   src             - reader over the command bytes to decode (passed by value).
//   cycles          - if non-null, receives the cycle total accumulated by the
//                     decoded commands.
//   in_display_list - when true, a nested GX_CMD_CALL_DL is only logged as a
//                     recursive display list and is not followed.
//   is_preprocess   - selects the preprocess variants of the register/vertex
//                     handlers; defaults to false.
// Returns a pointer to the start of the first command that was not decoded,
// i.e. where decoding stopped because the data ran out, a command was
// incomplete, or RunVertices reported failure.
u8* OpcodeDecoder_Run(u8* start, u8* end, u32* cycles, bool in_display_list);
u8* OpcodeDecoder_Preprocess(u8* start, u8* end, bool in_display_list); template <bool is_preprocess = false>
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list);

View file

@ -45,6 +45,9 @@ int colElements[2];
GC_ALIGNED128(float posScale[4]); GC_ALIGNED128(float posScale[4]);
GC_ALIGNED64(float tcScale[8][2]); GC_ALIGNED64(float tcScale[8][2]);
// This pointer is used as the source for all fixed function loader calls
u8* g_video_buffer_read_ptr;
static const float fractionTable[32] = { static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),

View file

@ -130,7 +130,7 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
return loader; return loader;
} }
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing) bool RunVertices(int vtx_attr_group, int primitive, int count, DataReader& src, bool skip_drawing)
{ {
if (!count) if (!count)
return true; return true;
@ -140,13 +140,13 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
VertexLoader* loader = RefreshLoader(vtx_attr_group, state); VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
size_t size = count * loader->GetVertexSize(); size_t size = count * loader->GetVertexSize();
if (buf_size < size) if (src.size() < size)
return false; return false;
if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5)) if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
{ {
// if cull mode is CULL_ALL, ignore triangles and quads // if cull mode is CULL_ALL, ignore triangles and quads
DataSkip((u32)size); src.Skip(size);
return true; return true;
} }
@ -160,7 +160,10 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
VertexManager::PrepareForAdditionalData(primitive, count, VertexManager::PrepareForAdditionalData(primitive, count,
loader->GetNativeVertexDeclaration().stride); loader->GetNativeVertexDeclaration().stride);
src.WritePointer(&g_video_buffer_read_ptr);
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count); loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
src = g_video_buffer_read_ptr;
IndexGenerator::AddIndices(primitive, count); IndexGenerator::AddIndices(primitive, count);

View file

@ -7,6 +7,7 @@
#include <string> #include <string>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/NativeVertexFormat.h"
namespace VertexLoaderManager namespace VertexLoaderManager
@ -18,7 +19,7 @@ namespace VertexLoaderManager
int GetVertexSize(int vtx_attr_group, bool preprocess); int GetVertexSize(int vtx_attr_group, bool preprocess);
// Returns false if buf_size is insufficient. // Returns false if src holds fewer bytes than the requested vertices need.
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false); bool RunVertices(int vtx_attr_group, int primitive, int count, DataReader& src, bool skip_drawing = false);
// For debugging // For debugging
void AppendListToString(std::string *dest); void AppendListToString(std::string *dest);

View file

@ -6,6 +6,7 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
// Lighting // Lighting
@ -273,6 +274,6 @@ struct XFMemory
extern XFMemory xfmem; extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address); void LoadXFReg(u32 transferSize, u32 address, DataReader src);
void LoadIndexedXF(u32 val, int array); void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray); void PreprocessIndexedXF(u32 val, int refarray);

View file

@ -19,14 +19,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
} }
static void XFRegWritten(int transferSize, u32 baseAddress) static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
{ {
u32 address = baseAddress; u32 address = baseAddress;
u32 dataIndex = 0; u32 dataIndex = 0;
while (transferSize > 0 && address < 0x1058) while (transferSize > 0 && address < 0x1058)
{ {
u32 newValue = DataPeek<u32>(dataIndex * sizeof(u32)); u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
u32 nextAddress = address + 1; u32 nextAddress = address + 1;
switch (address) switch (address)
@ -193,7 +193,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress)
} }
} }
void LoadXFReg(u32 transferSize, u32 baseAddress) void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src)
{ {
// do not allow writes past registers // do not allow writes past registers
if (baseAddress + transferSize > 0x1058) if (baseAddress + transferSize > 0x1058)
@ -229,17 +229,17 @@ void LoadXFReg(u32 transferSize, u32 baseAddress)
XFMemWritten(xfMemTransferSize, xfMemBase); XFMemWritten(xfMemTransferSize, xfMemBase);
for (u32 i = 0; i < xfMemTransferSize; i++) for (u32 i = 0; i < xfMemTransferSize; i++)
{ {
((u32*)&xfmem)[xfMemBase + i] = DataRead<u32>(); ((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>();
} }
} }
// write to XF regs // write to XF regs
if (transferSize > 0) if (transferSize > 0)
{ {
XFRegWritten(transferSize, baseAddress); XFRegWritten(transferSize, baseAddress, src);
for (u32 i = 0; i < transferSize; i++) for (u32 i = 0; i < transferSize; i++)
{ {
((u32*)&xfmem)[baseAddress + i] = DataRead<u32>(); ((u32*)&xfmem)[baseAddress + i] = src.Read<u32>();
} }
} }
} }