dolphin/Source/Core/VideoCommon/OpcodeDecoding.cpp
Pokechu22 0bcd3c79bb VertexLoader: Eliminate use of DataReader
DataReader is generally jank - it has a start and end pointer, but the end pointer is generally not used, and all of the vertex loaders mostly bypassed it anyways.

Wrapper code (the vertex loader test, as well as Fifo.cpp and OpcodeDecoding.cpp) still uses it, as does the software vertex loader (which is not a subclass of VertexLoader). These can probably be eliminated later.
2022-11-22 17:17:11 -08:00

264 lines
7.7 KiB
C++

// Copyright 2008 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// DL facts:
// Ikaruga uses (nearly) NO display lists!
// Zelda WW uses TONS of display lists
// Zelda TP uses almost 100% display lists except menus (we like this!)
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they
// are while interpreting them, and hope that the vertex format doesn't change, though, if you do
// it right when they are called. The reason is that the vertex format affects the sizes of the
// vertices.
#include "VideoCommon/OpcodeDecoding.h"
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/XFMemory.h"
#include "VideoCommon/XFStructs.h"
namespace OpcodeDecoder
{
// When true, the main (non-preprocess) RunCallback forwards every decoded command other than
// GX_CMD_CALL_DL to FifoRecorder via WriteGPCommand. Defaults to off.
bool g_record_fifo_data = false;
// Callback handed to Run(): reacts to every decoded command and adds its cost to m_cycles.
//
// is_preprocess selects between two variants. The preprocess variant works on
// g_preprocess_cp_state and only performs the bookkeeping visible below; the main variant works
// on g_main_cp_state, applies register loads, updates statistics and (optionally) records the
// command stream.
template <bool is_preprocess>
class RunCallback final : public Callback
{
public:
  // XF load: costs 18 + 6 * count. The registers are only written by the main variant.
  OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
  {
    m_cycles += 18 + 6 * count;

    if constexpr (!is_preprocess)
    {
      LoadXFReg(address, count, data);
      INCSTAT(g_stats.this_frame.num_xf_loads);
    }
  }

  // CP load: costs 12. Marks the affected vertex-loader state dirty for the active variant, then
  // stores the value into that variant's CPState.
  OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
  {
    m_cycles += 12;

    const u8 cp_group = command & CP_COMMAND_MASK;
    const bool writes_vcd = cp_group == VCD_LO || cp_group == VCD_HI;
    const bool writes_vat =
        cp_group == CP_VAT_REG_A || cp_group == CP_VAT_REG_B || cp_group == CP_VAT_REG_C;

    if constexpr (is_preprocess)
    {
      // Only the VAT dirty bits are tracked while preprocessing.
      if (writes_vcd)
      {
        VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
      }
      else if (writes_vat)
      {
        VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true;
      }
    }
    else
    {
      if (cp_group == MATINDEX_A)
      {
        VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
        VertexShaderManager::SetTexMatrixChangedA(value);
      }
      else if (cp_group == MATINDEX_B)
      {
        VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
        VertexShaderManager::SetTexMatrixChangedB(value);
      }
      else if (writes_vcd)
      {
        // A VCD write dirties every VAT slot as well as the array bases.
        VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
        VertexLoaderManager::g_bases_dirty = true;
        VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
      }
      else if (writes_vat)
      {
        // A VAT write dirties only the slot selected by the low bits of the command.
        VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true;
        VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
      }
      else if (cp_group == ARRAY_BASE)
      {
        VertexLoaderManager::g_bases_dirty = true;
      }

      INCSTAT(g_stats.this_frame.num_cp_loads);
    }

    GetCPState().LoadCPReg(command, value);
  }

  // BP load: costs 12. Dispatches to the preprocess or the main BP handler.
  OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
  {
    m_cycles += 12;

    if constexpr (!is_preprocess)
    {
      LoadBPReg(command, value, m_cycles);
      INCSTAT(g_stats.this_frame.num_bp_loads);
    }
    else
    {
      LoadBPRegPreprocess(command, value, m_cycles);
    }
  }

  // Indexed XF load: costs 6.
  OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
  {
    m_cycles += 6;

    if constexpr (!is_preprocess)
    {
      LoadIndexedXF(array, index, address, size);
    }
    else
    {
      PreprocessIndexedXF(array, index, address, size);
    }
  }

  // Draw command: hands the vertices to the vertex loader and checks that it consumed exactly
  // vertex_size * num_vertices bytes.
  OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
                                          u32 vertex_size, u16 num_vertices,
                                          const u8* vertex_data))
  {
    const u32 expected_bytes = vertex_size * num_vertices;
    const u32 consumed_bytes =
        VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, vertex_data);
    ASSERT(consumed_bytes == expected_bytes);

    // Each vertex takes 4 GPU ticks, and each GPU tick is 3 CPU ticks.
    m_cycles += num_vertices * 4 * 3 + 6;
  }

  // Display list call: costs 6, plus whatever the commands inside the list add.
  //
  // Not force-inlined because it calls Run, which in turn calls back into this class.
  // m_in_display_list stops that from recursing without bound, and inlining Run just for the
  // display list would bring no real benefit.
  OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size))
  {
    m_cycles += 6;

    if (m_in_display_list)
    {
      // A list called from inside another list is reported and skipped.
      WARN_LOG_FMT(VIDEO, "recursive display list detected");
      return;
    }

    m_in_display_list = true;

    if constexpr (is_preprocess)
    {
      const u8* const list_data = Memory::GetPointer(address);

      // NOTE(review): the aux-buffer push happens before the null check below - confirm that
      // PushFifoAuxBuffer copes with a null source pointer.
      Fifo::PushFifoAuxBuffer(list_data, size);

      if (list_data != nullptr)
      {
        Run(list_data, size, *this);
      }
    }
    else
    {
      // With the deterministic GPU thread the list bytes come from the aux buffer; otherwise
      // they are read straight from emulated memory.
      const u8* const list_data = Fifo::UseDeterministicGPUThread() ?
                                      static_cast<u8*>(Fifo::PopFifoAuxBuffer(size)) :
                                      Memory::GetPointer(address);

      // Skip the list instead of crashing when no pointer could be obtained.
      if (list_data != nullptr)
      {
        // The stats are swapped between DL and non-DL for the duration of the list (a small
        // "hack"), then swapped back afterwards.
        g_stats.SwapDL();
        Run(list_data, size, *this);
        INCSTAT(g_stats.this_frame.num_dlists_called);
        g_stats.SwapDL();
      }
    }

    m_in_display_list = false;
  }

  // NOP run: costs 6 per NOP, which makes scanning over long NOP streams fairly slow.
  OPCODE_CALLBACK(void OnNop(u32 count))
  {
    m_cycles += 6 * count;
  }

  // Opcodes without a dedicated callback. Two of them are merely logged; everything else is
  // passed on to the command processor.
  OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
  {
    switch (static_cast<Opcode>(opcode))
    {
    case Opcode::GX_CMD_UNKNOWN_METRICS:
    {
      // 'Zelda Four Swords' issues this and then checks the metrics registers.
      m_cycles += 6;
      DEBUG_LOG_FMT(VIDEO, "GX 0x44");
      break;
    }
    case Opcode::GX_CMD_INVL_VC:
    {
      // Invalidate vertex cache.
      m_cycles += 6;
      DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
      break;
    }
    default:
    {
      CommandProcessor::HandleUnknownOpcode(opcode, data, is_preprocess);
      m_cycles += 1;
      break;
    }
    }
  }

  // Receives the raw bytes of a command. The main variant uses this to feed the FIFO recorder
  // while g_record_fifo_data is set.
  OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size))
  {
    ASSERT(size >= 1);

    if constexpr (!is_preprocess)
    {
      if (!g_record_fifo_data)
        return;

      // The display list call itself is not recorded: the list's commands pass through this
      // same callback, so they end up directly in the recorded FIFO stream.
      if (static_cast<Opcode>(data[0]) == Opcode::GX_CMD_CALL_DL)
        return;

      FifoRecorder::GetInstance().WriteGPCommand(data, size);
    }
  }

  // Returns the CP state belonging to this variant.
  OPCODE_CALLBACK(CPState& GetCPState())
  {
    if constexpr (!is_preprocess)
      return g_main_cp_state;
    else
      return g_preprocess_cp_state;
  }

  // Returns m_vertex_size of the loader that RefreshLoader yields for the given VAT index.
  OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
  {
    return VertexLoaderManager::RefreshLoader<is_preprocess>(vat)->m_vertex_size;
  }

  // Running total of the cycle costs added by the callbacks above.
  u32 m_cycles = 0;
  // True while a display list is being executed; used to reject nested display list calls.
  bool m_in_display_list = false;
};
// Decodes the commands available in src with a fresh RunCallback<is_preprocess>.
//
// If cycles is non-null it receives the callback's accumulated m_cycles. The return value is
// the read pointer of src after skipping the amount reported by Run.
template <bool is_preprocess>
u8* RunFifo(DataReader src, u32* cycles)
{
  RunCallback<is_preprocess> handler{};

  const u32 available = static_cast<u32>(src.size());
  const u32 consumed = Run(src.GetPointer(), available, handler);

  if (cycles)
  {
    *cycles = handler.m_cycles;
  }

  // src is a by-value copy, so advancing it only affects the pointer returned here.
  src.Skip(consumed);
  return src.GetPointer();
}
// Explicit instantiations of RunFifo for both variants: preprocess (true) and main (false).
template u8* RunFifo<true>(DataReader src, u32* cycles);
template u8* RunFifo<false>(DataReader src, u32* cycles);
} // namespace OpcodeDecoder