Extend TMEM cache implementation

Now works with games that deliberately avoid invalidating TMEM because
they know textures are too large to fit:

 * Sonic Riders
 * Metal Arms: Glitch in the System
 * Godzilla: Destroy All Monsters Melee
 * NHL Slapshot
 * Tak and the Power of Juju
 * Night at the Museum: Battle of the Smithsonian
 * 428: Fūsa Sareta Shibuya de
This commit is contained in:
Scott Mansell 2019-09-06 21:22:47 +12:00
parent eee302c040
commit 88bd10cd30
8 changed files with 301 additions and 32 deletions

View file

@ -650,6 +650,7 @@
<ClInclude Include="VideoCommon\TextureDecoder_Util.h" />
<ClInclude Include="VideoCommon\TextureDecoder.h" />
<ClInclude Include="VideoCommon\TextureInfo.h" />
<ClInclude Include="VideoCommon\TMEM.h" />
<ClInclude Include="VideoCommon\UberShaderCommon.h" />
<ClInclude Include="VideoCommon\UberShaderPixel.h" />
<ClInclude Include="VideoCommon\UberShaderVertex.h" />
@ -1209,6 +1210,7 @@
<ClCompile Include="VideoCommon\TextureConverterShaderGen.cpp" />
<ClCompile Include="VideoCommon\TextureDecoder_Common.cpp" />
<ClCompile Include="VideoCommon\TextureInfo.cpp" />
<ClCompile Include="VideoCommon\TMEM.cpp" />
<ClCompile Include="VideoCommon\UberShaderCommon.cpp" />
<ClCompile Include="VideoCommon\UberShaderPixel.cpp" />
<ClCompile Include="VideoCommon\UberShaderVertex.cpp" />

View file

@ -32,6 +32,7 @@
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TMEM.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexShaderManager.h"
@ -353,7 +354,7 @@ static void BPWritten(const BPCmd& bp)
if (OpcodeDecoder::g_record_fifo_data)
FifoRecorder::GetInstance().UseMemory(addr, tlutXferCount, MemoryUpdate::TMEM);
TextureCacheBase::InvalidateAllBindPoints();
TMEM::InvalidateAll();
return;
}
@ -459,8 +460,7 @@ static void BPWritten(const BPCmd& bp)
}
return;
case BPMEM_TEXINVALIDATE:
// TODO: Needs some restructuring in TextureCacheBase.
TextureCacheBase::InvalidateAllBindPoints();
TMEM::Invalidate(bp.newvalue);
return;
case BPMEM_ZCOMPARE: // Set the Z-Compare and EFB pixel format
@ -568,7 +568,7 @@ static void BPWritten(const BPCmd& bp)
if (OpcodeDecoder::g_record_fifo_data)
FifoRecorder::GetInstance().UseMemory(src_addr, bytes_read, MemoryUpdate::TMEM);
TextureCacheBase::InvalidateAllBindPoints();
TMEM::InvalidateAll();
}
return;
@ -661,7 +661,7 @@ static void BPWritten(const BPCmd& bp)
// ------------------------
case TexUnitAddress::Register::SETMODE0:
case TexUnitAddress::Register::SETMODE1:
TextureCacheBase::InvalidateAllBindPoints();
TMEM::ConfigurationChanged(tex_address, bp.newvalue);
return;
// --------------------------------------------
@ -675,7 +675,7 @@ static void BPWritten(const BPCmd& bp)
case TexUnitAddress::Register::SETIMAGE1:
case TexUnitAddress::Register::SETIMAGE2:
case TexUnitAddress::Register::SETIMAGE3:
TextureCacheBase::InvalidateAllBindPoints();
TMEM::ConfigurationChanged(tex_address, bp.newvalue);
return;
// -------------------------------
@ -683,7 +683,7 @@ static void BPWritten(const BPCmd& bp)
// BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank >> 5)
// -------------------------------
case TexUnitAddress::Register::SETTLUT:
TextureCacheBase::InvalidateAllBindPoints();
TMEM::ConfigurationChanged(tex_address, bp.newvalue);
return;
case TexUnitAddress::Register::UNKNOWN:
break; // Not handled

View file

@ -90,6 +90,8 @@ add_library(videocommon
TextureDecoder_Util.h
TextureInfo.cpp
TextureInfo.h
TMEM.cpp
TMEM.h
UberShaderCommon.cpp
UberShaderCommon.h
UberShaderPixel.cpp
@ -170,12 +172,12 @@ if(FFmpeg_FOUND)
FFmpeg::swresample
FFmpeg::swscale
)
if(APPLE)
if(APPLE)
target_link_libraries(videocommon PRIVATE
${COREMEDIA_LIBRARY}
${VIDEOTOOLBOX_LIBRARY}
${COREVIDEO_LIBRARY}
${AUDIOTOOLBOX_LIBRARY}
${AUDIOTOOLBOX_LIBRARY}
)
endif()
endif()

View file

@ -0,0 +1,224 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/TMEM.h"
namespace TMEM
{
// Tracks what the emulated TMEM cache is assumed to hold for one texture unit.
struct TextureUnitState
{
  enum class State
  {
    // Cache is invalid. Configuration has changed
    INVALID,

    // Valid, but not cached due to either being too big, or overlapping with another texture unit
    VALID,

    // Texture unit has cached all of the previous draw
    CACHED,
  };

  // Cache region of a single TMEM bank.
  // NOTE: this is filled positionally (width, height, base, size) by ConfigurationChanged(),
  // so the member order must not change.
  struct BankConfig
  {
    // Raw cache_width / cache_height register fields (exponents, not texel counts).
    u32 width = 0;
    u32 height = 0;
    // Register TMEM offset shifted left by 5. Compared against `size`, so presumably in bytes.
    u32 base = 0;
    // Size of the region in bytes. 0 means "bank not in use / not calculated yet".
    u32 size = 0;

    // True if both regions are non-empty and share at least one address.
    bool Overlaps(const BankConfig& other) const;
  };

  BankConfig even;
  BankConfig odd;

  // Explicitly initialized so that an instance never starts out with an indeterminate state,
  // regardless of its storage duration.
  State state = State::INVALID;

  // True if neither unit is INVALID and any bank of this unit overlaps any bank of `other`.
  bool Overlaps(const TextureUnitState& other) const;
};
// Forward declaration; the definition lives further down in this file.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config);
// Cache tracking state of the eight texture units, indexed by unit id.
static std::array<TextureUnitState, 8> s_unit;
// Called when a texture unit register has been written with a new value.
// The affected unit is always marked INVALID. Writes to SETIMAGE1 / SETIMAGE2 additionally
// replace the stored cache layout of the even / odd bank.
void ConfigurationChanged(TexUnitAddress bp_addr, u32 config)
{
  TextureUnitState& unit = s_unit[bp_addr.GetUnitID()];

  // Whatever was tracked before can no longer be trusted.
  // (No-op writes never get here: BPStructs filters them out before calling us.)
  unit.state = TextureUnitState::State::INVALID;

  switch (bp_addr.Reg)
  {
  case TexUnitAddress::Register::SETIMAGE1:
  {
    // Image type plus the even bank's cache height, cache width and TMEM offset.
    // The size is reset to 0 here and recalculated by the next Bind().
    TexImage1 image1 = {.hex = config};
    unit.even = {image1.cache_width, image1.cache_height, image1.tmem_even << 5, 0};
    break;
  }
  case TexUnitAddress::Register::SETIMAGE2:
  {
    // The odd bank's cache height, cache width and TMEM offset.
    TexImage2 image2 = {.hex = config};
    unit.odd = {image2.cache_width, image2.cache_height, image2.tmem_odd << 5, 0};
    break;
  }
  default:
    // Any other register: invalidating the unit (done above) is all that is needed.
    break;
  }
}
void InvalidateAll()
{
for (auto& unit : s_unit)
{
unit.state = TextureUnitState::State::INVALID;
}
}
// Handles a texture cache invalidate command by invalidating all texture units.
void Invalidate([[maybe_unused]] u32 param)
{
  // What exactly the argument encodes is not known yet; it appears to contain a TMEM address
  // and a size. To keep things simple it is ignored and everything gets invalidated.
  InvalidateAll();
}
// On bind:
//  1. use mipmapping/32bit status to calculate final bank sizes
//  2. if the texture is small enough to fit in the region(s), mark the unit as CACHED,
//     otherwise mark it as VALID
//
// unit:               index of the texture unit the texture was bound to
// num_blocks_width:   texture width, in blocks (same names as the declaration in TMEM.h)
// num_blocks_height:  texture height, in blocks
// is_mipmapped:       whether the texture has more than one mip level
// is_32_bit:          whether the texture uses a 32 bit format, which is split over both banks
void Bind(u32 unit, int num_blocks_width, int num_blocks_height, bool is_mipmapped,
          bool is_32_bit)
{
  TextureUnitState& unit_state = s_unit[unit];

  // Bytes the texture occupies in a bank, at 32 bytes per block. Calculated once, and in
  // unsigned arithmetic so the multiplication of the two block counts can't overflow an int.
  const u32 required_bytes =
      static_cast<u32>(num_blocks_width) * static_cast<u32>(num_blocks_height) * 32U;

  // All textures use the even bank.
  // It holds the level 0 mipmap (and other even mipmap LODs, if mipmapping is enabled)
  unit_state.even.size = CalculateUnitSize(unit_state.even);

  bool fits = required_bytes <= unit_state.even.size;

  if (is_mipmapped || is_32_bit)
  {
    // And the odd bank is enabled when either mipmapping is enabled or the texture is 32 bit
    // It holds the Alpha and Red channels of 32 bit textures or the odd layers of a mipmapped
    // texture
    unit_state.odd.size = CalculateUnitSize(unit_state.odd);

    fits = fits && required_bytes <= unit_state.odd.size;
  }
  else
  {
    // Bank unused: a size of 0 also excludes it from all overlap checks.
    unit_state.odd.size = 0;
  }

  // Note: the doubling below deliberately happens after the fit checks above.
  if (is_mipmapped)
  {
    // TODO: This is what games appear to expect from hardware. But seems odd, as it doesn't line up
    // with how much extra memory is required for mipmapping, just 33% more.
    // Hardware testing is required to see exactly what gets used.

    // When mipmapping is enabled, the even bank is doubled in size
    // The extended region holds the remaining even mipmap layers
    unit_state.even.size *= 2;

    if (is_32_bit)
    {
      // When a 32bit texture is mipmapped, the odd bank is also doubled in size
      unit_state.odd.size *= 2;
    }
  }

  unit_state.state = fits ? TextureUnitState::State::CACHED : TextureUnitState::State::VALID;
}
// Returns the size in bytes of the cache region described by the bank's width/height fields.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config)
{
  const u32 width_field = bank_config.width;
  const u32 height_field = bank_config.height;

  // Square regions of these three sizes are the only cache sizes supported by the sdk
  if (width_field == height_field)
  {
    if (width_field == 3)
      return 32 * 1024;  // 32KB
    if (width_field == 4)
      return 128 * 1024;  // 128KB
    if (width_field == 5)
      return 512 * 1024;  // 512KB
  }

  // The registers themselves allow a much larger amount of configurability, though.
  // Maybe other sizes are broken?
  // Until hardware tests are done, this is a guess at the size algorithm
  return 512 * (1 << width_field) * (1 << height_field);
}
// Returns true if the two bank regions [base, base + size) share at least one address.
// A bank with a size of 0 is treated as unused and never overlaps anything.
bool TextureUnitState::BankConfig::Overlaps(const BankConfig& other) const
{
  const bool either_empty = size == 0 || other.size == 0;
  if (either_empty)
    return false;

  // The other region starts somewhere inside this one...
  const bool other_starts_inside = base <= other.base && (base + size) > other.base;
  // ...or this region starts somewhere inside the other one.
  const bool this_starts_inside = other.base <= base && (other.base + other.size) > base;

  return other_starts_inside || this_starts_inside;
}
bool TextureUnitState::Overlaps(const TextureUnitState& other) const
{
if (state == TextureUnitState::State::INVALID || other.state == TextureUnitState::State::INVALID)
return false;
return even.Overlaps(other.even) || even.Overlaps(other.odd) || odd.Overlaps(other.even) ||
odd.Overlaps(other.odd);
}
// Scans through the given texture units and checks them for overlapping TMEM regions.
// Any unit involved in an overlap can't be assumed to still hold its texture, so it is
// downgraded from CACHED to VALID.
//
// used_textures: set of texture unit indices to check (against themselves and all other units)
void FinalizeBinds(BitSet32 used_textures)
{
  for (u32 i : used_textures)
  {
    TextureUnitState& unit = s_unit[i];

    // Self-overlap: the unit's even and odd bank share a part of TMEM.
    // An INVALID unit is skipped, just like TextureUnitState::Overlaps() does for unit pairs.
    // Otherwise stale bank sizes left over from an earlier bind would promote a unit that was
    // never (re)bound from INVALID to VALID.
    if (unit.state != TextureUnitState::State::INVALID && unit.even.Overlaps(unit.odd))
    {
      unit.state = TextureUnitState::State::VALID;
    }

    for (size_t j = 0; j < s_unit.size(); j++)
    {
      if (j == i)
        continue;

      TextureUnitState& other = s_unit[j];
      if (unit.Overlaps(other))
      {
        // There is an overlap, downgrade both from CACHED
        // (for there to be an overlap, both must have started as valid or cached)
        unit.state = TextureUnitState::State::VALID;
        other.state = TextureUnitState::State::VALID;
      }
    }
  }
}
// Returns true if the given texture unit is currently in the CACHED state.
bool IsCached(u32 unit)
{
  const TextureUnitState::State current = s_unit[unit].state;
  return current == TextureUnitState::State::CACHED;
}
// Returns true if the given texture unit is in any state other than INVALID.
bool IsValid(u32 unit)
{
  const TextureUnitState::State current = s_unit[unit].state;
  return current != TextureUnitState::State::INVALID;
}
} // namespace TMEM

View file

@ -0,0 +1,21 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h"
// Tracks, per texture unit, whether the previously bound texture can be assumed to still be
// present in the emulated TMEM texture cache.
namespace TMEM
{
// Marks every texture unit as invalid.
void InvalidateAll();
// Handles a texture cache invalidate command. `param` is currently ignored and all texture
// units are invalidated.
void Invalidate(u32 param);
// Notifies the tracker that a texture unit register was written with a new value.
// Invalidates the unit addressed by `bp_addr` and, for the cache layout registers, records the
// new layout from `config`.
void ConfigurationChanged(TexUnitAddress bp_addr, u32 config);
// Records that a texture was bound to `unit`. The dimensions are given in blocks.
// The unit ends up cached if the texture fits into its configured cache region(s), and merely
// valid otherwise.
void Bind(u32 unit, int num_blocks_width, int num_blocks_height, bool is_mipmapped, bool is_32_bit);
// Checks the texture units in `used_textures` for overlapping cache regions and downgrades any
// overlapping units so they are no longer reported as cached.
void FinalizeBinds(BitSet32 used_textures);
// Returns true if `unit` is in the cached state.
bool IsCached(u32 unit);
// Returns true if `unit` is in any state other than invalid.
bool IsValid(u32 unit);
} // namespace TMEM

View file

@ -43,6 +43,7 @@
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TMEM.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/TextureDecoder.h"
@ -57,8 +58,6 @@ static const int TEXTURE_POOL_KILL_THRESHOLD = 3;
std::unique_ptr<TextureCacheBase> g_texture_cache;
std::bitset<8> TextureCacheBase::valid_bind_points;
TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb)
: texture(std::move(tex)), framebuffer(std::move(fb))
@ -95,7 +94,7 @@ TextureCacheBase::TextureCacheBase()
Common::SetHash64Function();
InvalidateAllBindPoints();
TMEM::InvalidateAll();
}
TextureCacheBase::~TextureCacheBase()
@ -123,7 +122,7 @@ bool TextureCacheBase::Initialize()
void TextureCacheBase::Invalidate()
{
FlushEFBCopies();
InvalidateAllBindPoints();
TMEM::InvalidateAll();
bound_textures.fill(nullptr);
for (auto& tex : textures_by_address)
@ -1026,12 +1025,12 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
g_renderer->SetSamplerState(index, state);
}
void TextureCacheBase::BindTextures()
void TextureCacheBase::BindTextures(BitSet32 used_textures)
{
for (u32 i = 0; i < bound_textures.size(); i++)
{
const TCacheEntry* tentry = bound_textures[i];
if (IsValidBindPoint(i) && tentry)
if (used_textures[i] && tentry)
{
g_renderer->SetTexture(i, tentry->texture.get());
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
@ -1040,6 +1039,8 @@ void TextureCacheBase::BindTextures()
SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips);
}
}
TMEM::FinalizeBinds(used_textures);
}
class ArbitraryMipmapDetector
@ -1190,9 +1191,22 @@ private:
TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{
// if this stage was not invalidated by changes to texture registers, keep the current texture
if (IsValidBindPoint(stage) && bound_textures[stage])
if (TMEM::IsValid(stage) && bound_textures[stage])
{
return bound_textures[stage];
TCacheEntry* entry = bound_textures[stage];
// If the TMEM configuration is such that this texture is more or less guaranteed to still
// be in TMEM, then we know we can reuse the old entry without even hashing the memory
if (TMEM::IsCached(stage))
{
return entry;
}
// Otherwise, hash the backing memory and check it's unchanged.
// FIXME: this doesn't correctly handle textures from tmem.
if (!entry->tmem_only && entry->base_hash == entry->CalculateHash())
{
return entry;
}
}
TextureInfo texture_info = TextureInfo::FromStage(stage);
@ -1207,7 +1221,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// We need to keep track of invalidated textures until they have actually been replaced or
// re-loaded
valid_bind_points.set(stage);
TMEM::Bind(stage, entry->NumBlocksX(), entry->NumBlocksY(), entry->GetNumLevels() > 1,
entry->format == TextureFormat::RGBA8);
return entry;
}
@ -1510,7 +1525,7 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur
const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : texture_info.GetLevelCount();
// We can decode on the GPU if it is a supported format and the flag is enabled.
// Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
// Currently we don't decode RGBA8 textures from TMEM, as that would require copying from both
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
@ -2537,10 +2552,10 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe
for (size_t i = 0; i < bound_textures.size(); ++i)
{
// If the entry is currently bound and not invalidated, keep it, but mark it as invalidated.
// This way it can still be used via tmem cache emulation, but nothing else.
// If the entry is currently bound and tmem has it recorded as cached, keep it, but mark it as
// invalidated. This way it can still be used via tmem cache emulation, but nothing else.
// Spyro: A Hero's Tail is known for using such overwritten textures.
if (bound_textures[i] == entry && IsValidBindPoint(static_cast<u32>(i)))
if (bound_textures[i] == entry && TMEM::IsCached(static_cast<u32>(i)))
{
bound_textures[i]->tmem_only = true;
return ++iter;
@ -2815,18 +2830,21 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con
}
u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
{
// RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
return NumBlocksX() * bytes_per_block;
}
u32 TextureCacheBase::TCacheEntry::NumBlocksX() const
{
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);
// Round up source width to multiple of block size
const u32 actualWidth = Common::AlignUp(native_width, blockW);
const u32 numBlocksX = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
return numBlocksX * bytes_per_block;
return actualWidth / blockW;
}
u32 TextureCacheBase::TCacheEntry::NumBlocksY() const
@ -2883,6 +2901,8 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
{
const u32 bytes_per_row = BytesPerRow();
const u32 hash_sample_size = HashSampleSize();
// FIXME: textures from tmem won't get the correct hash.
u8* ptr = Memory::GetPointer(addr);
if (memory_stride == bytes_per_row)
{

View file

@ -14,6 +14,7 @@
#include <unordered_set>
#include <vector>
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "VideoCommon/AbstractTexture.h"
@ -175,6 +176,7 @@ public:
bool IsEfbCopy() const { return is_efb_copy; }
bool IsCopy() const { return is_xfb_copy || is_efb_copy; }
u32 NumBlocksX() const;
u32 NumBlocksY() const;
u32 BytesPerRow() const;
@ -214,13 +216,11 @@ public:
void Invalidate();
TCacheEntry* Load(const u32 stage);
static void InvalidateAllBindPoints() { valid_bind_points.reset(); }
static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); }
TCacheEntry* GetTexture(const int textureCacheSafetyColorSampleSize, TextureInfo& texture_info);
TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
MathUtil::Rectangle<int>* display_rect);
virtual void BindTextures();
virtual void BindTextures(BitSet32 used_textures);
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,
u32 dstStride, bool is_depth_copy,
const MathUtil::Rectangle<int>& srcRect, bool isIntensity,

View file

@ -350,7 +350,7 @@ void VertexManagerBase::LoadTextures()
for (unsigned int i : usedtextures)
g_texture_cache->Load(i);
g_texture_cache->BindTextures();
g_texture_cache->BindTextures(usedtextures);
}
void VertexManagerBase::Flush()