Merge pull request #9956 from Pokechu22/non-power-of-2-wrap-2

VideoCommon: Manually handle texture wrapping and sampling
This commit is contained in:
JMC47 2021-11-18 13:08:23 -05:00 committed by GitHub
commit 6f4bbac528
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
46 changed files with 752 additions and 329 deletions

View file

@ -198,6 +198,8 @@ public enum BooleanSetting implements AbstractBooleanSetting
GFX_HACK_EFB_EMULATE_FORMAT_CHANGES(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS,
"EFBEmulateFormatChanges", false),
GFX_HACK_VERTEX_ROUDING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, "VertexRounding", false),
GFX_HACK_FAST_TEXTURE_SAMPLING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS,
"FastTextureSampling", true),
LOGGER_WRITE_TO_FILE(Settings.FILE_LOGGER, Settings.SECTION_LOGGER_OPTIONS, "WriteToFile", false),

View file

@ -744,6 +744,8 @@ public final class SettingsFragmentPresenter
R.string.backend_multithreading, R.string.backend_multithreading_description));
sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_HACK_EFB_DEFER_INVALIDATION,
R.string.defer_efb_invalidation, R.string.defer_efb_invalidation_description));
sl.add(new InvertedCheckBoxSetting(mContext, BooleanSetting.GFX_HACK_FAST_TEXTURE_SAMPLING,
R.string.manual_texture_sampling, R.string.manual_texture_sampling_description));
sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_INTERNAL_RESOLUTION_FRAME_DUMPS,
R.string.internal_resolution_dumps, R.string.internal_resolution_dumps_description));

View file

@ -303,6 +303,8 @@
<string name="backend_multithreading_description">Enables graphics backend multithreading (Vulkan only). May affect performance. If unsure, leave this unchecked.</string>
<string name="defer_efb_invalidation">Defer EFB Cache Invalidation</string>
<string name="defer_efb_invalidation_description">Defers invalidation of the EFB access cache until a GPU synchronization command is executed. May improve performance in some games at the cost of stability. If unsure, leave this unchecked.</string>
<string name="manual_texture_sampling">Manual Texture Sampling</string>
<string name="manual_texture_sampling_description">Use a manual implementation of texture sampling instead of the graphics backend\'s built-in functionality.</string>
<string name="internal_resolution_dumps">Dump Frames at Internal Resolution</string>
<string name="internal_resolution_dumps_description">Creates frame dumps and screenshots at the internal resolution of the renderer, rather than the size of the window it is displayed within. If the aspect ratio is widescreen, the output image will be scaled horizontally to preserve the vertical resolution.</string>
<string name="debugging">Debugging</string>

View file

@ -149,6 +149,7 @@ public:
constexpr T Value() const { return Value(std::is_signed<T>()); }
constexpr operator T() const { return Value(); }
static constexpr bool IsSigned() { return std::is_signed<T>(); }
static constexpr std::size_t StartBit() { return position; }
static constexpr std::size_t NumBits() { return bits; }
@ -244,6 +245,7 @@ public:
BitFieldArray& operator=(const BitFieldArray&) = delete;
public:
constexpr bool IsSigned() const { return std::is_signed<T>(); }
constexpr std::size_t StartBit() const { return position; }
constexpr std::size_t NumBits() const { return bits; }
constexpr std::size_t Size() const { return size; }

View file

@ -150,6 +150,8 @@ const Info<bool> GFX_HACK_EFB_EMULATE_FORMAT_CHANGES{
const Info<bool> GFX_HACK_VERTEX_ROUDING{{System::GFX, "Hacks", "VertexRounding"}, false};
const Info<u32> GFX_HACK_MISSING_COLOR_VALUE{{System::GFX, "Hacks", "MissingColorValue"},
0xFFFFFFFF};
const Info<bool> GFX_HACK_FAST_TEXTURE_SAMPLING{{System::GFX, "Hacks", "FastTextureSampling"},
true};
// Graphics.GameSpecific

View file

@ -123,6 +123,7 @@ extern const Info<bool> GFX_HACK_COPY_EFB_SCALED;
extern const Info<bool> GFX_HACK_EFB_EMULATE_FORMAT_CHANGES;
extern const Info<bool> GFX_HACK_VERTEX_ROUDING;
extern const Info<u32> GFX_HACK_MISSING_COLOR_VALUE;
extern const Info<bool> GFX_HACK_FAST_TEXTURE_SAMPLING;
// Graphics.GameSpecific

View file

@ -644,7 +644,6 @@
<ClInclude Include="VideoCommon\PostProcessing.h" />
<ClInclude Include="VideoCommon\RenderBase.h" />
<ClInclude Include="VideoCommon\RenderState.h" />
<ClInclude Include="VideoCommon\SamplerCommon.h" />
<ClInclude Include="VideoCommon\ShaderCache.h" />
<ClInclude Include="VideoCommon\ShaderGenCommon.h" />
<ClInclude Include="VideoCommon\Statistics.h" />

View file

@ -138,8 +138,11 @@ void AdvancedWidget::CreateWidgets()
m_defer_efb_access_invalidation =
new GraphicsBool(tr("Defer EFB Cache Invalidation"), Config::GFX_HACK_EFB_DEFER_INVALIDATION);
m_manual_texture_sampling =
new GraphicsBool(tr("Manual Texture Sampling"), Config::GFX_HACK_FAST_TEXTURE_SAMPLING, true);
experimental_layout->addWidget(m_defer_efb_access_invalidation, 0, 0);
experimental_layout->addWidget(m_manual_texture_sampling, 0, 1);
main_layout->addWidget(debugging_box);
main_layout->addWidget(utility_box);
@ -266,6 +269,17 @@ void AdvancedWidget::AddDescriptions()
"<br><br>May improve performance in some games which rely on CPU EFB Access at the cost "
"of stability.<br><br><dolphin_emphasis>If unsure, leave this "
"unchecked.</dolphin_emphasis>");
static const char TR_MANUAL_TEXTURE_SAMPLING_DESCRIPTION[] = QT_TR_NOOP(
"Use a manual implementation of texture sampling instead of the graphics backend's built-in "
"functionality.<br><br>"
"This setting can fix graphical issues in some games on certain GPUs, most commonly vertical "
"lines on FMVs. In addition to this, enabling Manual Texture Sampling will allow for correct "
"emulation of texture wrapping special cases (at 1x IR or when scaled EFB is disabled, and "
"with custom textures disabled) and better emulates Level of Detail calculation.<br><br>"
"This comes at the cost of potentially worse performance, especially at higher internal "
"resolutions; additionally, Anisotropic Filtering is currently incompatible with Manual "
"Texture Sampling.<br><br>"
"<dolphin_emphasis>If unsure, leave this unchecked.</dolphin_emphasis>");
#ifdef _WIN32
static const char TR_BORDERLESS_FULLSCREEN_DESCRIPTION[] = QT_TR_NOOP(
@ -299,4 +313,5 @@ void AdvancedWidget::AddDescriptions()
m_borderless_fullscreen->SetDescription(tr(TR_BORDERLESS_FULLSCREEN_DESCRIPTION));
#endif
m_defer_efb_access_invalidation->SetDescription(tr(TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION));
m_manual_texture_sampling->SetDescription(tr(TR_MANUAL_TEXTURE_SAMPLING_DESCRIPTION));
}

View file

@ -61,4 +61,5 @@ private:
// Experimental
GraphicsBool* m_defer_efb_access_invalidation;
GraphicsBool* m_manual_texture_sampling;
};

View file

@ -26,6 +26,7 @@ private:
GraphicsBool* m_skip_efb_cpu;
GraphicsBool* m_ignore_format_changes;
GraphicsBool* m_store_efb_copies;
GraphicsBool* m_defer_efb_copies;
// Texture Cache
QLabel* m_accuracy_label;
@ -42,7 +43,6 @@ private:
GraphicsBool* m_disable_bounding_box;
GraphicsBool* m_vertex_rounding;
GraphicsBool* m_save_texture_cache_state;
GraphicsBool* m_defer_efb_copies;
void CreateWidgets();
void ConnectWidgets();

View file

@ -106,6 +106,8 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsSSAA = true;
g_Config.backend_info.bSupportsShaderBinaries = true;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsCoarseDerivatives = true;
g_Config.backend_info.bSupportsTextureQueryLevels = true;
g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter);
g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames();

View file

@ -303,43 +303,43 @@ StateCache::~StateCache() = default;
ID3D11SamplerState* StateCache::Get(SamplerState state)
{
std::lock_guard<std::mutex> guard(m_lock);
auto it = m_sampler.find(state.hex);
auto it = m_sampler.find(state);
if (it != m_sampler.end())
return it->second.Get();
D3D11_SAMPLER_DESC sampdc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT());
if (state.mipmap_filter == SamplerState::Filter::Linear)
if (state.tm0.mipmap_filter == FilterMode::Linear)
{
if (state.min_filter == SamplerState::Filter::Linear)
sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
if (state.tm0.min_filter == FilterMode::Linear)
sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D11_FILTER_MIN_MAG_MIP_LINEAR :
D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
else
sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR :
D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR;
}
else
{
if (state.min_filter == SamplerState::Filter::Linear)
sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
if (state.tm0.min_filter == FilterMode::Linear)
sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT :
D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT;
else
sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT :
D3D11_FILTER_MIN_MAG_MIP_POINT;
}
static constexpr std::array<D3D11_TEXTURE_ADDRESS_MODE, 3> address_modes = {
{D3D11_TEXTURE_ADDRESS_CLAMP, D3D11_TEXTURE_ADDRESS_WRAP, D3D11_TEXTURE_ADDRESS_MIRROR}};
sampdc.AddressU = address_modes[static_cast<u32>(state.wrap_u.Value())];
sampdc.AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())];
sampdc.MaxLOD = state.max_lod / 16.f;
sampdc.MinLOD = state.min_lod / 16.f;
sampdc.MipLODBias = (s32)state.lod_bias / 256.f;
sampdc.AddressU = address_modes[static_cast<u32>(state.tm0.wrap_u.Value())];
sampdc.AddressV = address_modes[static_cast<u32>(state.tm0.wrap_v.Value())];
sampdc.MaxLOD = state.tm1.max_lod / 16.f;
sampdc.MinLOD = state.tm1.min_lod / 16.f;
sampdc.MipLODBias = state.tm0.lod_bias / 256.f;
if (state.anisotropic_filtering)
if (state.tm0.anisotropic_filtering)
{
sampdc.Filter = D3D11_FILTER_ANISOTROPIC;
sampdc.MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy;
@ -348,7 +348,7 @@ ID3D11SamplerState* StateCache::Get(SamplerState state)
ComPtr<ID3D11SamplerState> res;
HRESULT hr = D3D::device->CreateSamplerState(&sampdc, res.GetAddressOf());
CHECK(SUCCEEDED(hr), "Creating D3D sampler state failed");
return m_sampler.emplace(state.hex, std::move(res)).first->second.Get();
return m_sampler.emplace(state, std::move(res)).first->second.Get();
}
ID3D11BlendState* StateCache::Get(BlendingState state)

View file

@ -37,7 +37,7 @@ private:
std::unordered_map<u32, ComPtr<ID3D11DepthStencilState>> m_depth;
std::unordered_map<u32, ComPtr<ID3D11RasterizerState>> m_raster;
std::unordered_map<u32, ComPtr<ID3D11BlendState>> m_blend;
std::unordered_map<SamplerState::StorageType, ComPtr<ID3D11SamplerState>> m_sampler;
std::unordered_map<SamplerState, ComPtr<ID3D11SamplerState>> m_sampler;
std::mutex m_lock;
};

View file

@ -85,32 +85,32 @@ SamplerHeapManager::~SamplerHeapManager() = default;
static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& state)
{
if (state.mipmap_filter == SamplerState::Filter::Linear)
if (state.tm0.mipmap_filter == FilterMode::Linear)
{
if (state.min_filter == SamplerState::Filter::Linear)
if (state.tm0.min_filter == FilterMode::Linear)
{
desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D12_FILTER_MIN_MAG_MIP_LINEAR :
D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
}
else
{
desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR :
D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
}
}
else
{
if (state.min_filter == SamplerState::Filter::Linear)
if (state.tm0.min_filter == FilterMode::Linear)
{
desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT :
D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
}
else
{
desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ?
desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ?
D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT :
D3D12_FILTER_MIN_MAG_MIP_POINT;
}
@ -119,15 +119,15 @@ static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& stat
static constexpr std::array<D3D12_TEXTURE_ADDRESS_MODE, 3> address_modes = {
{D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP,
D3D12_TEXTURE_ADDRESS_MODE_MIRROR}};
desc->AddressU = address_modes[static_cast<u32>(state.wrap_u.Value())];
desc->AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())];
desc->AddressU = address_modes[static_cast<u32>(state.tm0.wrap_u.Value())];
desc->AddressV = address_modes[static_cast<u32>(state.tm0.wrap_v.Value())];
desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
desc->MaxLOD = state.max_lod / 16.f;
desc->MinLOD = state.min_lod / 16.f;
desc->MipLODBias = static_cast<s32>(state.lod_bias) / 256.f;
desc->MaxLOD = state.tm1.max_lod / 16.f;
desc->MinLOD = state.tm1.min_lod / 16.f;
desc->MipLODBias = static_cast<s32>(state.tm0.lod_bias) / 256.f;
desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
if (state.anisotropic_filtering)
if (state.tm0.anisotropic_filtering)
{
desc->Filter = D3D12_FILTER_ANISOTROPIC;
desc->MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy;
@ -136,7 +136,7 @@ static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& stat
bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle)
{
const auto it = m_sampler_map.find(ss.hex);
const auto it = m_sampler_map.find(ss);
if (it != m_sampler_map.end())
{
*handle = it->second;
@ -158,7 +158,7 @@ bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HAN
m_current_offset * m_descriptor_increment_size};
g_dx_context->GetDevice()->CreateSampler(&desc, new_handle);
m_sampler_map.emplace(ss.hex, new_handle);
m_sampler_map.emplace(ss, new_handle);
m_current_offset++;
*handle = new_handle;
return true;

View file

@ -68,6 +68,6 @@ private:
D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu{};
std::unordered_map<SamplerState::StorageType, D3D12_CPU_DESCRIPTOR_HANDLE> m_sampler_map;
std::unordered_map<SamplerState, D3D12_CPU_DESCRIPTOR_HANDLE> m_sampler_map;
};
} // namespace DX12

View file

@ -82,6 +82,8 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.AAModes = DXContext::GetAAModes(g_Config.iAdapter);
g_Config.backend_info.bSupportsShaderBinaries = true;
g_Config.backend_info.bSupportsPipelineCacheData = true;
g_Config.backend_info.bSupportsCoarseDerivatives = true;
g_Config.backend_info.bSupportsTextureQueryLevels = true;
// We can only check texture support once we have a device.
if (g_dx_context)

View file

@ -55,6 +55,8 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPartialDepthCopies = false;
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsCoarseDerivatives = false;
g_Config.backend_info.bSupportsTextureQueryLevels = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View file

@ -99,7 +99,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsGPUTextureDecoding = true;
g_Config.backend_info.bSupportsBBox = true;
// Overwritten in Render.cpp later
// Overwritten in OGLRender.cpp later
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsPaletteConversion = true;
@ -107,6 +107,8 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsDepthClamp = true;
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCoarseDerivatives = false;
g_Config.backend_info.bSupportsTextureQueryLevels = false;
g_Config.backend_info.Adapters.clear();

View file

@ -483,6 +483,10 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
GLExtensions::Supports("GL_EXT_texture_compression_s3tc");
g_Config.backend_info.bSupportsBPTCTextures =
GLExtensions::Supports("GL_ARB_texture_compression_bptc");
g_Config.backend_info.bSupportsCoarseDerivatives =
GLExtensions::Supports("GL_ARB_derivative_control") || GLExtensions::Version() >= 450;
g_Config.backend_info.bSupportsTextureQueryLevels =
GLExtensions::Supports("GL_ARB_texture_query_levels") || GLExtensions::Version() >= 430;
if (m_main_gl_context->IsGLES())
{

View file

@ -747,6 +747,8 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // shader image load store
"%s\n" // shader framebuffer fetch
"%s\n" // shader thread shuffle
"%s\n" // derivative control
"%s\n" // query levels
// Precision defines for GLSL ES
"%s\n"
@ -826,6 +828,12 @@ void ProgramShaderCache::CreateHeader()
"#extension GL_ARB_shader_image_load_store : enable" :
"",
framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(),
g_ActiveConfig.backend_info.bSupportsCoarseDerivatives ?
"#extension GL_ARB_derivative_control : enable" :
"",
g_ActiveConfig.backend_info.bSupportsTextureQueryLevels ?
"#extension GL_ARB_texture_query_levels : enable" :
"",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?

View file

@ -7,7 +7,6 @@
#include <memory>
#include "Common/CommonTypes.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/VideoConfig.h"
namespace OGL
@ -72,16 +71,16 @@ void SamplerCache::InvalidateBinding(u32 stage)
void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
{
GLenum min_filter;
GLenum mag_filter = (params.mag_filter == SamplerState::Filter::Point) ? GL_NEAREST : GL_LINEAR;
if (params.mipmap_filter == SamplerState::Filter::Linear)
GLenum mag_filter = (params.tm0.mag_filter == FilterMode::Near) ? GL_NEAREST : GL_LINEAR;
if (params.tm0.mipmap_filter == FilterMode::Linear)
{
min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_LINEAR :
GL_LINEAR_MIPMAP_LINEAR;
min_filter = (params.tm0.min_filter == FilterMode::Near) ? GL_NEAREST_MIPMAP_LINEAR :
GL_LINEAR_MIPMAP_LINEAR;
}
else
{
min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_NEAREST :
GL_LINEAR_MIPMAP_NEAREST;
min_filter = (params.tm0.min_filter == FilterMode::Near) ? GL_NEAREST_MIPMAP_NEAREST :
GL_LINEAR_MIPMAP_NEAREST;
}
glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, min_filter);
@ -91,17 +90,17 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
{GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT}};
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S,
address_modes[static_cast<u32>(params.wrap_u.Value())]);
address_modes[static_cast<u32>(params.tm0.wrap_u.Value())]);
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T,
address_modes[static_cast<u32>(params.wrap_v.Value())]);
address_modes[static_cast<u32>(params.tm0.wrap_v.Value())]);
glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.min_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.tm1.min_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.tm1.max_lod / 16.f);
if (!static_cast<Renderer*>(g_renderer.get())->IsGLES())
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.tm0.lod_bias / 256.f);
if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso)
if (params.tm0.anisotropic_filtering && g_ogl_config.bSupportsAniso)
{
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT,
static_cast<float>(1 << g_ActiveConfig.iMaxAnisotropy));

View file

@ -171,22 +171,25 @@ static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoor
const TexMode1& tm1 = texUnit.texMode1;
float sDelta, tDelta;
float* uv00 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv10 = rasterBlock.Pixel[1][0].Uv[texcoord];
float* uv01 = rasterBlock.Pixel[0][1].Uv[texcoord];
float dudx = fabsf(uv00[0] - uv10[0]);
float dvdx = fabsf(uv00[1] - uv10[1]);
float dudy = fabsf(uv00[0] - uv01[0]);
float dvdy = fabsf(uv00[1] - uv01[1]);
if (tm0.diag_lod == LODType::Diagonal)
{
float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];
sDelta = fabsf(uv0[0] - uv1[0]);
tDelta = fabsf(uv0[1] - uv1[1]);
sDelta = dudx + dudy;
tDelta = dvdx + dvdy;
}
else
{
float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
float* uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];
sDelta = std::max(fabsf(uv0[0] - uv1[0]), fabsf(uv0[0] - uv2[0]));
tDelta = std::max(fabsf(uv0[1] - uv1[1]), fabsf(uv0[1] - uv2[1]));
sDelta = std::max(dudx, dudy);
tDelta = std::max(dvdx, dvdy);
}
// get LOD in s28.4

View file

@ -84,6 +84,8 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsBBox = true;
g_Config.backend_info.bSupportsCoarseDerivatives = false;
g_Config.backend_info.bSupportsTextureQueryLevels = false;
// aamodes
g_Config.backend_info.AAModes = {1};

View file

@ -11,7 +11,6 @@
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureDecoder.h"
#define ALLOW_MIPMAP 1
@ -79,7 +78,7 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)
const s32 lodFract = lod & 0xf;
if (lod > 0 && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
if (lod > 0 && tm0.mipmap_filter != MipMode::None)
{
// use mipmap
baseMip = lod >> 4;

View file

@ -315,28 +315,28 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info)
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT}};
VkSamplerCreateInfo create_info = {
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType
nullptr, // const void* pNext
0, // VkSamplerCreateFlags flags
filters[static_cast<u32>(info.mag_filter.Value())], // VkFilter magFilter
filters[static_cast<u32>(info.min_filter.Value())], // VkFilter minFilter
mipmap_modes[static_cast<u32>(info.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode
address_modes[static_cast<u32>(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[static_cast<u32>(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.lod_bias / 256.0f, // float mipLodBias
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType
nullptr, // const void* pNext
0, // VkSamplerCreateFlags flags
filters[u32(info.tm0.mag_filter.Value())], // VkFilter magFilter
filters[u32(info.tm0.min_filter.Value())], // VkFilter minFilter
mipmap_modes[u32(info.tm0.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode
address_modes[u32(info.tm0.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[u32(info.tm0.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.tm0.lod_bias / 256.0f, // float mipLodBias
VK_FALSE, // VkBool32 anisotropyEnable
0.0f, // float maxAnisotropy
VK_FALSE, // VkBool32 compareEnable
VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp
info.min_lod / 16.0f, // float minLod
info.max_lod / 16.0f, // float maxLod
info.tm1.min_lod / 16.0f, // float minLod
info.tm1.max_lod / 16.0f, // float maxLod
VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor
VK_FALSE // VkBool32 unnormalizedCoordinates
};
// Can we use anisotropic filtering with this sampler?
if (info.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering())
if (info.tm0.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering())
{
// Cap anisotropy to device limits.
create_info.anisotropyEnable = VK_TRUE;

View file

@ -49,7 +49,7 @@ Renderer::Renderer(std::unique_ptr<SwapChain> swap_chain, float backbuffer_scale
{
UpdateActiveConfig();
for (SamplerState& m_sampler_state : m_sampler_states)
m_sampler_state.hex = RenderState::GetPointSamplerState().hex;
m_sampler_state = RenderState::GetPointSamplerState();
}
Renderer::~Renderer() = default;
@ -545,7 +545,7 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture)
void Renderer::SetSamplerState(u32 index, const SamplerState& state)
{
// Skip lookup if the state hasn't changed.
if (m_sampler_states[index].hex == state.hex)
if (m_sampler_states[index] == state)
return;
// Look up new state and replace in state tracker.
@ -557,7 +557,7 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state)
}
StateTracker::GetInstance()->SetSampler(index, sampler);
m_sampler_states[index].hex = state.hex;
m_sampler_states[index] = state;
}
void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write)
@ -588,7 +588,7 @@ void Renderer::ResetSamplerStates()
// Invalidate all sampler states, next draw will re-initialize them.
for (u32 i = 0; i < m_sampler_states.size(); i++)
{
m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex;
m_sampler_states[i] = RenderState::GetPointSamplerState();
StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler());
}

View file

@ -286,6 +286,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsLogicOp = false; // Dependent on features.
config->backend_info.bSupportsLargePoints = false; // Dependent on features.
config->backend_info.bSupportsFramebufferFetch = false; // No support.
config->backend_info.bSupportsCoarseDerivatives = true; // Assumed support.
config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support.
}
void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)

View file

@ -70,7 +70,6 @@ add_library(videocommon
RenderBase.h
RenderState.cpp
RenderState.h
SamplerCommon.h
ShaderCache.cpp
ShaderCache.h
ShaderGenCommon.cpp

View file

@ -21,7 +21,7 @@ struct PixelShaderConstants
std::array<int4, 4> colors;
std::array<int4, 4> kcolors;
int4 alpha;
std::array<float4, 8> texdims;
std::array<uint4, 8> texdims;
std::array<int4, 2> zbias;
std::array<int4, 2> indtexscale;
std::array<int4, 6> indtexmtx;
@ -32,7 +32,7 @@ struct PixelShaderConstants
float4 zslope;
std::array<float, 2> efbscale; // .xy
// Constants from here onwards are only used in ubershaders.
// Constants from here onwards are only used in ubershaders, other than pack2.
u32 genmode; // .z
u32 alphaTest; // .w
u32 fogParam3; // .x
@ -44,7 +44,7 @@ struct PixelShaderConstants
u32 dither; // .z (bool)
u32 bounding_box; // .w (bool)
std::array<uint4, 16> pack1; // .xy - combiners, .z - tevind, .w - iref
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel, .z/.w - SamplerState tm0/tm1
std::array<int4, 32> konst; // .rgba
// The following are used in ubershaders when using shader_framebuffer_fetch blending
u32 blend_enable;

View file

@ -381,7 +381,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
// Declare samplers
out.Write("SamplerState samp[8] : register(s0);\n"
"\n"
"Texture2DArray Tex[8] : register(t0);\n");
"Texture2DArray tex[8] : register(t0);\n");
}
out.Write("\n");
@ -393,7 +393,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
out.Write("\tint4 " I_COLORS "[4];\n"
"\tint4 " I_KCOLORS "[4];\n"
"\tint4 " I_ALPHA ";\n"
"\tfloat4 " I_TEXDIMS "[8];\n"
"\tint4 " I_TEXDIMS "[8];\n"
"\tint4 " I_ZBIAS "[2];\n"
"\tint4 " I_INDTEXSCALE "[2];\n"
"\tint4 " I_INDTEXMTX "[6];\n"
@ -414,7 +414,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
"\tbool bpmem_dither;\n"
"\tbool bpmem_bounding_box;\n"
"\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind
"\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel
"\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel, .zw - SamplerState tm0/tm1
"\tint4 konstLookup[32];\n"
"\tbool blend_enable;\n"
"\tuint blend_src_factor;\n"
@ -428,7 +428,9 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
"#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n"
"#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n"
"#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n");
"#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n"
"#define samp_texmode0(i) (bpmem_pack2[(i)].z)\n"
"#define samp_texmode1(i) (bpmem_pack2[(i)].w)\n\n");
if (host_config.per_pixel_lighting)
{
@ -534,14 +536,304 @@ void UpdateBoundingBox(float2 rawpos) {{
)",
fmt::arg("efb_height", EFB_HEIGHT), fmt::arg("efb_scale", I_EFBSCALE));
}
if (host_config.manual_texture_sampling)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(R"(
int4 readTexture(in sampler2DArray tex, uint u, uint v, int layer, int lod) {{
return iround(texelFetch(tex, int3(u, v, layer), lod) * 255.0);
}}
int4 readTextureLinear(in sampler2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)");
}
else if (api_type == APIType::D3D)
{
out.Write(R"(
int4 readTexture(in Texture2DArray tex, uint u, uint v, int layer, int lod) {{
return iround(tex.Load(int4(u, v, layer, lod)) * 255.0);
}}
int4 readTextureLinear(in Texture2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)");
}
out.Write(R"(
int4 result =
readTexture(tex, uv1.x, uv1.y, layer, lod) * (128 - frac_uv.x) * (128 - frac_uv.y) +
readTexture(tex, uv2.x, uv1.y, layer, lod) * ( frac_uv.x) * (128 - frac_uv.y) +
readTexture(tex, uv1.x, uv2.y, layer, lod) * (128 - frac_uv.x) * ( frac_uv.y) +
readTexture(tex, uv2.x, uv2.y, layer, lod) * ( frac_uv.x) * ( frac_uv.y);
return result >> 14;
}}
)");
if (host_config.manual_texture_sampling_custom_texture_sizes)
{
// This is slower, and doesn't result in the same odd behavior that happens on console when
// wrapping with non-power-of-2 sizes, but it's fine for custom textures to have non-console
// behavior.
out.Write(R"(
// Both GLSL and HLSL produce undefined values when the modulo operator (%) is used with a negative
// dividend and a positive divisor. We want a positive value such that SafeModulo(-1, 3) is 2.
int SafeModulo(int dividend, int divisor) {{
if (dividend >= 0) {{
return dividend % divisor;
}} else {{
// This works because ~x is the same as -x - 1.
// `~x % 5` over -5 to -1 gives 4, 3, 2, 1, 0. `4 - (~x % 5)` gives 0, 1, 2, 3, 4.
return (divisor - 1) - (~dividend % divisor);
}}
}}
uint WrapCoord(int coord, uint wrap, int size) {{
switch (wrap) {{
case {:s}:
default: // confirmed that clamp is used for invalid (3) via hardware test
return uint(clamp(coord, 0, size - 1));
case {:s}:
return uint(SafeModulo(coord, size)); // coord % size
case {:s}:
if (SafeModulo(coord, 2 * size) >= size) {{ // coord % (2 * size)
coord = ~coord;
}}
return uint(SafeModulo(coord, size)); // coord % size
}}
}}
)",
WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror);
}
else
{
out.Write(R"(
uint WrapCoord(int coord, uint wrap, int size) {{
switch (wrap) {{
case {:s}:
default: // confirmed that clamp is used for invalid (3) via hardware test
return uint(clamp(coord, 0, size - 1));
case {:s}:
return uint(coord & (size - 1));
case {:s}:
if ((coord & size) != 0) {{
coord = ~coord;
}}
return uint(coord & (size - 1));
}}
}}
)",
WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror);
}
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n");
}
else if (api_type == APIType::D3D)
{
out.Write("\nint4 sampleTexture(uint texmap, in Texture2DArray tex, in SamplerState tex_samp, "
"int2 uv, int layer) {{\n");
}
if (!host_config.manual_texture_sampling)
{
out.Write(" float size_s = float(" I_TEXDIMS "[texmap].x * 128);\n"
" float size_t = float(" I_TEXDIMS "[texmap].y * 128);\n"
" float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(" return iround(255.0 * texture(tex, coords));\n}}\n");
}
else if (api_type == APIType::D3D)
{
out.Write(" return iround(255.0 * tex.Sample(tex_samp, coords));\n}}\n");
}
}
else
{
out.Write(R"(
uint texmode0 = samp_texmode0(texmap);
uint texmode1 = samp_texmode1(texmap);
uint wrap_s = {};
uint wrap_t = {};
bool mag_linear = {} != 0u;
bool mipmap_linear = {} != 0u;
bool min_linear = {} != 0u;
bool diag_lod = {} != 0u;
int lod_bias = {};
// uint max_aniso = TODO;
bool lod_clamp = {} != 0u;
int min_lod = int({});
int max_lod = int({});
)",
BitfieldExtract<&SamplerState::TM0::wrap_u>("texmode0"),
BitfieldExtract<&SamplerState::TM0::wrap_v>("texmode0"),
BitfieldExtract<&SamplerState::TM0::mag_filter>("texmode0"),
BitfieldExtract<&SamplerState::TM0::mipmap_filter>("texmode0"),
BitfieldExtract<&SamplerState::TM0::min_filter>("texmode0"),
BitfieldExtract<&SamplerState::TM0::diag_lod>("texmode0"),
BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0"),
// BitfieldExtract<&SamplerState::TM0::max_aniso>("texmode0"),
BitfieldExtract<&SamplerState::TM0::lod_clamp>("texmode0"),
BitfieldExtract<&SamplerState::TM1::min_lod>("texmode1"),
BitfieldExtract<&SamplerState::TM1::max_lod>("texmode1"));
if (host_config.manual_texture_sampling_custom_texture_sizes)
{
out.Write(R"(
int native_size_s = )" I_TEXDIMS R"([texmap].x;
int native_size_t = )" I_TEXDIMS R"([texmap].y;
)");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(R"(
int3 size = textureSize(tex, 0);
int size_s = size.x;
int size_t = size.y;
)");
if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels)
{
out.Write(" int number_of_levels = textureQueryLevels(tex);\n");
}
else
{
out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n");
ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! Odd graphical results may "
"occur if custom textures are in use!");
}
}
else if (api_type == APIType::D3D)
{
ASSERT(g_ActiveConfig.backend_info.bSupportsTextureQueryLevels);
out.Write(R"(
int size_s, size_t, layers, number_of_levels;
tex.GetDimensions(0, size_s, size_t, layers, number_of_levels);
)");
}
out.Write(R"(
// Prevent out-of-bounds LOD values when using custom textures
max_lod = min(max_lod, (number_of_levels - 1) << 4);
// Rescale uv to account for the new texture size
uv.x = (uv.x * size_s) / native_size_s;
uv.y = (uv.y * size_t) / native_size_t;
)");
}
else
{
out.Write(R"(
int size_s = )" I_TEXDIMS R"([texmap].x;
int size_t = )" I_TEXDIMS R"([texmap].y;
)");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives)
{
// The software renderer uses the equivalent of coarse derivatives, so use them here for
// consistency. This hasn't been hardware tested.
// Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't
// exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the
// ability to specifically require it.
out.Write(R"(
float2 uv_delta_x = abs(dFdxCoarse(float2(uv)));
float2 uv_delta_y = abs(dFdyCoarse(float2(uv)));
)");
}
else
{
out.Write(R"(
float2 uv_delta_x = abs(dFdx(float2(uv)));
float2 uv_delta_y = abs(dFdy(float2(uv)));
)");
}
}
else if (api_type == APIType::D3D)
{
ASSERT(g_ActiveConfig.backend_info.bSupportsCoarseDerivatives);
out.Write(R"(
float2 uv_delta_x = abs(ddx_coarse(float2(uv)));
float2 uv_delta_y = abs(ddy_coarse(float2(uv)));
)");
}
// TODO: LOD bias is normally S2.5 (Dolphin uses S7.8 for arbitrary mipmap detection and higher
// IRs), but (at least per the software renderer) actual LOD is S28.4. How does this work?
// Also, note that we can make some assumptions due to use of a SamplerState version of the BP
// configuration, which tidies things compared to whatever nonsense games can put in.
out.Write(R"(
float2 uv_delta = diag_lod ? uv_delta_x + uv_delta_y : max(uv_delta_x, uv_delta_y);
float max_delta = max(uv_delta.x / 128.0, uv_delta.y / 128.0);
// log2(x) is undefined if x <= 0, but in practice it seems log2(0) is -infinity, which becomes INT_MIN.
// If lod_bias is negative, adding it to INT_MIN causes an underflow, resulting in a large positive value.
// Hardware testing indicates that min_lod should be used when the derivative is 0.
int lod = max_delta == 0.0 ? min_lod : int(floor(log2(max_delta) * 16.0)) + (lod_bias >> 4);
bool is_linear = (lod > 0) ? min_linear : mag_linear;
lod = clamp(lod, min_lod, max_lod);
int base_lod = lod >> 4;
int frac_lod = lod & 15;
if (!mipmap_linear && frac_lod >= 8) {{
// Round to nearest LOD in point mode
base_lod++;
}}
if (is_linear) {{
uint2 texuv1 = uint2(
WrapCoord(((uv.x >> base_lod) - 64) >> 7, wrap_s, size_s >> base_lod),
WrapCoord(((uv.y >> base_lod) - 64) >> 7, wrap_t, size_t >> base_lod));
uint2 texuv2 = uint2(
WrapCoord(((uv.x >> base_lod) + 64) >> 7, wrap_s, size_s >> base_lod),
WrapCoord(((uv.y >> base_lod) + 64) >> 7, wrap_t, size_t >> base_lod));
int2 frac_uv = int2(((uv.x >> base_lod) - 64) & 0x7f, ((uv.y >> base_lod) - 64) & 0x7f);
int4 result = readTextureLinear(tex, texuv1, texuv2, layer, base_lod, frac_uv);
if (frac_lod != 0 && mipmap_linear) {{
texuv1 = uint2(
WrapCoord(((uv.x >> (base_lod + 1)) - 64) >> 7, wrap_s, size_s >> (base_lod + 1)),
WrapCoord(((uv.y >> (base_lod + 1)) - 64) >> 7, wrap_t, size_t >> (base_lod + 1)));
texuv2 = uint2(
WrapCoord(((uv.x >> (base_lod + 1)) + 64) >> 7, wrap_s, size_s >> (base_lod + 1)),
WrapCoord(((uv.y >> (base_lod + 1)) + 64) >> 7, wrap_t, size_t >> (base_lod + 1)));
frac_uv = int2(((uv.x >> (base_lod + 1)) - 64) & 0x7f, ((uv.y >> (base_lod + 1)) - 64) & 0x7f);
result *= 16 - frac_lod;
result += readTextureLinear(tex, texuv1, texuv2, layer, base_lod + 1, frac_uv) * frac_lod;
result >>= 4;
}}
return result;
}} else {{
uint2 texuv = uint2(
WrapCoord(uv.x >> (7 + base_lod), wrap_s, size_s >> base_lod),
WrapCoord(uv.y >> (7 + base_lod), wrap_t, size_t >> base_lod));
int4 result = readTexture(tex, texuv.x, texuv.y, layer, base_lod);
if (frac_lod != 0 && mipmap_linear) {{
texuv = uint2(
WrapCoord(uv.x >> (7 + base_lod + 1), wrap_s, size_s >> (base_lod + 1)),
WrapCoord(uv.y >> (7 + base_lod + 1), wrap_t, size_t >> (base_lod + 1)));
result *= 16 - frac_lod;
result += readTexture(tex, texuv.x, texuv.y, layer, base_lod + 1) * frac_lod;
result >>= 4;
}}
return result;
}}
}}
)");
}
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
APIType api_type, bool stereo);
static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
bool clamp, TevScale scale, bool alpha);
static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap,
int texmap, bool stereo, APIType api_type);
static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type,
bool per_pixel_depth, bool use_dual_source);
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
@ -565,8 +857,20 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
uid_data->genMode_numtexgens, uid_data->genMode_numindstages);
// Stuff that is shared between ubershaders and pixelgen.
WriteBitfieldExtractHeader(out, api_type, host_config);
WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
}
else if (api_type == APIType::D3D)
{
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer)\n");
}
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
@ -754,6 +1058,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write(" ) {{\n");
}
if (!stereo)
out.Write("\tint layer = 0;\n");
out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS
"[3], prev = " I_COLORS "[0];\n"
@ -811,7 +1117,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
{
out.Write("\tint2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
// TODO: S24 overflows here?
}
}
@ -834,8 +1140,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) ? "zw" : "xy");
out.Write("\tint3 iindtex{} = ", i);
SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type);
out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i,
texmap);
}
}
@ -1243,8 +1549,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0',
};
out.Write("\ttextemp = ");
SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type);
out.Write("\ttextemp = sampleTextureWrapper({0}u, tevcoord.xy, layer).{1};\n",
stage.tevorders_texmap, texswap);
}
else if (uid_data->genMode_numtexgens == 0)
{
@ -1428,24 +1734,6 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia
out.Write("){}", tev_scale_table_right[u32(scale)]);
}
static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap,
int texmap, bool stereo, APIType api_type)
{
out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap);
if (api_type == APIType::D3D)
{
out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS
"[{}].xy, {}))).{};\n",
texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
else
{
out.Write("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS "[{}].xy, {}))).{};\n",
texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
}
constexpr std::array<const char*, 8> tev_alpha_funcs_table{
"(false)", // CompareMode::Never
"(prev.a < {})", // CompareMode::Less

View file

@ -273,16 +273,22 @@ void PixelShaderManager::SetDestAlphaChanged()
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height)
{
float rwidth = 1.0f / (width * 128.0f);
float rheight = 1.0f / (height * 128.0f);
// TODO: move this check out to callee. There we could just call this function on texture changes
// or better, use textureSize() in glsl
if (constants.texdims[texmapid][0] != rwidth || constants.texdims[texmapid][1] != rheight)
if (constants.texdims[texmapid][0] != width || constants.texdims[texmapid][1] != height)
dirty = true;
constants.texdims[texmapid][0] = rwidth;
constants.texdims[texmapid][1] = rheight;
constants.texdims[texmapid][0] = width;
constants.texdims[texmapid][1] = height;
}
void PixelShaderManager::SetSamplerState(int texmapid, u32 tm0, u32 tm1)
{
if (constants.pack2[texmapid][2] != tm0 || constants.pack2[texmapid][3] != tm1)
dirty = true;
constants.pack2[texmapid][2] = tm0;
constants.pack2[texmapid][3] = tm1;
}
void PixelShaderManager::SetZTextureBias()
@ -382,8 +388,8 @@ void PixelShaderManager::SetZTextureOpChanged()
void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
{
TCoordInfo& tc = bpmem.texcoords[texmapid];
constants.texdims[texmapid][2] = (float)(tc.s.scale_minus_1 + 1) * 128.0f;
constants.texdims[texmapid][3] = (float)(tc.t.scale_minus_1 + 1) * 128.0f;
constants.texdims[texmapid][2] = tc.s.scale_minus_1 + 1;
constants.texdims[texmapid][3] = tc.t.scale_minus_1 + 1;
dirty = true;
}

View file

@ -30,6 +30,7 @@ public:
static void SetAlphaTestChanged();
static void SetDestAlphaChanged();
static void SetTexDims(int texmapid, u32 width, u32 height);
static void SetSamplerState(int texmapid, u32 tm0, u32 tm1);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged(float scalex, float scaley);

View file

@ -2,9 +2,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/RenderState.h"
#include <algorithm>
#include <array>
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureConfig.h"
void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type)
@ -17,18 +18,6 @@ void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_ty
cullmode = CullMode::None;
}
RasterizationState& RasterizationState::operator=(const RasterizationState& rhs)
{
hex = rhs.hex;
return *this;
}
FramebufferState& FramebufferState::operator=(const FramebufferState& rhs)
{
hex = rhs.hex;
return *this;
}
void DepthState::Generate(const BPMemory& bp)
{
testenable = bp.zmode.testenable.Value();
@ -36,12 +25,6 @@ void DepthState::Generate(const BPMemory& bp)
func = bp.zmode.func.Value();
}
DepthState& DepthState::operator=(const DepthState& rhs)
{
hex = rhs.hex;
return *this;
}
// If the framebuffer format has no alpha channel, it is assumed to
// ONE on blending. As the backends may emulate this framebuffer
// configuration with an alpha channel, we just drop all references
@ -216,42 +199,45 @@ void BlendingState::ApproximateLogicOpWithBlending()
dstfactor = approximations[u32(logicmode.Value())].dstfactor;
}
BlendingState& BlendingState::operator=(const BlendingState& rhs)
{
hex = rhs.hex;
return *this;
}
void SamplerState::Generate(const BPMemory& bp, u32 index)
{
auto tex = bp.tex.GetUnit(index);
const TexMode0& tm0 = tex.texMode0;
const TexMode1& tm1 = tex.texMode1;
const TexMode0& bp_tm0 = tex.texMode0;
const TexMode1& bp_tm1 = tex.texMode1;
// GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their
// sampler states. Therefore, we set the min/max LOD to zero if this option is used.
min_filter = tm0.min_filter == FilterMode::Linear ? Filter::Linear : Filter::Point;
mipmap_filter = tm0.mipmap_filter == MipMode::Linear ? Filter::Linear : Filter::Point;
mag_filter = tm0.mag_filter == FilterMode::Linear ? Filter::Linear : Filter::Point;
tm0.min_filter = bp_tm0.min_filter;
tm0.mipmap_filter =
bp_tm0.mipmap_filter == MipMode::Linear ? FilterMode::Linear : FilterMode::Near;
tm0.mag_filter = bp_tm0.mag_filter;
// If mipmaps are disabled, clamp min/max lod
max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod.Value() : 0;
min_lod = std::min(max_lod.Value(), static_cast<u64>(tm1.min_lod));
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0;
if (bp_tm0.mipmap_filter == MipMode::None)
{
tm1.max_lod = 0;
tm1.min_lod = 0;
tm0.lod_bias = 0;
}
else
{
// NOTE: When comparing, max is checked first, then min; if max is less than min, max wins
tm1.max_lod = bp_tm1.max_lod.Value();
tm1.min_lod = std::min(tm1.max_lod.Value(), bp_tm1.min_lod.Value());
tm0.lod_bias = bp_tm0.lod_bias * (256 / 32);
}
// Address modes
// Wrap modes
// Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp.
static constexpr std::array<AddressMode, 4> address_modes = {
{AddressMode::Clamp, AddressMode::Repeat, AddressMode::MirroredRepeat, AddressMode::Clamp}};
wrap_u = address_modes[u32(tm0.wrap_s.Value())];
wrap_v = address_modes[u32(tm0.wrap_t.Value())];
anisotropic_filtering = 0;
}
auto filter_invalid_wrap = [](WrapMode mode) {
return (mode <= WrapMode::Mirror) ? mode : WrapMode::Clamp;
};
tm0.wrap_u = filter_invalid_wrap(bp_tm0.wrap_s);
tm0.wrap_v = filter_invalid_wrap(bp_tm0.wrap_t);
SamplerState& SamplerState::operator=(const SamplerState& rhs)
{
hex = rhs.hex;
return *this;
tm0.diag_lod = bp_tm0.diag_lod;
tm0.anisotropic_filtering = false; // TODO: Respect BP anisotropic filtering mode
tm0.lod_clamp = bp_tm0.lod_clamp; // TODO: What does this do?
}
namespace RenderState
@ -344,37 +330,42 @@ BlendingState GetNoColorWriteBlendState()
SamplerState GetInvalidSamplerState()
{
SamplerState state;
state.hex = UINT64_C(0xFFFFFFFFFFFFFFFF);
state.tm0.hex = 0xFFFFFFFF;
state.tm1.hex = 0xFFFFFFFF;
return state;
}
SamplerState GetPointSamplerState()
{
SamplerState state = {};
state.min_filter = SamplerState::Filter::Point;
state.mag_filter = SamplerState::Filter::Point;
state.mipmap_filter = SamplerState::Filter::Point;
state.wrap_u = SamplerState::AddressMode::Clamp;
state.wrap_v = SamplerState::AddressMode::Clamp;
state.min_lod = 0;
state.max_lod = 255;
state.lod_bias = 0;
state.anisotropic_filtering = false;
state.tm0.min_filter = FilterMode::Near;
state.tm0.mag_filter = FilterMode::Near;
state.tm0.mipmap_filter = FilterMode::Near;
state.tm0.wrap_u = WrapMode::Clamp;
state.tm0.wrap_v = WrapMode::Clamp;
state.tm1.min_lod = 0;
state.tm1.max_lod = 255;
state.tm0.lod_bias = 0;
state.tm0.anisotropic_filtering = false;
state.tm0.diag_lod = LODType::Edge;
state.tm0.lod_clamp = false;
return state;
}
SamplerState GetLinearSamplerState()
{
SamplerState state = {};
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
state.mipmap_filter = SamplerState::Filter::Linear;
state.wrap_u = SamplerState::AddressMode::Clamp;
state.wrap_v = SamplerState::AddressMode::Clamp;
state.min_lod = 0;
state.max_lod = 255;
state.lod_bias = 0;
state.anisotropic_filtering = false;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
state.tm0.mipmap_filter = FilterMode::Linear;
state.tm0.wrap_u = WrapMode::Clamp;
state.tm0.wrap_v = WrapMode::Clamp;
state.tm1.min_lod = 0;
state.tm1.max_lod = 255;
state.tm0.lod_bias = 0;
state.tm0.anisotropic_filtering = false;
state.tm0.diag_lod = LODType::Edge;
state.tm0.lod_clamp = false;
return state;
}

View file

@ -22,11 +22,24 @@ union RasterizationState
{
void Generate(const BPMemory& bp, PrimitiveType primitive_type);
RasterizationState& operator=(const RasterizationState& rhs);
RasterizationState() = default;
RasterizationState(const RasterizationState&) = default;
RasterizationState& operator=(const RasterizationState& rhs)
{
hex = rhs.hex;
return *this;
}
RasterizationState(RasterizationState&&) = default;
RasterizationState& operator=(RasterizationState&& rhs)
{
hex = rhs.hex;
return *this;
}
bool operator==(const RasterizationState& rhs) const { return hex == rhs.hex; }
bool operator!=(const RasterizationState& rhs) const { return hex != rhs.hex; }
bool operator!=(const RasterizationState& rhs) const { return !operator==(rhs); }
bool operator<(const RasterizationState& rhs) const { return hex < rhs.hex; }
BitField<0, 2, CullMode> cullmode;
BitField<3, 2, PrimitiveType> primitive;
@ -35,15 +48,28 @@ union RasterizationState
union FramebufferState
{
FramebufferState() = default;
FramebufferState(const FramebufferState&) = default;
FramebufferState& operator=(const FramebufferState& rhs)
{
hex = rhs.hex;
return *this;
}
FramebufferState(FramebufferState&&) = default;
FramebufferState& operator=(FramebufferState&& rhs)
{
hex = rhs.hex;
return *this;
}
bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return !operator==(rhs); }
BitField<0, 8, AbstractTextureFormat> color_texture_format;
BitField<8, 8, AbstractTextureFormat> depth_texture_format;
BitField<16, 8, u32> samples;
BitField<24, 1, u32> per_sample_shading;
bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; }
FramebufferState& operator=(const FramebufferState& rhs);
u32 hex;
};
@ -51,11 +77,24 @@ union DepthState
{
void Generate(const BPMemory& bp);
DepthState& operator=(const DepthState& rhs);
DepthState() = default;
DepthState(const DepthState&) = default;
DepthState& operator=(const DepthState& rhs)
{
hex = rhs.hex;
return *this;
}
DepthState(DepthState&&) = default;
DepthState& operator=(DepthState&& rhs)
{
hex = rhs.hex;
return *this;
}
bool operator==(const DepthState& rhs) const { return hex == rhs.hex; }
bool operator!=(const DepthState& rhs) const { return hex != rhs.hex; }
bool operator!=(const DepthState& rhs) const { return !operator==(rhs); }
bool operator<(const DepthState& rhs) const { return hex < rhs.hex; }
BitField<0, 1, u32> testenable;
BitField<1, 1, u32> updateenable;
BitField<2, 3, CompareMode> func;
@ -71,11 +110,24 @@ union BlendingState
// Will not be bit-correct, and in some cases not even remotely in the same ballpark.
void ApproximateLogicOpWithBlending();
BlendingState& operator=(const BlendingState& rhs);
BlendingState() = default;
BlendingState(const BlendingState&) = default;
BlendingState& operator=(const BlendingState& rhs)
{
hex = rhs.hex;
return *this;
}
BlendingState(BlendingState&&) = default;
BlendingState& operator=(BlendingState&& rhs)
{
hex = rhs.hex;
return *this;
}
bool operator==(const BlendingState& rhs) const { return hex == rhs.hex; }
bool operator!=(const BlendingState& rhs) const { return hex != rhs.hex; }
bool operator!=(const BlendingState& rhs) const { return !operator==(rhs); }
bool operator<(const BlendingState& rhs) const { return hex < rhs.hex; }
BitField<0, 1, u32> blendenable;
BitField<1, 1, u32> logicopenable;
BitField<2, 1, u32> dstalpha;
@ -93,43 +145,74 @@ union BlendingState
u32 hex;
};
union SamplerState
struct SamplerState
{
using StorageType = u64;
enum class Filter : StorageType
{
Point,
Linear
};
enum class AddressMode : StorageType
{
Clamp,
Repeat,
MirroredRepeat
};
void Generate(const BPMemory& bp, u32 index);
SamplerState& operator=(const SamplerState& rhs);
SamplerState() = default;
SamplerState(const SamplerState&) = default;
SamplerState& operator=(const SamplerState& rhs)
{
tm0.hex = rhs.tm0.hex;
tm1.hex = rhs.tm1.hex;
return *this;
}
SamplerState(SamplerState&&) = default;
SamplerState& operator=(SamplerState&& rhs)
{
tm0.hex = rhs.tm0.hex;
tm1.hex = rhs.tm1.hex;
return *this;
}
bool operator==(const SamplerState& rhs) const { return hex == rhs.hex; }
bool operator!=(const SamplerState& rhs) const { return hex != rhs.hex; }
bool operator<(const SamplerState& rhs) const { return hex < rhs.hex; }
BitField<0, 1, Filter> min_filter;
BitField<1, 1, Filter> mag_filter;
BitField<2, 1, Filter> mipmap_filter;
BitField<3, 2, AddressMode> wrap_u;
BitField<5, 2, AddressMode> wrap_v;
BitField<7, 16, s64> lod_bias; // multiplied by 256
BitField<23, 8, u64> min_lod; // multiplied by 16
BitField<31, 8, u64> max_lod; // multiplied by 16
BitField<39, 1, u64> anisotropic_filtering;
bool operator==(const SamplerState& rhs) const { return Hex() == rhs.Hex(); }
bool operator!=(const SamplerState& rhs) const { return !operator==(rhs); }
bool operator<(const SamplerState& rhs) const { return Hex() < rhs.Hex(); }
StorageType hex;
constexpr u64 Hex() const { return tm0.hex | (static_cast<u64>(tm1.hex) << 32); }
// Based on BPMemory TexMode0/TexMode1, but with slightly higher precision and some
// simplifications
union TM0
{
// BP's mipmap_filter can be None, but that is represented here by setting min_lod and max_lod
// to 0
BitField<0, 1, FilterMode> min_filter;
BitField<1, 1, FilterMode> mag_filter;
BitField<2, 1, FilterMode> mipmap_filter;
// Guaranteed to be valid values (i.e. not 3)
BitField<3, 2, WrapMode> wrap_u;
BitField<5, 2, WrapMode> wrap_v;
BitField<7, 1, LODType> diag_lod;
BitField<8, 16, s32> lod_bias; // multiplied by 256, higher precision than normal
BitField<24, 1, bool, u32> lod_clamp; // TODO: This isn't currently implemented
BitField<25, 1, bool, u32> anisotropic_filtering; // TODO: This doesn't use the BP one yet
u32 hex;
};
union TM1
{
// Min is guaranteed to be less than or equal to max
BitField<0, 8, u32> min_lod; // multiplied by 16
BitField<8, 8, u32> max_lod; // multiplied by 16
u32 hex;
};
TM0 tm0;
TM1 tm1;
};
namespace std
{
template <>
struct hash<SamplerState>
{
std::size_t operator()(SamplerState const& state) const noexcept
{
return std::hash<u64>{}(state.Hex());
}
};
} // namespace std
namespace RenderState
{
RasterizationState GetInvalidRasterizationState();

View file

@ -1,27 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace SamplerCommon
{
// Helper for checking if a BPMemory TexMode0 register is set to Point
// Filtering modes. This is used to decide whether Anisotropic enhancements
// are (mostly) safe in the VideoBackends.
// If both the minification and magnification filters are set to POINT modes
// then applying anisotropic filtering is equivalent to forced filtering. Point
// mode textures are usually some sort of 2D UI billboard which will end up
// misaligned from the correct pixels when filtered anisotropically.
template <class T>
constexpr bool IsBpTexMode0PointFiltering(const T& tm0)
{
return tm0.min_filter == FilterMode::Near && tm0.mag_filter == FilterMode::Near;
}
// Check if the minification filter has mipmap based filtering modes enabled.
template <class T>
constexpr bool AreBpTexMode0MipmapsEnabled(const T& tm0)
{
return tm0.mipmap_filter != MipMode::None;
}
} // namespace SamplerCommon

View file

@ -39,6 +39,9 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp;
bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion;
bits.enable_validation_layer = g_ActiveConfig.bEnableValidationLayer;
bits.manual_texture_sampling = !g_ActiveConfig.bFastTextureSampling;
bits.manual_texture_sampling_custom_texture_sizes =
g_ActiveConfig.ManualTextureSamplingWithHiResTextures();
return bits;
}
@ -105,6 +108,30 @@ void WriteIsNanHeader(ShaderCode& out, APIType api_type)
}
}
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config)
{
// ==============================================
// BitfieldExtract for APIs which don't have it
// ==============================================
if (!host_config.backend_bitfield)
{
out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" uint mask = uint((1 << size) - 1);\n"
" return uint(val >> off) & mask;\n"
"}}\n\n");
out.Write("int bitfieldExtract(int val, int off, int size) {{\n"
" // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" return ((val << (32 - size - off)) >> (32 - size));\n"
"}}\n\n");
}
}
static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier,
std::string_view type, std::string_view name, int var_index,
std::string_view semantic = {}, int semantic_index = -1)

View file

@ -14,6 +14,7 @@
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"
enum class APIType;
@ -168,6 +169,8 @@ union ShaderHostConfig
BitField<21, 1, bool, u32> backend_logic_op;
BitField<22, 1, bool, u32> backend_palette_conversion;
BitField<23, 1, bool, u32> enable_validation_layer;
BitField<24, 1, bool, u32> manual_texture_sampling;
BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes;
static ShaderHostConfig GetCurrent();
};
@ -177,6 +180,8 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
bool include_host_config, bool include_api = true);
void WriteIsNanHeader(ShaderCode& out, APIType api_type);
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier);
@ -195,6 +200,16 @@ void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_v
const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block = false,
bool in = false);
// bitfieldExtract generator for BitField types
template <auto ptr_to_bitfield_member>
std::string BitfieldExtract(std::string_view source)
{
using BitFieldT = Common::MemberType<ptr_to_bitfield_member>;
return fmt::format("bitfieldExtract({}({}), {}, {})", BitFieldT::IsSigned() ? "int" : "uint",
source, static_cast<u32>(BitFieldT::StartBit()),
static_cast<u32>(BitFieldT::NumBits()));
}
// Constant variable names
#define I_COLORS "color"
#define I_KCOLORS "k"

View file

@ -40,7 +40,6 @@
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TMEM.h"
@ -966,6 +965,18 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns
entry->texture->Save(filename, level);
}
// Helper for checking if a BPMemory TexMode0 register is set to Point
// Filtering modes. This is used to decide whether Anisotropic enhancements
// are (mostly) safe in the VideoBackends.
// If both the minification and magnification filters are set to POINT modes
// then applying anisotropic filtering is equivalent to forced filtering. Point
// mode textures are usually some sort of 2D UI billboard which will end up
// misaligned from the correct pixels when filtered anisotropically.
static bool IsAnisostropicEnhancementSafe(const TexMode0& tm0)
{
return !(tm0.min_filter == FilterMode::Near && tm0.mag_filter == FilterMode::Near);
}
static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
bool has_arbitrary_mips)
{
@ -977,19 +988,18 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
// Force texture filtering config option.
if (g_ActiveConfig.bForceFiltering)
{
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ?
SamplerState::Filter::Linear :
SamplerState::Filter::Point;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
state.tm0.mipmap_filter =
tm0.mipmap_filter != MipMode::None ? FilterMode::Linear : FilterMode::Near;
}
// Custom textures may have a greater number of mips
if (custom_tex)
state.max_lod = 255;
state.tm1.max_lod = 255;
// Anisotropic filtering option.
if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0))
if (g_ActiveConfig.iMaxAnisotropy != 0 && IsAnisostropicEnhancementSafe(tm0))
{
// https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt
// For predictable results on all hardware/drivers, only use one of:
@ -998,31 +1008,32 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
// Letting the game set other combinations will have varying arbitrary results;
// possibly being interpreted as equal to bilinear/trilinear, implicitly
// disabling anisotropy, or changing the anisotropic algorithm employed.
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
state.mipmap_filter = SamplerState::Filter::Linear;
state.anisotropic_filtering = 1;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
if (tm0.mipmap_filter != MipMode::None)
state.tm0.mipmap_filter = FilterMode::Linear;
state.tm0.anisotropic_filtering = true;
}
else
{
state.anisotropic_filtering = 0;
state.tm0.anisotropic_filtering = false;
}
if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
if (has_arbitrary_mips && tm0.mipmap_filter != MipMode::None)
{
// Apply a secondary bias calculated from the IR scale to pull inwards mipmaps
// that have arbitrary contents, eg. are used for fog effects where the
// distance they kick in at is important to preserve at any resolution.
// Correct this with the upscaling factor of custom textures.
s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f;
state.lod_bias = std::clamp<s64>(state.lod_bias + lod_offset, -32768, 32767);
s32 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f;
state.tm0.lod_bias = std::clamp<s32>(state.tm0.lod_bias + lod_offset, -32768, 32767);
// Anisotropic also pushes mips farther away so it cannot be used either
state.anisotropic_filtering = 0;
state.tm0.anisotropic_filtering = false;
}
g_renderer->SetSamplerState(index, state);
PixelShaderManager::SetSamplerState(index, state.tm0.hex, state.tm1.hex);
}
void TextureCacheBase::BindTextures(BitSet32 used_textures)

View file

@ -9,7 +9,6 @@
#include "Common/Align.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureDecoder.h"
TextureInfo TextureInfo::FromStage(u32 stage)
@ -28,7 +27,7 @@ TextureInfo TextureInfo::FromStage(u32 stage)
const u8* tlut_ptr = &texMem[tlutaddr];
std::optional<u32> mip_count;
const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0);
const bool has_mipmaps = tex.texMode0.mipmap_filter != MipMode::None;
if (has_mipmaps)
{
mip_count = (tex.texMode1.max_lod + 0xf) / 0x10;

View file

@ -9,24 +9,6 @@
namespace UberShader
{
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config)
{
// ==============================================
// BitfieldExtract for APIs which don't have it
// ==============================================
if (!host_config.backend_bitfield)
{
out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" uint mask = uint((1 << size) - 1);\n"
" return uint(val >> off) & mask;\n"
"}}\n\n");
}
}
void WriteLightingFunction(ShaderCode& out)
{
// ==============================================

View file

@ -3,37 +3,18 @@
#pragma once
#include <string>
#include <string_view>
#include <fmt/format.h>
#include "Common/CommonTypes.h"
#include "Common/TypeUtils.h"
class ShaderCode;
enum class APIType;
union ShaderHostConfig;
namespace UberShader
{
// Common functions across all ubershaders
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
// Vertex lighting
void WriteLightingFunction(ShaderCode& out);
void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view world_pos_var,
std::string_view normal_var, std::string_view in_color_0_var,
std::string_view in_color_1_var, std::string_view out_color_0_var,
std::string_view out_color_1_var);
// bitfieldExtract generator for BitField types
template <auto ptr_to_bitfield_member>
std::string BitfieldExtract(std::string_view source)
{
using BitFieldT = Common::MemberType<ptr_to_bitfield_member>;
return fmt::format("bitfieldExtract({}, {}, {})", source, static_cast<u32>(BitFieldT::StartBit()),
static_cast<u32>(BitFieldT::NumBits()));
}
} // namespace UberShader

View file

@ -63,8 +63,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WriteBitfieldExtractHeader(out, api_type, host_config);
WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box);
WriteUberShaderCommonHeader(out, api_type, host_config);
if (per_pixel_lighting)
WriteLightingFunction(out);
@ -226,17 +226,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{
// Doesn't look like DirectX supports this. Oh well the code path is here just in case it
// supports this in the future.
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n");
out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write(" return iround(texture(samp[sampler_num], uv) * 255.0);\n");
out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n");
else if (api_type == APIType::D3D)
out.Write(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n");
out.Write(" return sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer);\n");
out.Write("}}\n\n");
}
else
{
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"
" // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support "
out.Write("int4 sampleTextureWrapper(uint sampler_num, int2 uv, int layer) {{\n"
" // This is messy, but DirectX, OpenGL 3.3, and OpenGL ES 3.0 don't support "
"dynamic indexing of the sampler array\n"
" // With any luck the shader compiler will optimise this if the hardware supports "
"dynamic indexing.\n"
@ -244,9 +244,14 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
for (int i = 0; i < 8; i++)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i);
{
out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i);
}
else if (api_type == APIType::D3D)
out.Write(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i);
{
out.Write(" case {0}u: return sampleTexture({0}u, tex[{0}u], samp[{0}u], uv, layer);\n",
i);
}
}
out.Write(" }}\n"
"}}\n\n");
@ -284,8 +289,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// ======================
// Indirect Lookup
// ======================
const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name,
std::string_view in_index_name) {
const auto LookupIndirectTexture = [&out](std::string_view out_var_name,
std::string_view in_index_name) {
// in_index_name is the indirect stage, not the tev stage
// bpmem_iref is packed differently from RAS1_IREF
// This function assumes bpmem_iref is nonzero (i.e. matrix is not off, and the
@ -301,11 +306,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" else\n"
" fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n"
"\n"
" {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) * " I_TEXDIMS
"[texmap].xy, {})).abg;\n"
"}}",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name,
stereo ? "float(layer)" : "0.0");
" {} = sampleTextureWrapper(texmap, fixedPoint_uv, layer).abg;\n"
"}}\n",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name);
};
// ======================
@ -729,6 +732,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write("\n ) {{\n");
}
if (!stereo)
out.Write(" int layer = 0;\n");
out.Write(" int3 tevcoord = int3(0, 0, 0);\n"
" State s;\n"
@ -786,7 +791,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{
out.Write(" int2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
// TODO: S24 overflows here?
}
@ -820,7 +825,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// For the undefined case, we just skip applying the indirect operation, which is close enough.
// Viewtiful Joe hits the undefined case (bug 12525).
// Wrapping and add to previous still apply in this case (and when the stage is disabled).
out.Write(" if (bpmem_iref(bt) != 0u) {{");
out.Write(" if (bpmem_iref(bt) != 0u) {{\n");
out.Write(" int3 indcoord;\n");
LookupIndirectTexture("indcoord", "bt");
out.Write(" if (bs != 0u)\n"
@ -910,10 +915,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" uint sampler_num = {};\n",
BitfieldExtract<&TwoTevStageOrders::texmap0>("ss.order"));
out.Write("\n"
" float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n");
out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n",
stereo ? "float(layer)" : "0.0");
out.Write(" uint swap = {};\n",
" int4 color = sampleTextureWrapper(sampler_num, tevcoord.xy, layer);\n"
" uint swap = {};\n",
BitfieldExtract<&TevStageCombiner::AlphaCombiner::tswap>("ss.ac"));
out.Write(" s.TexColor = Swizzle(swap, color);\n");
out.Write(" }} else {{\n"

View file

@ -49,8 +49,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
out.Write("}};\n\n");
WriteUberShaderCommonHeader(out, api_type, host_config);
WriteIsNanHeader(out, api_type);
WriteBitfieldExtractHeader(out, api_type, host_config);
WriteLightingFunction(out);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)

View file

@ -27,7 +27,6 @@
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexLoaderManager.h"

View file

@ -135,6 +135,7 @@ void VideoConfig::Refresh()
bVertexRounding = Config::Get(Config::GFX_HACK_VERTEX_ROUDING);
iEFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE);
iMissingColorValue = Config::Get(Config::GFX_HACK_MISSING_COLOR_VALUE);
bFastTextureSampling = Config::Get(Config::GFX_HACK_FAST_TEXTURE_SAMPLING);
bPerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE);

View file

@ -135,6 +135,7 @@ struct VideoConfig final
int iLog = 0; // CONF_ bits
int iSaveTargetId = 0; // TODO: Should be dropped
u32 iMissingColorValue = 0;
bool bFastTextureSampling = false;
// Stereoscopy
StereoMode stereo_mode{};
@ -230,6 +231,8 @@ struct VideoConfig final
bool bSupportsDepthReadback = false;
bool bSupportsShaderBinaries = false;
bool bSupportsPipelineCacheData = false;
bool bSupportsCoarseDerivatives = false;
bool bSupportsTextureQueryLevels = false;
} backend_info;
// Utility
@ -243,6 +246,16 @@ struct VideoConfig final
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
}
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != 1; }
bool ManualTextureSamplingWithHiResTextures() const
{
// Hi-res textures (including hi-res EFB copies, but not native-resolution EFB copies at higher
// internal resolutions) breaks the wrapping logic used by manual texture sampling.
if (bFastTextureSampling)
return false;
if (iEFBScale != 1 && bCopyEFBScaled)
return true;
return bHiresTextures;
}
bool UsingUberShaders() const;
u32 GetShaderCompilerThreads() const;
u32 GetShaderPrecompilerThreads() const;