Merge pull request #6141 from JonnyH/WIP/shader-framebuffer-fetch2

Implement dual-source blending in shader
This commit is contained in:
Markus Wick 2018-01-05 23:42:43 +01:00 committed by GitHub
commit dc08b73db1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 454 additions and 101 deletions

View file

@ -82,6 +82,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsBitfield = false;
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
IDXGIFactory2* factory;
IDXGIAdapter* ad;

View file

@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View file

@ -64,26 +64,26 @@ static std::string s_glsl_header = "";
static std::string GetGLSLVersionString()
{
GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion;
GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
switch (v)
{
case GLSLES_300:
case GlslEs300:
return "#version 300 es";
case GLSLES_310:
case GlslEs310:
return "#version 310 es";
case GLSLES_320:
case GlslEs320:
return "#version 320 es";
case GLSL_130:
case Glsl130:
return "#version 130";
case GLSL_140:
case Glsl140:
return "#version 140";
case GLSL_150:
case Glsl150:
return "#version 150";
case GLSL_330:
case Glsl330:
return "#version 330";
case GLSL_400:
case Glsl400:
return "#version 400";
case GLSL_430:
case Glsl430:
return "#version 430";
default:
// Shouldn't ever hit this
@ -431,7 +431,7 @@ bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string&
// but not GLSL 4.3. Mesa is one example.
std::string header;
if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
g_ogl_config.eSupportedGLSLVersion < GLSL_430)
g_ogl_config.eSupportedGLSLVersion < Glsl430)
{
header = "#extension GL_ARB_compute_shader : enable\n";
}
@ -839,8 +839,8 @@ void ProgramShaderCache::DestroyShaders()
void ProgramShaderCache::CreateHeader()
{
GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion;
bool is_glsles = v >= GLSLES_300;
GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
bool is_glsles = v >= GlslEs300;
std::string SupportedESPointSize;
std::string SupportedESTextureBuffer;
switch (g_ogl_config.SupportedESPointSize)
@ -858,14 +858,14 @@ void ProgramShaderCache::CreateHeader()
switch (g_ogl_config.SupportedESTextureBuffer)
{
case ES_TEXBUF_TYPE::TEXBUF_EXT:
case EsTexbufType::TexbufExt:
SupportedESTextureBuffer = "#extension GL_EXT_texture_buffer : enable";
break;
case ES_TEXBUF_TYPE::TEXBUF_OES:
case EsTexbufType::TexbufOes:
SupportedESTextureBuffer = "#extension GL_OES_texture_buffer : enable";
break;
case ES_TEXBUF_TYPE::TEXBUF_CORE:
case ES_TEXBUF_TYPE::TEXBUF_NONE:
case EsTexbufType::TexbufCore:
case EsTexbufType::TexbufNone:
SupportedESTextureBuffer = "";
break;
}
@ -885,6 +885,24 @@ void ProgramShaderCache::CreateHeader()
}
}
std::string framebuffer_fetch_string;
switch (g_ogl_config.SupportedFramebufferFetch)
{
case EsFbFetchType::FbFetchExt:
framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n"
"#define FB_FETCH_VALUE real_ocol0\n"
"#define FRAGMENT_INOUT inout";
break;
case EsFbFetchType::FbFetchArm:
framebuffer_fetch_string = "#extension GL_ARM_shader_framebuffer_fetch: enable\n"
"#define FB_FETCH_VALUE gl_LastFragColorARM\n"
"#define FRAGMENT_INOUT out";
break;
case EsFbFetchType::FbFetchNone:
framebuffer_fetch_string = "";
break;
}
s_glsl_header = StringFromFormat(
"%s\n"
"%s\n" // ubo
@ -902,6 +920,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // ES texture buffer
"%s\n" // ES dual source blend
"%s\n" // shader image load store
"%s\n" // shader framebuffer fetch
// Precision defines for GLSL ES
"%s\n"
@ -928,11 +947,11 @@ void ProgramShaderCache::CreateHeader()
,
GetGLSLVersionString().c_str(),
v < GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "", earlyz_string.c_str(),
(g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GLSLES_310) ?
v < Glsl140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "", earlyz_string.c_str(),
(g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GlslEs310) ?
"#extension GL_ARB_shading_language_420pack : enable" :
"",
(g_ogl_config.bSupportsMSAA && v < GLSL_150) ?
(g_ogl_config.bSupportsMSAA && v < Glsl150) ?
"#extension GL_ARB_texture_multisample : enable" :
"",
// Attribute and fragment output bindings are still done via glBindAttribLocation and
@ -955,15 +974,15 @@ void ProgramShaderCache::CreateHeader()
!is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ?
"#extension GL_ARB_shader_storage_buffer_object : enable" :
"",
v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ?
v < Glsl400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ?
"#extension GL_ARB_gpu_shader5 : enable" :
"",
v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsSSAA ?
v < Glsl400 && g_ActiveConfig.backend_info.bSupportsSSAA ?
"#extension GL_ARB_sample_shading : enable" :
"",
SupportedESPointSize.c_str(),
g_ogl_config.bSupportsAEP ? "#extension GL_ANDROID_extension_pack_es31a : enable" : "",
v < GLSL_140 && g_ActiveConfig.backend_info.bSupportsPaletteConversion ?
v < Glsl140 && g_ActiveConfig.backend_info.bSupportsPaletteConversion ?
"#extension GL_ARB_texture_buffer_object : enable" :
"",
SupportedESTextureBuffer.c_str(),
@ -973,16 +992,16 @@ void ProgramShaderCache::CreateHeader()
,
g_ogl_config.bSupportsImageLoadStore &&
((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
"#extension GL_ARB_shader_image_load_store : enable" :
"",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "",
framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "",
is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
"precision highp usamplerBuffer;" :
"",
v > GLSLES_300 ? "precision highp sampler2DMS;" : "",
v >= GLSLES_310 ? "precision highp image2DArray;" : "");
v > GlslEs300 ? "precision highp sampler2DMS;" : "",
v >= GlslEs310 ? "precision highp image2DArray;" : "");
}
void ProgramShaderCache::PrecompileUberShaders()

View file

@ -508,12 +508,12 @@ Renderer::Renderer()
1 :
GLExtensions::Supports("GL_EXT_geometry_point_size") ? 2 : 0;
g_ogl_config.SupportedESTextureBuffer = GLExtensions::Supports("VERSION_GLES_3_2") ?
ES_TEXBUF_TYPE::TEXBUF_CORE :
EsTexbufType::TexbufCore :
GLExtensions::Supports("GL_OES_texture_buffer") ?
ES_TEXBUF_TYPE::TEXBUF_OES :
EsTexbufType::TexbufOes :
GLExtensions::Supports("GL_EXT_texture_buffer") ?
ES_TEXBUF_TYPE::TEXBUF_EXT :
ES_TEXBUF_TYPE::TEXBUF_NONE;
EsTexbufType::TexbufExt :
EsTexbufType::TexbufNone;
g_ogl_config.bSupportsGLSLCache = true;
g_ogl_config.bSupportsGLSync = true;
@ -522,16 +522,31 @@ Renderer::Renderer()
// depth clamping.
g_Config.backend_info.bSupportsDepthClamp = false;
if (GLExtensions::Supports("GL_EXT_shader_framebuffer_fetch"))
{
g_ogl_config.SupportedFramebufferFetch = EsFbFetchType::FbFetchExt;
}
else if (GLExtensions::Supports("GL_ARM_shader_framebuffer_fetch"))
{
g_ogl_config.SupportedFramebufferFetch = EsFbFetchType::FbFetchArm;
}
else
{
g_ogl_config.SupportedFramebufferFetch = EsFbFetchType::FbFetchNone;
}
g_Config.backend_info.bSupportsFramebufferFetch =
g_ogl_config.SupportedFramebufferFetch != EsFbFetchType::FbFetchNone;
if (GLExtensions::Version() == 300)
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
g_ogl_config.eSupportedGLSLVersion = GlslEs300;
g_ogl_config.bSupportsAEP = false;
g_ogl_config.bSupportsTextureStorage = true;
g_Config.backend_info.bSupportsGeometryShaders = false;
}
else if (GLExtensions::Version() == 310)
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_310;
g_ogl_config.eSupportedGLSLVersion = GlslEs310;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsImageLoadStore = true;
@ -556,7 +571,7 @@ Renderer::Renderer()
}
else
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_320;
g_ogl_config.eSupportedGLSLVersion = GlslEs320;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsImageLoadStore = true;
@ -589,7 +604,7 @@ Renderer::Renderer()
}
else if (GLExtensions::Version() == 300)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_130;
g_ogl_config.eSupportedGLSLVersion = Glsl130;
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
@ -598,7 +613,7 @@ Renderer::Renderer()
}
else if (GLExtensions::Version() == 310)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_ogl_config.eSupportedGLSLVersion = Glsl140;
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
@ -607,16 +622,16 @@ Renderer::Renderer()
}
else if (GLExtensions::Version() == 320)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_150;
g_ogl_config.eSupportedGLSLVersion = Glsl150;
}
else if (GLExtensions::Version() == 330)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_330;
g_ogl_config.eSupportedGLSLVersion = Glsl330;
}
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = GLSL_430;
g_ogl_config.eSupportedGLSLVersion = Glsl430;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
@ -628,7 +643,7 @@ Renderer::Renderer()
}
else
{
g_ogl_config.eSupportedGLSLVersion = GLSL_400;
g_ogl_config.eSupportedGLSLVersion = Glsl400;
g_Config.backend_info.bSupportsSSAA = true;
if (GLExtensions::Version() == 420)
@ -1275,44 +1290,54 @@ void Renderer::SetBlendingState(const BlendingState& state)
bool useDualSource =
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
// Only use shader blend if we need to and we don't support dual-source blending directly
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
const GLenum src_factors[8] = {
GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
if (state.blendenable)
{
glEnable(GL_BLEND);
}
else
if (useShaderBlend)
{
glDisable(GL_BLEND);
}
else
{
const GLenum src_factors[8] = {
GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
// Adventure 2 Battle: graphics crash when loading first Dark level"
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
glBlendEquationSeparate(equation, equationAlpha);
glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor],
src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]);
if (state.blendenable)
{
glEnable(GL_BLEND);
}
else
{
glDisable(GL_BLEND);
}
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
// Adventure 2 Battle: graphics crash when loading first Dark level"
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
glBlendEquationSeparate(equation, equationAlpha);
glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor],
src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]);
}
const GLenum logic_op_codes[16] = {
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,

View file

@ -15,24 +15,31 @@ namespace OGL
{
void ClearEFBCache();
enum GLSL_VERSION
enum GlslVersion
{
GLSL_130,
GLSL_140,
GLSL_150,
GLSL_330,
GLSL_400, // and above
GLSL_430,
GLSLES_300, // GLES 3.0
GLSLES_310, // GLES 3.1
GLSLES_320, // GLES 3.2
Glsl130,
Glsl140,
Glsl150,
Glsl330,
Glsl400, // and above
Glsl430,
GlslEs300, // GLES 3.0
GlslEs310, // GLES 3.1
GlslEs320, // GLES 3.2
};
enum class ES_TEXBUF_TYPE
enum class EsTexbufType
{
TEXBUF_NONE,
TEXBUF_CORE,
TEXBUF_OES,
TEXBUF_EXT
TexbufNone,
TexbufCore,
TexbufOes,
TexbufExt
};
enum class EsFbFetchType
{
FbFetchNone,
FbFetchExt,
FbFetchArm,
};
// ogl-only config, so not in VideoConfig.h
@ -44,13 +51,13 @@ struct VideoConfig
bool bSupportsGLBaseVertex;
bool bSupportsGLBufferStorage;
bool bSupportsMSAA;
GLSL_VERSION eSupportedGLSLVersion;
GlslVersion eSupportedGLSLVersion;
bool bSupportViewportFloat;
bool bSupportsAEP;
bool bSupportsDebug;
bool bSupportsCopySubImage;
u8 SupportedESPointSize;
ES_TEXBUF_TYPE SupportedESTextureBuffer;
EsTexbufType SupportedESTextureBuffer;
bool bSupportsTextureStorage;
bool bSupports2DTextureStorageMultisample;
bool bSupports3DTextureStorageMultisample;
@ -59,6 +66,7 @@ struct VideoConfig
bool bSupportsAniso;
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
EsFbFetchType SupportedFramebufferFetch;
const char* gl_vendor;
const char* gl_renderer;

View file

@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bForceCopyToRam = true;
g_Config.backend_info.bSupportsFramebufferFetch = false;
// aamodes
g_Config.backend_info.AAModes = {1};

View file

@ -247,6 +247,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs.
config->backend_info.bSupportsCopyToVram = true; // Assumed support.
config->backend_info.bForceCopyToRam = false;
config->backend_info.bSupportsFramebufferFetch = false;
}
void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)

View file

@ -153,9 +153,7 @@ static void BPWritten(const BPCmd& bp)
SetBlendMode();
// Dither
if (bp.changes & 0x04)
PixelShaderManager::SetBlendModeChanged();
PixelShaderManager::SetBlendModeChanged();
}
return;
case BPMEM_CONSTANTALPHA: // Set Destination Alpha

View file

@ -42,6 +42,14 @@ struct PixelShaderConstants
std::array<uint4, 16> pack1; // .xy - combiners, .z - tevind, .w - iref
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel
std::array<int4, 32> konst; // .rgba
// The following are used in ubershaders when using shader_framebuffer_fetch blending
u32 blend_enable;
u32 blend_src_factor;
u32 blend_src_factor_alpha;
u32 blend_dst_factor;
u32 blend_dst_factor_alpha;
u32 blend_subtract;
u32 blend_subtract_alpha;
};
struct VertexShaderConstants

View file

@ -16,6 +16,7 @@
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@ -319,6 +320,21 @@ PixelShaderUid GetPixelShaderUid()
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
BlendingState state = {};
state.Generate(bpmem);
if (state.usedualsrc && state.dstalpha && g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
uid_data->blend_enable = state.blendenable;
uid_data->blend_src_factor = state.srcfactor;
uid_data->blend_src_factor_alpha = state.srcfactoralpha;
uid_data->blend_dst_factor = state.dstfactor;
uid_data->blend_dst_factor_alpha = state.dstfactoralpha;
uid_data->blend_subtract = state.subtract;
uid_data->blend_subtract_alpha = state.subtractAlpha;
}
return out;
}
@ -397,6 +413,13 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
"\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind
"\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel
"\tint4 konstLookup[32];\n"
"\tbool blend_enable;\n"
"\tuint blend_src_factor;\n"
"\tuint blend_src_factor_alpha;\n"
"\tuint blend_dst_factor;\n"
"\tuint blend_dst_factor_alpha;\n"
"\tbool blend_subtract;\n"
"\tbool blend_subtract_alpha;\n"
"};\n\n");
out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
@ -447,6 +470,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source);
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data)
@ -519,6 +543,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
host_config.backend_dual_source_blend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
uid_data->useDstAlpha);
const bool use_shader_blend =
!use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
@ -535,6 +561,21 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
else if (use_shader_blend)
{
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so pull out the "real" output value
// and use a temporary for calculations, then set the output value once at the end of the
// shader
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -575,6 +616,13 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
out.Write("void main()\n{\n");
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n");
out.Write("\tfloat4 ocol0;\n");
out.Write("\tfloat4 ocol1;\n");
}
}
else // D3D
{
@ -710,7 +758,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
// testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth, use_dual_source);
WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth,
use_dual_source || use_shader_blend);
if (uid_data->zfreeze)
{
@ -793,7 +842,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
WriteFog(out, uid_data);
// Write the color and alpha values to the framebuffer
WriteColor(out, ApiType, uid_data, use_dual_source);
// If using shader blend, we still use the separate alpha
WriteColor(out, ApiType, uid_data, use_dual_source || use_shader_blend);
if (use_shader_blend)
WriteBlend(out, uid_data);
if (uid_data->bounding_box)
{
@ -1358,3 +1411,79 @@ static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid
}
}
}
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
if (uid_data->blend_enable)
{
static const std::array<const char*, 8> blendSrcFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendSrcFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"initial_ocol0.a;", // DSTCLR
"1.0 - initial_ocol0.a;", // INVDSTCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendDstFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"ocol1.aaa;", // SRCALHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendDstFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"ocol0.a;", // SRCCLR
"1.0 - ocol0.a;", // INVSRCCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write("\tfloat4 blend_src;\n");
out.Write("\tblend_src.rgb = %s\n", blendSrcFactor[uid_data->blend_src_factor]);
out.Write("\tblend_src.a = %s\n", blendSrcFactorAlpha[uid_data->blend_src_factor_alpha]);
out.Write("\tfloat4 blend_dst;\n");
out.Write("\tblend_dst.rgb = %s\n", blendDstFactor[uid_data->blend_dst_factor]);
out.Write("\tblend_dst.a = %s\n", blendDstFactorAlpha[uid_data->blend_dst_factor_alpha]);
out.Write("\tfloat4 blend_result;\n");
if (uid_data->blend_subtract)
{
out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * "
"blend_src.rgb;\n");
}
else
{
out.Write(
"\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n");
}
if (uid_data->blend_subtract_alpha)
out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n");
else
out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
}
else
{
out.Write("\tfloat4 blend_result = ocol0;\n");
}
out.Write("\treal_ocol0 = blend_result;\n");
}

View file

@ -44,7 +44,13 @@ struct pixel_shader_uid_data
u32 rgba6_format : 1;
u32 dither : 1;
u32 uint_output : 1;
u32 pad : 15;
u32 blend_enable : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_src_factor : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_src_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_dst_factor : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;

View file

@ -473,6 +473,43 @@ void PixelShaderManager::SetBlendModeChanged()
constants.dither = dither;
dirty = true;
}
BlendingState state = {};
state.Generate(bpmem);
if (constants.blend_enable != state.blendenable)
{
constants.blend_enable = state.blendenable;
dirty = true;
}
if (constants.blend_src_factor != state.srcfactor)
{
constants.blend_src_factor = state.srcfactor;
dirty = true;
}
if (constants.blend_src_factor_alpha != state.srcfactoralpha)
{
constants.blend_src_factor_alpha = state.srcfactoralpha;
dirty = true;
}
if (constants.blend_dst_factor != state.dstfactor)
{
constants.blend_dst_factor = state.dstfactor;
dirty = true;
}
if (constants.blend_dst_factor_alpha != state.dstfactoralpha)
{
constants.blend_dst_factor_alpha = state.dstfactoralpha;
dirty = true;
}
if (constants.blend_subtract != state.subtract)
{
constants.blend_subtract = state.subtract;
dirty = true;
}
if (constants.blend_subtract_alpha != state.subtractAlpha)
{
constants.blend_subtract_alpha = state.subtractAlpha;
dirty = true;
}
s_bDestAlphaDirty = true;
}

View file

@ -32,6 +32,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield;
bits.backend_dynamic_sampler_indexing =
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
return bits;
}
@ -68,9 +69,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
if (include_host_config)
{
// We're using 20 bits, so 5 hex characters.
// We're using 21 bits, so 6 hex characters.
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent();
filename += StringFromFormat("-%05X", host_config.bits);
filename += StringFromFormat("-%06X", host_config.bits);
}
filename += ".cache";

View file

@ -178,7 +178,8 @@ union ShaderHostConfig
u32 backend_reversed_depth_range : 1;
u32 backend_bitfield : 1;
u32 backend_dynamic_sampler_indexing : 1;
u32 pad : 12;
u32 backend_shader_framebuffer_fetch : 1;
u32 pad : 11;
};
static ShaderHostConfig GetCurrent();

View file

@ -47,6 +47,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo;
const bool use_dual_source = host_config.backend_dual_source_blend;
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
const bool early_depth = uid_data->early_depth != 0;
const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
const bool bounding_box =
@ -77,6 +78,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
else if (use_shader_blend)
{
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so pull out the "real" output value
// and use a temporary for calculations, then set the output value once at the end of the
// shader
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -658,6 +674,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("void main()\n{\n");
out.Write(" float4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n");
out.Write(" float4 ocol0;\n");
out.Write(" float4 ocol1;\n");
}
}
else // D3D
{
@ -1203,7 +1226,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
" \n");
if (use_dual_source)
if (use_dual_source || use_shader_blend)
{
out.Write(" // Dest alpha override (dual source blending)\n"
" // Colors will be blended against the alpha from ocol1 and\n"
@ -1228,6 +1251,99 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" }\n");
}
if (use_shader_blend)
{
static const std::array<const char*, 8> blendSrcFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendSrcFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"initial_ocol0.a;", // DSTCLR
"1.0 - initial_ocol0.a;", // INVDSTCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendDstFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"ocol1.aaa;", // SRCALHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
static const std::array<const char*, 8> blendDstFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"ocol0.a;", // SRCCLR
"1.0 - ocol0.a;", // INVSRCCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write(" if (blend_enable) {\n"
" float4 blend_src;\n"
" switch (blend_src_factor) {\n");
for (unsigned i = 0; i < blendSrcFactor.size(); i++)
{
out.Write(" case %uu: blend_src.rgb = %s; break;\n", i, blendSrcFactor[i]);
}
out.Write(" }\n"
" switch (blend_src_factor_alpha) {\n");
for (unsigned i = 0; i < blendSrcFactorAlpha.size(); i++)
{
out.Write(" case %uu: blend_src.a = %s; break;\n", i, blendSrcFactorAlpha[i]);
}
out.Write(" }\n"
" float4 blend_dst;\n"
" switch (blend_dst_factor) {\n");
for (unsigned i = 0; i < blendDstFactor.size(); i++)
{
out.Write(" case %uu: blend_dst.rgb = %s; break;\n", i, blendDstFactor[i]);
}
out.Write(" }\n"
" switch (blend_dst_factor_alpha) {\n");
for (unsigned i = 0; i < blendDstFactorAlpha.size(); i++)
{
out.Write(" case %uu: blend_dst.a = %s; break;\n", i, blendDstFactorAlpha[i]);
}
out.Write(
" }\n"
" float4 blend_result;\n"
" if (blend_subtract)\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n"
" else\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * "
"blend_src.rgb;\n");
out.Write(" if (blend_subtract_alpha)\n"
" blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"
" else\n"
" blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
out.Write(" real_ocol0 = blend_result;\n");
out.Write(" } else {\n"
" real_ocol0 = ocol0;\n"
" }\n");
}
out.Write("}\n"
"\n"
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n"

View file

@ -227,6 +227,7 @@ struct VideoConfig final
bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsBPTCTextures;
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
} backend_info;
// Utility