Enable shader_framebuffer_fetch blend path on ubershaders

Tested on Linux with Intel Skylake integrated graphics and
blend_func_extended force-disabled, as it's the only platform I have
that supports fb_fetch and doesn't crash with ubershaders.
This commit is contained in:
Jonathan Hamilton 2017-12-26 12:30:22 -08:00
parent 8d68adcaf3
commit ceb1f8c8cb
5 changed files with 170 additions and 4 deletions

View file

@ -153,9 +153,7 @@ static void BPWritten(const BPCmd& bp)
SetBlendMode();
// Dither
if (bp.changes & 0x04)
PixelShaderManager::SetBlendModeChanged();
PixelShaderManager::SetBlendModeChanged();
}
return;
case BPMEM_CONSTANTALPHA: // Set Destination Alpha

View file

@ -42,6 +42,14 @@ struct PixelShaderConstants
std::array<uint4, 16> pack1; // .xy - combiners, .z - tevind, .w - iref
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel
std::array<int4, 32> konst; // .rgba
// The following are used in ubershaders when using shader_framebuffer_fetch blending
u32 blend_enable;
u32 blend_src_factor;
u32 blend_src_factor_alpha;
u32 blend_dst_factor;
u32 blend_dst_factor_alpha;
u32 blend_subtract;
u32 blend_subtract_alpha;
};
struct VertexShaderConstants

View file

@ -413,6 +413,13 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
"\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind
"\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel
"\tint4 konstLookup[32];\n"
"\tbool blend_enable;\n"
"\tuint blend_src_factor;\n"
"\tuint blend_src_factor_alpha;\n"
"\tuint blend_dst_factor;\n"
"\tuint blend_dst_factor_alpha;\n"
"\tbool blend_subtract;\n"
"\tbool blend_subtract_alpha;\n"
"};\n\n");
out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"

View file

@ -473,6 +473,43 @@ void PixelShaderManager::SetBlendModeChanged()
constants.dither = dither;
dirty = true;
}
BlendingState state = {};
state.Generate(bpmem);
if (constants.blend_enable != state.blendenable)
{
constants.blend_enable = state.blendenable;
dirty = true;
}
if (constants.blend_src_factor != state.srcfactor)
{
constants.blend_src_factor = state.srcfactor;
dirty = true;
}
if (constants.blend_src_factor_alpha != state.srcfactoralpha)
{
constants.blend_src_factor_alpha = state.srcfactoralpha;
dirty = true;
}
if (constants.blend_dst_factor != state.dstfactor)
{
constants.blend_dst_factor = state.dstfactor;
dirty = true;
}
if (constants.blend_dst_factor_alpha != state.dstfactoralpha)
{
constants.blend_dst_factor_alpha = state.dstfactoralpha;
dirty = true;
}
if (constants.blend_subtract != state.subtract)
{
constants.blend_subtract = state.subtract;
dirty = true;
}
if (constants.blend_subtract_alpha != state.subtractAlpha)
{
constants.blend_subtract_alpha = state.subtractAlpha;
dirty = true;
}
s_bDestAlphaDirty = true;
}

View file

@ -47,6 +47,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo;
const bool use_dual_source = host_config.backend_dual_source_blend;
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
const bool early_depth = uid_data->early_depth != 0;
const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
const bool bounding_box =
@ -77,6 +78,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
else if (use_shader_blend)
{
// Qualcomm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so pull out the "real" output value
// and use a temporary for calculations, then set the output value once at the end of the
// shader.
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -658,6 +674,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("void main()\n{\n");
out.Write(" float4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n");
out.Write(" float4 ocol0;\n");
out.Write(" float4 ocol1;\n");
}
}
else // D3D
{
@ -1203,7 +1226,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
" \n");
if (use_dual_source)
if (use_dual_source || use_shader_blend)
{
out.Write(" // Dest alpha override (dual source blending)\n"
" // Colors will be blended against the alpha from ocol1 and\n"
@ -1228,6 +1251,99 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" }\n");
}
if (use_shader_blend)
{
// Shader expressions for the source RGB blend factor. The array index is the
// case label emitted for the shader's `switch (blend_src_factor)`, and the
// entry is assigned to blend_src.rgb. Each entry already carries its own
// trailing ';'. initial_ocol0 is the framebuffer value captured at the start of
// the shader; source alpha is read from ocol1.
static const std::array<const char*, 8> blendSrcFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"initial_ocol0.rgb;", // DSTCLR
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
"ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
// Shader expressions for the source alpha blend factor. The array index is the
// case label emitted for the shader's `switch (blend_src_factor_alpha)`, and
// the entry is assigned to blend_src.a. Each entry already carries its own
// trailing ';'. For the alpha channel the DSTCLR/DSTALPHA entries (and their
// inverses) resolve to the same expression.
static const std::array<const char*, 8> blendSrcFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"initial_ocol0.a;", // DSTCLR
"1.0 - initial_ocol0.a;", // INVDSTCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
// Shader expressions for the destination RGB blend factor. The array index is
// the case label emitted for the shader's `switch (blend_dst_factor)`, and the
// entry is assigned to blend_dst.rgb. Each entry already carries its own
// trailing ';'. ocol0 is the shader's computed source color; initial_ocol0 is
// the framebuffer value captured at the start of the shader.
static const std::array<const char*, 8> blendDstFactor = {
"float3(0,0,0);", // ZERO
"float3(1,1,1);", // ONE
"ocol0.rgb;", // SRCCLR
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
"ocol1.aaa;", // SRCALPHA
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
"initial_ocol0.aaa;", // DSTALPHA
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
};
// Shader expressions for the destination alpha blend factor. The array index
// is the case label emitted for the shader's `switch (blend_dst_factor_alpha)`,
// and the entry is assigned to blend_dst.a. Each entry already carries its own
// trailing ';'.
static const std::array<const char*, 8> blendDstFactorAlpha = {
"0.0;", // ZERO
"1.0;", // ONE
"ocol0.a;", // SRCCLR
"1.0 - ocol0.a;", // INVSRCCLR
"ocol1.a;", // SRCALPHA
"1.0 - ocol1.a;", // INVSRCALPHA
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.Write(" if (blend_enable) {\n"
" float4 blend_src;\n"
" switch (blend_src_factor) {\n");
for (unsigned i = 0; i < blendSrcFactor.size(); i++)
{
out.Write(" case %uu: blend_src.rgb = %s; break;\n", i, blendSrcFactor[i]);
}
out.Write(" }\n"
" switch (blend_src_factor_alpha) {\n");
for (unsigned i = 0; i < blendSrcFactorAlpha.size(); i++)
{
out.Write(" case %uu: blend_src.a = %s; break;\n", i, blendSrcFactorAlpha[i]);
}
out.Write(" }\n"
" float4 blend_dst;\n"
" switch (blend_dst_factor) {\n");
for (unsigned i = 0; i < blendDstFactor.size(); i++)
{
out.Write(" case %uu: blend_dst.rgb = %s; break;\n", i, blendDstFactor[i]);
}
out.Write(" }\n"
" switch (blend_dst_factor_alpha) {\n");
for (unsigned i = 0; i < blendDstFactorAlpha.size(); i++)
{
out.Write(" case %uu: blend_dst.a = %s; break;\n", i, blendDstFactorAlpha[i]);
}
out.Write(
" }\n"
" float4 blend_result;\n"
" if (blend_subtract)\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n"
" else\n"
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * "
"blend_src.rgb;\n");
out.Write(" if (blend_subtract_alpha)\n"
" blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"
" else\n"
" blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
out.Write(" real_ocol0 = blend_result;\n");
out.Write(" } else {\n"
" real_ocol0 = ocol0;\n"
" }\n");
}
out.Write("}\n"
"\n"
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n"