dolphin/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp
Connor McLaughlin 6393f157f0 OGL: Force memory barrier when reading back bounding box values
We also need to ensure the the CPU does not receive stale values
which have been updated by the GPU. Apparently the buffer here
is not coherent on NVIDIA drivers. Not sure if this is a driver
bug/spec violation or not, one would think that
glGetBufferSubData() would invalidate any caches as needed, but
this path is only used on NVIDIA anyway, so it's fine. A point
to note is that according to ARB_debug_report, it's moved from
video to host memory, which would explain why it needs the
cache invalidate.
2021-05-29 16:57:40 +10:00

153 lines
4.2 KiB
C++

// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include "Common/GL/GLUtil.h"
#include "VideoBackends/OGL/OGLBoundingBox.h"
#include "VideoBackends/OGL/OGLRender.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h"
enum : u32
{
NUM_BBOX_VALUES = 4,
};
static GLuint s_bbox_buffer_id;
static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
static bool s_bbox_valid = false;
namespace OGL
{
void BoundingBox::Init()
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;
const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
std::memcpy(s_bbox_values.data(), initial_values, sizeof(s_bbox_values));
s_bbox_dirty = {};
s_bbox_valid = true;
glGenBuffers(1, &s_bbox_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(initial_values), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id);
}
void BoundingBox::Shutdown()
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;
glDeleteBuffers(1, &s_bbox_buffer_id);
}
void BoundingBox::Flush()
{
s_bbox_valid = false;
if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
return;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
for (u32 start = 0; start < NUM_BBOX_VALUES;)
{
if (!s_bbox_dirty[start])
{
start++;
continue;
}
u32 end = start + 1;
s_bbox_dirty[start] = false;
for (; end < NUM_BBOX_VALUES; end++)
{
if (!s_bbox_dirty[end])
break;
s_bbox_dirty[end] = false;
}
glBufferSubData(GL_SHADER_STORAGE_BUFFER, start * sizeof(s32), (end - start) * sizeof(s32),
&s_bbox_values[start]);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
void BoundingBox::Readback()
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
// on nVidia drivers. This is more noticeable at higher internal resolutions.
// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) &&
!static_cast<Renderer*>(g_renderer.get())->IsGLES())
{
// We also need to ensure the the CPU does not receive stale values which have been updated by
// the GPU. Apparently the buffer here is not coherent on NVIDIA drivers. Not sure if this is a
// driver bug/spec violation or not, one would think that glGetBufferSubData() would invalidate
// any caches as needed, but this path is only used on NVIDIA anyway, so it's fine. A point to
// note is that according to ARB_debug_report, it's moved from video to host memory, which would
// explain why it needs the cache invalidate.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
std::array<s32, NUM_BBOX_VALUES> gpu_values;
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
gpu_values.data());
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
s_bbox_values[i] = gpu_values[i];
}
}
else
{
// Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
GL_MAP_READ_BIT);
if (ptr)
{
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
{
std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(ptr) + sizeof(s32) * i,
sizeof(s32));
}
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
s_bbox_valid = true;
}
void BoundingBox::Set(int index, int value)
{
if (s_bbox_valid && s_bbox_values[index] == value)
return;
s_bbox_values[index] = value;
s_bbox_dirty[index] = true;
}
int BoundingBox::Get(int index)
{
if (!s_bbox_valid)
Readback();
return s_bbox_values[index];
}
}; // namespace OGL