From 0c11dca1218b4695025d826dac50538288a579b1 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sun, 18 Apr 2021 03:14:43 +1000
Subject: [PATCH 1/2] VideoBackends/D3D: Cache bounding box values between reads

BBox::Set() now only updates a CPU-side copy of the value and marks it
dirty. BBox::Flush() uploads each contiguous run of dirty values with a
single UpdateSubresource() call and invalidates the cached copy.
BBox::Get() returns the cached value, calling BBox::Readback() first if
the cache is invalid; Readback() fetches all values through the staging
buffer at once and keeps any value that is still dirty.

BBox::Init() also resets the cached values so that they match the
zero-filled buffer it has just created, as the OGL backend does.
---
Reviewer note: BBox::Init() gained two lines during review, so the
post-image blob id on the D3DBoundingBox.cpp "index" line is stale.

 .../Core/VideoBackends/D3D/D3DBoundingBox.cpp | 148 ++++++++++++------
 .../Core/VideoBackends/D3D/D3DBoundingBox.h   |   3 +
 Source/Core/VideoBackends/D3D/D3DRender.cpp   |   5 +
 Source/Core/VideoBackends/D3D/D3DRender.h     |   1 +
 4 files changed, 114 insertions(+), 43 deletions(-)

diff --git a/Source/Core/VideoBackends/D3D/D3DBoundingBox.cpp b/Source/Core/VideoBackends/D3D/D3DBoundingBox.cpp
index ad9caba647..8218aa65cc 100644
--- a/Source/Core/VideoBackends/D3D/D3DBoundingBox.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DBoundingBox.cpp
@@ -2,18 +2,25 @@
 // Licensed under GPLv2+
 // Refer to the license.txt file included.
 
-#include "VideoBackends/D3D/D3DBoundingBox.h"
+#include <algorithm>
+#include <array>
+
 #include "Common/CommonTypes.h"
 #include "Common/MsgHandler.h"
+#include "VideoBackends/D3D/D3DBoundingBox.h"
 #include "VideoBackends/D3D/D3DState.h"
 #include "VideoBackends/D3DCommon/D3DCommon.h"
 #include "VideoCommon/VideoConfig.h"
 
 namespace DX11
 {
+static constexpr u32 NUM_BBOX_VALUES = 4;
 static ComPtr<ID3D11Buffer> s_bbox_buffer;
 static ComPtr<ID3D11Buffer> s_bbox_staging_buffer;
 static ComPtr<ID3D11UnorderedAccessView> s_bbox_uav;
+static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
+static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
+static bool s_bbox_valid = false;
 
 ID3D11UnorderedAccessView* BBox::GetUAV()
 {
@@ -22,42 +29,47 @@ ID3D11UnorderedAccessView* BBox::GetUAV()
 
 void BBox::Init()
 {
-  if (g_ActiveConfig.backend_info.bSupportsBBox)
-  {
-    // Create 2 buffers here.
-    // First for unordered access on default pool.
-    auto desc = CD3D11_BUFFER_DESC(4 * sizeof(s32), D3D11_BIND_UNORDERED_ACCESS,
-                                   D3D11_USAGE_DEFAULT, 0, 0, 4);
-    int initial_values[4] = {0, 0, 0, 0};
-    D3D11_SUBRESOURCE_DATA data;
-    data.pSysMem = initial_values;
-    data.SysMemPitch = 4 * sizeof(s32);
-    data.SysMemSlicePitch = 0;
-    HRESULT hr;
-    hr = D3D::device->CreateBuffer(&desc, &data, &s_bbox_buffer);
-    CHECK(SUCCEEDED(hr), "Create BoundingBox Buffer.");
-    D3DCommon::SetDebugObjectName(s_bbox_buffer.Get(), "BoundingBox Buffer");
+  if (!g_ActiveConfig.backend_info.bSupportsBBox)
+    return;
 
-    // Second to use as a staging buffer.
-    desc.Usage = D3D11_USAGE_STAGING;
-    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
-    desc.BindFlags = 0;
-    hr = D3D::device->CreateBuffer(&desc, nullptr, &s_bbox_staging_buffer);
-    CHECK(SUCCEEDED(hr), "Create BoundingBox Staging Buffer.");
-    D3DCommon::SetDebugObjectName(s_bbox_staging_buffer.Get(), "BoundingBox Staging Buffer");
+  // Create 2 buffers here.
+  // First for unordered access on default pool.
+  auto desc = CD3D11_BUFFER_DESC(NUM_BBOX_VALUES * sizeof(s32), D3D11_BIND_UNORDERED_ACCESS,
+                                 D3D11_USAGE_DEFAULT, 0, 0, sizeof(s32));
+  const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
+  D3D11_SUBRESOURCE_DATA data;
+  data.pSysMem = initial_values;
+  data.SysMemPitch = NUM_BBOX_VALUES * sizeof(s32);
+  data.SysMemSlicePitch = 0;
+  HRESULT hr;
+  hr = D3D::device->CreateBuffer(&desc, &data, &s_bbox_buffer);
+  CHECK(SUCCEEDED(hr), "Create BoundingBox Buffer.");
+  D3DCommon::SetDebugObjectName(s_bbox_buffer.Get(), "BoundingBox Buffer");
 
-    // UAV is required to allow concurrent access.
-    D3D11_UNORDERED_ACCESS_VIEW_DESC UAVdesc = {};
-    UAVdesc.Format = DXGI_FORMAT_R32_SINT;
-    UAVdesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
-    UAVdesc.Buffer.FirstElement = 0;
-    UAVdesc.Buffer.Flags = 0;
-    UAVdesc.Buffer.NumElements = 4;
-    hr = D3D::device->CreateUnorderedAccessView(s_bbox_buffer.Get(), &UAVdesc, &s_bbox_uav);
-    CHECK(SUCCEEDED(hr), "Create BoundingBox UAV.");
-    D3DCommon::SetDebugObjectName(s_bbox_uav.Get(), "BoundingBox UAV");
-    D3D::stateman->SetOMUAV(s_bbox_uav.Get());
-  }
+  // Second to use as a staging buffer.
+  desc.Usage = D3D11_USAGE_STAGING;
+  desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+  desc.BindFlags = 0;
+  hr = D3D::device->CreateBuffer(&desc, nullptr, &s_bbox_staging_buffer);
+  CHECK(SUCCEEDED(hr), "Create BoundingBox Staging Buffer.");
+  D3DCommon::SetDebugObjectName(s_bbox_staging_buffer.Get(), "BoundingBox Staging Buffer");
+
+  // UAV is required to allow concurrent access.
+  D3D11_UNORDERED_ACCESS_VIEW_DESC UAVdesc = {};
+  UAVdesc.Format = DXGI_FORMAT_R32_SINT;
+  UAVdesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+  UAVdesc.Buffer.FirstElement = 0;
+  UAVdesc.Buffer.Flags = 0;
+  UAVdesc.Buffer.NumElements = NUM_BBOX_VALUES;
+  hr = D3D::device->CreateUnorderedAccessView(s_bbox_buffer.Get(), &UAVdesc, &s_bbox_uav);
+  CHECK(SUCCEEDED(hr), "Create BoundingBox UAV.");
+  D3DCommon::SetDebugObjectName(s_bbox_uav.Get(), "BoundingBox UAV");
+  D3D::stateman->SetOMUAV(s_bbox_uav.Get());
+
+  // The buffer was just created from initial_values (all zero); make the cached copy match.
+  s_bbox_values = {};
+  s_bbox_dirty = {};
+  s_bbox_valid = true;
 }
 
 void BBox::Shutdown()
@@ -67,23 +79,73 @@ void BBox::Shutdown()
   s_bbox_buffer.Reset();
 }
 
-void BBox::Set(int index, int value)
+void BBox::Flush()
 {
-  D3D11_BOX box{index * sizeof(s32), 0, 0, (index + 1) * sizeof(s32), 1, 1};
-  D3D::context->UpdateSubresource(s_bbox_buffer.Get(), 0, &box, &value, 0, 0);
+  s_bbox_valid = false;
+
+  if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
+    return;
+
+  for (u32 start = 0; start < NUM_BBOX_VALUES;)
+  {
+    if (!s_bbox_dirty[start])
+    {
+      start++;
+      continue;
+    }
+
+    u32 end = start + 1;
+    s_bbox_dirty[start] = false;
+    for (; end < NUM_BBOX_VALUES; end++)
+    {
+      if (!s_bbox_dirty[end])
+        break;
+
+      s_bbox_dirty[end] = false;
+    }
+
+    D3D11_BOX box{start * sizeof(s32), 0, 0, end * sizeof(s32), 1, 1};
+    D3D::context->UpdateSubresource(s_bbox_buffer.Get(), 0, &box, &s_bbox_values[start], 0, 0);
+  }
 }
 
-int BBox::Get(int index)
+void BBox::Readback()
 {
-  int data = 0;
   D3D::context->CopyResource(s_bbox_staging_buffer.Get(), s_bbox_buffer.Get());
+
   D3D11_MAPPED_SUBRESOURCE map;
   HRESULT hr = D3D::context->Map(s_bbox_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &map);
   if (SUCCEEDED(hr))
   {
-    data = ((s32*)map.pData)[index];
+    for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
+    {
+      if (!s_bbox_dirty[i])
+      {
+        std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(map.pData) + sizeof(s32) * i,
+                    sizeof(s32));
+      }
+    }
+
+    D3D::context->Unmap(s_bbox_staging_buffer.Get(), 0);
   }
-  D3D::context->Unmap(s_bbox_staging_buffer.Get(), 0);
-  return data;
+
+  s_bbox_valid = true;
+}
+
+void BBox::Set(int index, int value)
+{
+  if (s_bbox_valid && s_bbox_values[index] == value)
+    return;
+
+  s_bbox_values[index] = value;
+  s_bbox_dirty[index] = true;
+}
+
+int BBox::Get(int index)
+{
+  if (!s_bbox_valid)
+    Readback();
+
+  return s_bbox_values[index];
 }
 };  // namespace DX11
diff --git a/Source/Core/VideoBackends/D3D/D3DBoundingBox.h b/Source/Core/VideoBackends/D3D/D3DBoundingBox.h
index 5cf1da41e2..b80edfd938 100644
--- a/Source/Core/VideoBackends/D3D/D3DBoundingBox.h
+++ b/Source/Core/VideoBackends/D3D/D3DBoundingBox.h
@@ -14,6 +14,9 @@ public:
   static void Init();
   static void Shutdown();
 
+  static void Flush();
+  static void Readback();
+
   static void Set(int index, int value);
   static int Get(int index);
 };
diff --git a/Source/Core/VideoBackends/D3D/D3DRender.cpp b/Source/Core/VideoBackends/D3D/D3DRender.cpp
index ccecec79f9..904de1953c 100644
--- a/Source/Core/VideoBackends/D3D/D3DRender.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DRender.cpp
@@ -274,6 +274,11 @@ void Renderer::BBoxWriteImpl(int index, u16 value)
   BBox::Set(index, value);
 }
 
+void Renderer::BBoxFlushImpl()
+{
+  BBox::Flush();
+}
+
 void Renderer::Flush()
 {
   D3D::context->Flush();
diff --git a/Source/Core/VideoBackends/D3D/D3DRender.h b/Source/Core/VideoBackends/D3D/D3DRender.h
index 45fad0e1b9..727e1243b9 100644
--- a/Source/Core/VideoBackends/D3D/D3DRender.h
+++ b/Source/Core/VideoBackends/D3D/D3DRender.h
@@ -63,6 +63,7 @@ public:
 
   u16 BBoxReadImpl(int index) override;
   void BBoxWriteImpl(int index, u16 value) override;
+  void BBoxFlushImpl() override;
 
   void Flush() override;
   void WaitForGPUIdle() override;

From 7fd0a526e1a6c48d0180eb8368cc9aed5f6d0ece Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sun, 18 Apr 2021 03:14:52 +1000
Subject: [PATCH 2/2] VideoBackends/OGL: Cache bounding box values between reads

BoundingBox::Set() now only updates a CPU-side copy of the value and
marks it dirty. BoundingBox::Flush() uploads each contiguous run of
dirty values with a single glBufferSubData() call and invalidates the
cached copy. BoundingBox::Get() returns the cached value, calling
BoundingBox::Readback() first if the cache is invalid; Readback() reads
all values from the buffer at once and keeps any value that is still
dirty.
---
 .../Core/VideoBackends/OGL/OGLBoundingBox.cpp | 94 ++++++++++++++++---
 .../Core/VideoBackends/OGL/OGLBoundingBox.h   |  3 +
 Source/Core/VideoBackends/OGL/OGLRender.cpp   |  5 +
 Source/Core/VideoBackends/OGL/OGLRender.h     |  1 +
 4 files changed, 89 insertions(+), 14 deletions(-)

diff --git a/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp b/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp
index 9e88767afc..cee200d4ad 100644
--- a/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp
+++ b/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2+
 // Refer to the license.txt file included.
 
+#include <algorithm>
+#include <array>
 #include <cstring>
 
 #include "Common/GL/GLUtil.h"
@@ -12,7 +14,15 @@
 #include "VideoCommon/DriverDetails.h"
 #include "VideoCommon/VideoConfig.h"
 
+enum : u32
+{
+  NUM_BBOX_VALUES = 4,
+};
+
 static GLuint s_bbox_buffer_id;
+static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
+static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
+static bool s_bbox_valid = false;
 
 namespace OGL
 {
@@ -21,10 +31,14 @@ void BoundingBox::Init()
   if (!g_ActiveConfig.backend_info.bSupportsBBox)
     return;
 
-  int initial_values[4] = {0, 0, 0, 0};
+  const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
+  std::memcpy(s_bbox_values.data(), initial_values, sizeof(s_bbox_values));
+  s_bbox_dirty = {};
+  s_bbox_valid = true;
+
   glGenBuffers(1, &s_bbox_buffer_id);
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
-  glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW);
+  glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(initial_values), initial_values, GL_DYNAMIC_DRAW);
   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id);
 }
 
@@ -36,22 +50,42 @@ void BoundingBox::Shutdown()
   glDeleteBuffers(1, &s_bbox_buffer_id);
 }
 
-void BoundingBox::Set(int index, int value)
+void BoundingBox::Flush()
 {
-  if (!g_ActiveConfig.backend_info.bSupportsBBox)
+  s_bbox_valid = false;
+
+  if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
     return;
 
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
-  glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value);
+
+  for (u32 start = 0; start < NUM_BBOX_VALUES;)
+  {
+    if (!s_bbox_dirty[start])
+    {
+      start++;
+      continue;
+    }
+
+    u32 end = start + 1;
+    s_bbox_dirty[start] = false;
+    for (; end < NUM_BBOX_VALUES; end++)
+    {
+      if (!s_bbox_dirty[end])
+        break;
+
+      s_bbox_dirty[end] = false;
+    }
+
+    glBufferSubData(GL_SHADER_STORAGE_BUFFER, start * sizeof(s32), (end - start) * sizeof(s32),
+                    &s_bbox_values[start]);
+  }
+
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
 }
 
-int BoundingBox::Get(int index)
+void BoundingBox::Readback()
 {
-  if (!g_ActiveConfig.backend_info.bSupportsBBox)
-    return 0;
-
-  int data = 0;
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
   if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) &&
       !static_cast<Renderer*>(g_renderer.get())->IsGLES())
@@ -59,20 +93,52 @@ int BoundingBox::Get(int index)
     // Using glMapBufferRange to read back the contents of the SSBO is extremely slow
     // on nVidia drivers. This is more noticeable at higher internal resolutions.
     // Using glGetBufferSubData instead does not seem to exhibit this slowdown.
-    glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
+    std::array<s32, NUM_BBOX_VALUES> gpu_values;
+    glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
+                       gpu_values.data());
+    for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
+    {
+      if (!s_bbox_dirty[i])
+        s_bbox_values[i] = gpu_values[i];
+    }
   }
   else
   {
     // Using glMapBufferRange is faster on AMD cards by a measurable margin.
-    void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int),
+    void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
                                  GL_MAP_READ_BIT);
     if (ptr)
     {
-      memcpy(&data, ptr, sizeof(int));
+      for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
+      {
+        if (!s_bbox_dirty[i])
+        {
+          std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(ptr) + sizeof(s32) * i,
+                      sizeof(s32));
+        }
+      }
+
       glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
     }
   }
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
-  return data;
+  s_bbox_valid = true;
+}
+
+void BoundingBox::Set(int index, int value)
+{
+  if (s_bbox_valid && s_bbox_values[index] == value)
+    return;
+
+  s_bbox_values[index] = value;
+  s_bbox_dirty[index] = true;
+}
+
+int BoundingBox::Get(int index)
+{
+  if (!s_bbox_valid)
+    Readback();
+
+  return s_bbox_values[index];
 }
 };  // namespace OGL
diff --git a/Source/Core/VideoBackends/OGL/OGLBoundingBox.h b/Source/Core/VideoBackends/OGL/OGLBoundingBox.h
index cbf54074ab..c2bc026785 100644
--- a/Source/Core/VideoBackends/OGL/OGLBoundingBox.h
+++ b/Source/Core/VideoBackends/OGL/OGLBoundingBox.h
@@ -12,6 +12,9 @@ public:
   static void Init();
   static void Shutdown();
 
+  static void Flush();
+  static void Readback();
+
   static void Set(int index, int value);
   static int Get(int index);
 };
diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp
index 49f848c913..d3435c51d5 100644
--- a/Source/Core/VideoBackends/OGL/OGLRender.cpp
+++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp
@@ -882,6 +882,11 @@ void Renderer::BBoxWriteImpl(int index, u16 value)
   BoundingBox::Set(index, swapped_value);
 }
 
+void Renderer::BBoxFlushImpl()
+{
+  BoundingBox::Flush();
+}
+
 void Renderer::SetViewport(float x, float y, float width, float height, float near_depth,
                            float far_depth)
 {
diff --git a/Source/Core/VideoBackends/OGL/OGLRender.h b/Source/Core/VideoBackends/OGL/OGLRender.h
index 9c6623f254..c9b3b04896 100644
--- a/Source/Core/VideoBackends/OGL/OGLRender.h
+++ b/Source/Core/VideoBackends/OGL/OGLRender.h
@@ -128,6 +128,7 @@ public:
 
   u16 BBoxReadImpl(int index) override;
   void BBoxWriteImpl(int index, u16 value) override;
+  void BBoxFlushImpl() override;
 
   void BeginUtilityDrawing() override;
   void EndUtilityDrawing() override;