From 02a4e3d70fb3f71a1acfc1e4cb22a8e3a0dee963 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 5 Jun 2014 11:06:41 +0200 Subject: [PATCH] OGL-StreamBuffer: make the SLOT calculation much easier The size of the buffer is now a power of 2, so we can use a shift instead of a division. The SLOT calculation accounted for about 2% of the global CPU usage. --- .../Core/VideoBackends/OGL/StreamBuffer.cpp | 20 +++++++++---------- Source/Core/VideoBackends/OGL/StreamBuffer.h | 5 ++++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/StreamBuffer.cpp b/Source/Core/VideoBackends/OGL/StreamBuffer.cpp index 85320c14b0..57a1cc0036 100644 --- a/Source/Core/VideoBackends/OGL/StreamBuffer.cpp +++ b/Source/Core/VideoBackends/OGL/StreamBuffer.cpp @@ -23,7 +23,7 @@ static u32 genBuffer() } StreamBuffer::StreamBuffer(u32 type, size_t size) -: m_buffer(genBuffer()), m_buffertype(type), m_size(size) +: m_buffer(genBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(Log2(ROUND_UP_POW2(size) / SYNC_POINTS)) { m_iterator = 0; m_used_iterator = 0; @@ -59,19 +59,20 @@ StreamBuffer::~StreamBuffer() * As ring buffers have an ugly behavoir on rollover, have fun to read this code ;) */ -#define SLOT(x) ((x)*SYNC_POINTS/m_size) void StreamBuffer::CreateFences() { - for (u32 i=0; i= m_size) { // insert waiting slots in unused space at the end of the buffer - for (size_t i = SLOT(m_used_iterator); i < SYNC_POINTS; i++) + for (int i = SLOT(m_used_iterator); i < SYNC_POINTS; i++) { fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } @@ -106,7 +107,7 @@ void StreamBuffer::AllocMemory(size_t size) m_used_iterator = m_iterator = 0; // offset 0 is always aligned // wait for space at the start - for (u32 i = 0; i <= SLOT(m_iterator + size); i++) + for (int i = 0; i <= SLOT(m_iterator + size); i++) { glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glDeleteSync(fences[i]); @@ -114,7 +115,6 @@ void 
StreamBuffer::AllocMemory(size_t size) m_free_iterator = m_iterator + size; } } -#undef SLOT void StreamBuffer::Align(u32 stride) { diff --git a/Source/Core/VideoBackends/OGL/StreamBuffer.h b/Source/Core/VideoBackends/OGL/StreamBuffer.h index fa7a29eac2..66747eafd9 100644 --- a/Source/Core/VideoBackends/OGL/StreamBuffer.h +++ b/Source/Core/VideoBackends/OGL/StreamBuffer.h @@ -46,7 +46,10 @@ protected: size_t m_free_iterator; private: - static const u32 SYNC_POINTS = 16; + static const int SYNC_POINTS = 16; + inline int SLOT(size_t x) const { return x >> m_bit_per_slot; } + const int m_bit_per_slot; + GLsync fences[SYNC_POINTS]; };