From 40851682cede3b94b5ea0fb964d6c0701c984634 Mon Sep 17 00:00:00 2001
From: PabloMK7
Date: Sat, 13 Jul 2024 23:47:11 +0200
Subject: [PATCH] Workaround a performance bug in older Mali GPUs (#185)

---
Notes (ignored by git-am; not part of the commit message):

* gl_driver: Driver::FindBugs() sets DriverBug::SlowTextureBufferWithBigSize
  when the vendor is ARM, the GPU model string contains "Mali", and
  GL_MAX_TEXTURE_BUFFER_SIZE is exactly 1 << 16.
* gl_rasterizer: TextureBufferSize() now takes the driver and an is_lf flag.
  When the bug flag is set and is_lf is false it returns 1 << 14; otherwise it
  returns the previous min(max_texel_buffer_size * 8, TEXTURE_BUFFER_SIZE)
  value. texture_buffer is sized with is_lf = false, texture_lf_buffer with
  is_lf = true, and the cached texture_buffer_size member is removed.
* NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
  extraction that also dropped text inside angle brackets. The two static_cast
  target types below were restored from the declared variable types. The
  std::min(...) calls and the "std::array hw_vao_enabled_attributes{}" context
  line look like they lost template arguments the same way, and the author
  e-mail is missing; confirm those, and all leading whitespace, against the
  target tree before applying.

 src/video_core/renderer_opengl/gl_driver.cpp  |  9 +++++++++
 src/video_core/renderer_opengl/gl_driver.h    |  3 +++
 .../renderer_opengl/gl_rasterizer.cpp         | 19 ++++++++++++-------
 .../renderer_opengl/gl_rasterizer.h           |  1 -
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp
index 63b05bd85..1e110fd58 100644
--- a/src/video_core/renderer_opengl/gl_driver.cpp
+++ b/src/video_core/renderer_opengl/gl_driver.cpp
@@ -199,6 +199,15 @@ void Driver::FindBugs() {
     if (vendor == Vendor::Intel && !is_linux) {
         bugs |= DriverBug::BrokenClearTexture;
     }
+
+    if (vendor == Vendor::ARM && gpu_model.find("Mali") != gpu_model.npos) {
+        constexpr GLint MIN_TEXTURE_BUFFER_SIZE = static_cast<GLint>((1 << 16));
+        GLint max_texel_buffer_size;
+        glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_texel_buffer_size);
+        if (max_texel_buffer_size == MIN_TEXTURE_BUFFER_SIZE) {
+            bugs |= DriverBug::SlowTextureBufferWithBigSize;
+        }
+    }
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_driver.h b/src/video_core/renderer_opengl/gl_driver.h
index 2d67c3dc9..3801babad 100644
--- a/src/video_core/renderer_opengl/gl_driver.h
+++ b/src/video_core/renderer_opengl/gl_driver.h
@@ -36,6 +36,9 @@ enum class DriverBug {
     BrokenTextureView = 1 << 2,
     // On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
     BrokenClearTexture = 1 << 3,
+    // On some Mali GPUs, the texture buffer size is small and has reduced performance
+    // if the buffer is close to the maximum texture size
+    SlowTextureBufferWithBigSize = 1 << 4,
 };
 
 /**
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0487905de..3a9dc7831 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -63,12 +63,19 @@ GLenum MakeAttributeType(Pica::PipelineRegs::VertexAttributeFormat format) {
     return GL_UNSIGNED_BYTE;
 }
 
-[[nodiscard]] GLsizeiptr TextureBufferSize() {
+[[nodiscard]] GLsizeiptr TextureBufferSize(const Driver& driver, bool is_lf) {
     // Use the smallest texel size from the texel views
     // which corresponds to GL_RG32F
     GLint max_texel_buffer_size;
     glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_texel_buffer_size);
-    return std::min(max_texel_buffer_size * 8ULL, TEXTURE_BUFFER_SIZE);
+    GLsizeiptr candidate = std::min(max_texel_buffer_size * 8ULL, TEXTURE_BUFFER_SIZE);
+
+    if (driver.HasBug(DriverBug::SlowTextureBufferWithBigSize) && !is_lf) {
+        constexpr GLsizeiptr FIXUP_TEXTURE_BUFFER_SIZE = static_cast<GLsizeiptr>(1 << 14); // 16384
+        return FIXUP_TEXTURE_BUFFER_SIZE;
+    }
+
+    return candidate;
 }
 
 } // Anonymous namespace
@@ -79,13 +86,11 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, Pica::PicaCore&
     : VideoCore::RasterizerAccelerated{memory, pica}, driver{driver_},
       shader_manager{renderer.GetRenderWindow(), driver, !driver.IsOpenGLES()},
       runtime{driver, renderer}, res_cache{memory, custom_tex_manager, runtime, regs, renderer},
-      texture_buffer_size{TextureBufferSize()}, vertex_buffer{driver, GL_ARRAY_BUFFER,
-                                                              VERTEX_BUFFER_SIZE},
+      vertex_buffer{driver, GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
       uniform_buffer{driver, GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
       index_buffer{driver, GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
-      texture_buffer{driver, GL_TEXTURE_BUFFER, texture_buffer_size}, texture_lf_buffer{
-                                                                          driver, GL_TEXTURE_BUFFER,
-                                                                          texture_buffer_size} {
+      texture_buffer{driver, GL_TEXTURE_BUFFER, TextureBufferSize(driver, false)},
+      texture_lf_buffer{driver, GL_TEXTURE_BUFFER, TextureBufferSize(driver, true)} {
 
     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
     state.clip_distance[0] = true;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 3fe5e8dde..b63b34e86 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -145,7 +145,6 @@ private:
     OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
     std::array hw_vao_enabled_attributes{};
 
-    GLsizeiptr texture_buffer_size;
     OGLStreamBuffer vertex_buffer;
     OGLStreamBuffer uniform_buffer;
     OGLStreamBuffer index_buffer;