From a41345127f697515ceb65ed349032e9ed3abc7f9 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Thu, 21 Jul 2022 20:07:23 -0500 Subject: [PATCH] VideoBackends:Metal: Remove unified memory config Not worth the extra code --- .../VideoBackends/Metal/MTLBoundingBox.mm | 35 +---- .../VideoBackends/Metal/MTLStateTracker.h | 19 +-- .../VideoBackends/Metal/MTLStateTracker.mm | 120 ++---------------- Source/Core/VideoBackends/Metal/MTLTexture.mm | 3 +- Source/Core/VideoBackends/Metal/MTLUtil.h | 1 - Source/Core/VideoBackends/Metal/MTLUtil.mm | 8 -- 6 files changed, 21 insertions(+), 165 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm index 89c446b040..4c1a6ebd48 100644 --- a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm @@ -16,28 +16,16 @@ Metal::BoundingBox::~BoundingBox() bool Metal::BoundingBox::Initialize() { - const MTLResourceOptions gpu_storage_mode = - g_features.unified_memory ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate; - const MTLResourceOptions gpu_options = gpu_storage_mode | MTLResourceHazardTrackingModeUntracked; + const MTLResourceOptions gpu_options = + MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked; const id dev = g_device; m_upload_fence = MRCTransfer([dev newFence]); [m_upload_fence setLabel:@"BBox Upload Fence"]; m_download_fence = MRCTransfer([dev newFence]); [m_download_fence setLabel:@"BBox Download Fence"]; m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]); - if (g_features.unified_memory) - { - [m_gpu_buffer setLabel:@"BBox Buffer"]; - m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); - } - else - { - m_cpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE - options:MTLResourceStorageModeShared]); - m_cpu_buffer_ptr = static_cast([m_cpu_buffer contents]); - [m_gpu_buffer setLabel:@"BBox GPU Buffer"]; - [m_cpu_buffer setLabel:@"BBox CPU Buffer"]; - } + [m_gpu_buffer setLabel:@"BBox Buffer"]; + m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence); return true; } @@ -47,18 +35,6 @@ std::vector Metal::BoundingBox::Read(u32 index, u32 length) @autoreleasepool { g_state_tracker->EndRenderPass(); - if (!g_features.unified_memory) - { - id download = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; - [download setLabel:@"BBox Download"]; - [download waitForFence:m_download_fence]; - [download copyFromBuffer:m_gpu_buffer - sourceOffset:0 - toBuffer:m_cpu_buffer - destinationOffset:0 - size:BUFFER_SIZE]; - [download endEncoding]; - } g_state_tracker->FlushEncoders(); g_state_tracker->WaitForFlushedEncoders(); return std::vector(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length); @@ -68,8 +44,7 @@ std::vector Metal::BoundingBox::Read(u32 index, u32 length) void Metal::BoundingBox::Write(u32 index, const std::vector& values) { const u32 size = values.size() * sizeof(BBoxType); - if (g_features.unified_memory && !g_state_tracker->HasUnflushedData() && - !g_state_tracker->GPUBusy()) + if (!g_state_tracker->HasUnflushedData() && !g_state_tracker->GPUBusy()) { // We can just write directly to the buffer! memcpy(m_cpu_buffer_ptr + index, values.data(), size); diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index 1807f345e4..3e7bac832d 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -34,6 +34,7 @@ public: Uniform, Vertex, Index, + TextureData, Texels, Last = Texels }; @@ -105,7 +106,6 @@ public: { return (amt + static_cast(align)) & ~static_cast(align); } - Map AllocateForTextureUpload(size_t amt); Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align) { Preallocate(buffer_idx, amt); @@ -119,7 +119,6 @@ public: static_cast(align)) == 0); return CommitPreallocation(buffer_idx, Align(amt, align)); } - id GetUploadEncoder(); id GetTextureUploadEncoder(); id GetRenderCmdBuf(); @@ -143,28 +142,18 @@ private: void Reset(size_t new_size); }; - struct CPUBuffer + struct Buffer { UsageTracker usage; MRCOwned> mtlbuffer; void* buffer = nullptr; }; - struct BufferPair - { - UsageTracker usage; - MRCOwned> cpubuffer; - MRCOwned> gpubuffer; - void* buffer = nullptr; - size_t last_upload = 0; - }; - struct Backref; struct PerfQueryTracker; std::shared_ptr m_backref; std::vector> m_perf_query_tracker_cache; - MRCOwned> m_fence; MRCOwned> m_upload_cmdbuf; MRCOwned> m_upload_encoder; MRCOwned> m_texture_upload_cmdbuf; @@ -176,8 +165,7 @@ private: MRCOwned m_render_pass_desc[3]; MRCOwned m_resolve_pass_desc; Framebuffer* m_current_framebuffer; - CPUBuffer m_texture_upload_buffer; - BufferPair m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; + Buffer m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; u64 m_current_draw = 1; std::atomic m_last_finished_draw{0}; @@ -264,7 +252,6 @@ private: std::shared_ptr NewPerfQueryTracker(); void SetSamplerForce(u32 idx, const SamplerState& sampler); - void Sync(BufferPair& buffer); Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); void CheckViewport(); void CheckScissor(); diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index b221644640..3ab6224f83 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -44,11 +44,12 @@ static NSString* GetName(Metal::StateTracker::UploadBuffer buffer) // clang-format off switch (buffer) { - case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; - case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; - case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; - case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; - case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; + case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data"; + case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; + case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; + case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; + case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; + case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; } // clang-format on } @@ -103,7 +104,6 @@ void Metal::StateTracker::UsageTracker::Reset(size_t new_size) Metal::StateTracker::StateTracker() : m_backref(std::make_shared(this)) { m_flags.should_apply_label = true; - m_fence = MRCTransfer([g_device newFence]); for (MRCOwned& rpdesc : m_render_pass_desc) { rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); @@ -140,10 +140,9 @@ Metal::StateTracker::~StateTracker() // MARK: BufferPair Ops -Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt) +std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) { - amt = (amt + 15) & ~15ull; - CPUBuffer& buffer = m_texture_upload_buffer; + Buffer& buffer = m_upload_buffers[static_cast(buffer_idx)]; u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); if (__builtin_expect(needs_new, false)) @@ -155,61 +154,11 @@ Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t am MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"]; + [buffer.mtlbuffer setLabel:GetName(buffer_idx)]; ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); buffer.buffer = [buffer.mtlbuffer contents]; buffer.usage.Reset(newsize); } - - size_t pos = buffer.usage.Allocate(m_current_draw, amt); - - Map ret = {buffer.mtlbuffer, pos, reinterpret_cast(buffer.buffer) + pos}; - DEBUG_ASSERT(pos <= buffer.usage.Size() && - "Previous code should have guaranteed there was enough space"); - return ret; -} - -std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) -{ - BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; - u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); - size_t base_pos = buffer.usage.Pos(); - bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); - bool needs_upload = needs_new || buffer.usage.Pos() == 0; - if (!g_features.unified_memory && needs_upload) - { - if (base_pos != buffer.last_upload) - { - id encoder = GetUploadEncoder(); - [encoder copyFromBuffer:buffer.cpubuffer - sourceOffset:buffer.last_upload - toBuffer:buffer.gpubuffer - destinationOffset:buffer.last_upload - size:base_pos - buffer.last_upload]; - } - buffer.last_upload = 0; - } - if (__builtin_expect(needs_new, false)) - { - // Orphan buffer - size_t newsize = std::max(buffer.usage.Size() * 2, 4096); - while (newsize < amt) - newsize *= 2; - MTLResourceOptions options = - MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; - buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.cpubuffer setLabel:GetName(buffer_idx)]; - ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); - buffer.buffer = [buffer.cpubuffer contents]; - buffer.usage.Reset(newsize); - if (!g_features.unified_memory) - { - options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked; - buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.gpubuffer setLabel:GetName(buffer_idx)]; - ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); - } - } size_t pos = buffer.usage.Pos(); return std::make_pair(reinterpret_cast(buffer.buffer) + pos, pos); } @@ -217,46 +166,17 @@ std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_id Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, size_t amt) { - BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + Buffer& buffer = m_upload_buffers[static_cast(buffer_idx)]; size_t pos = buffer.usage.Allocate(m_current_draw, amt); Map ret = {nil, pos, reinterpret_cast(buffer.buffer) + pos}; - ret.gpu_buffer = g_features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer; + ret.gpu_buffer = buffer.mtlbuffer; DEBUG_ASSERT(pos <= buffer.usage.Size() && "Previous code should have guaranteed there was enough space"); return ret; } -void Metal::StateTracker::Sync(BufferPair& buffer) -{ - if (g_features.unified_memory || buffer.usage.Pos() == buffer.last_upload) - return; - - id encoder = GetUploadEncoder(); - [encoder copyFromBuffer:buffer.cpubuffer - sourceOffset:buffer.last_upload - toBuffer:buffer.gpubuffer - destinationOffset:buffer.last_upload - size:buffer.usage.Pos() - buffer.last_upload]; - buffer.last_upload = buffer.usage.Pos(); -} - // MARK: Render Pass / Encoder Management -id Metal::StateTracker::GetUploadEncoder() -{ - if (!m_upload_cmdbuf) - { - @autoreleasepool - { - m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); - [m_upload_cmdbuf setLabel:@"Vertex Upload"]; - m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]); - [m_upload_encoder setLabel:@"Vertex Upload"]; - } - } - return m_upload_encoder; -} - id Metal::StateTracker::GetTextureUploadEncoder() { if (!m_texture_upload_cmdbuf) @@ -349,8 +269,6 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); if (m_current_perf_query) [descriptor setVisibilityResultBuffer:nil]; - if (!g_features.unified_memory) - [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex]; AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); if (!attachment) attachment = m_current_framebuffer->GetDepthAttachment(); @@ -380,8 +298,6 @@ void Metal::StateTracker::BeginComputePass() EndRenderPass(); m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]); [m_current_compute_encoder setLabel:@"Compute"]; - if (!g_features.unified_memory) - [m_current_compute_encoder waitForFence:m_fence]; m_flags.NewEncoder(); m_dirty_samplers = 0xff; m_dirty_textures = 0xff; @@ -409,20 +325,6 @@ void Metal::StateTracker::FlushEncoders() if (!m_current_render_cmdbuf) return; EndRenderPass(); - for (int i = 0; i <= static_cast(UploadBuffer::Last); ++i) - Sync(m_upload_buffers[i]); - if (g_features.unified_memory) - { - ASSERT(!m_upload_cmdbuf && "Should never be used!"); - } - else if (m_upload_cmdbuf) - { - [m_upload_encoder updateFence:m_fence]; - [m_upload_encoder endEncoding]; - [m_upload_cmdbuf commit]; - m_upload_encoder = nullptr; - m_upload_cmdbuf = nullptr; - } if (m_texture_upload_cmdbuf) { [m_texture_upload_encoder endEncoding]; diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.mm b/Source/Core/VideoBackends/Metal/MTLTexture.mm index 67c114caa7..fd0358e10e 100644 --- a/Source/Core/VideoBackends/Metal/MTLTexture.mm +++ b/Source/Core/VideoBackends/Metal/MTLTexture.mm @@ -59,7 +59,8 @@ void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, // const u32 num_rows = Common::AlignUp(height, block_size) / block_size; const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); const u32 upload_size = source_pitch * num_rows; - StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size); + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData, + upload_size, StateTracker::AlignMask::Other); memcpy(map.cpu_buffer, buffer, upload_size); id encoder = g_state_tracker->GetTextureUploadEncoder(); [encoder copyFromBuffer:map.gpu_buffer diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.h b/Source/Core/VideoBackends/Metal/MTLUtil.h index 385f508648..dfedecd7c6 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.h +++ b/Source/Core/VideoBackends/Metal/MTLUtil.h @@ -16,7 +16,6 @@ namespace Metal { struct DeviceFeatures { - bool unified_memory; bool subgroup_ops; }; diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 85ac96b342..50916a38e3 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -211,14 +211,6 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id config->backend_info.AAModes.push_back(i); } - // The unified memory path (using shared buffers for everything) performs noticeably better with - // bbox even on discrete GPUs (20fps vs 15fps in Super Paper Mario elevator), so default to that. - // The separate buffer + manual upload path is left available for testing and comparison. - if (char* env = getenv("MTL_UNIFIED_MEMORY")) - g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; - else - g_features.unified_memory = true; - g_features.subgroup_ops = false; if (@available(macOS 10.15, iOS 13, *)) {