From 6f3573dda87d2796a031fb7afc5ac1cecbb7ee2f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 6 Mar 2016 18:48:35 +1000 Subject: [PATCH] D3D12: Implement XFB encoding/decoding (support Real XFB) --- Source/Core/VideoBackends/D3D12/D3D12.vcxproj | 2 - .../VideoBackends/D3D12/D3D12.vcxproj.filters | 6 - Source/Core/VideoBackends/D3D12/D3DUtil.cpp | 5 - Source/Core/VideoBackends/D3D12/D3DUtil.h | 6 +- .../D3D12/FramebufferManager.cpp | 14 +- Source/Core/VideoBackends/D3D12/Render.cpp | 66 +++---- .../VideoBackends/D3D12/StaticShaderCache.cpp | 116 +++++++++++- .../VideoBackends/D3D12/StaticShaderCache.h | 2 + .../Core/VideoBackends/D3D12/Television.cpp | 45 ----- Source/Core/VideoBackends/D3D12/Television.h | 37 ---- .../Core/VideoBackends/D3D12/XFBEncoder.cpp | 167 ++++++++++++++++-- Source/Core/VideoBackends/D3D12/XFBEncoder.h | 26 ++- Source/Core/VideoBackends/D3D12/main.cpp | 3 + Source/Core/VideoBackends/OGL/Render.cpp | 5 + .../VideoCommon/FramebufferManagerBase.cpp | 6 +- 15 files changed, 342 insertions(+), 164 deletions(-) delete mode 100644 Source/Core/VideoBackends/D3D12/Television.cpp delete mode 100644 Source/Core/VideoBackends/D3D12/Television.h diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj index d0f3787378..ff026f977c 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj @@ -67,7 +67,6 @@ - @@ -91,7 +90,6 @@ - diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters index 83038e5a7b..f9b7d8e624 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters @@ -39,9 +39,6 @@ Render - - Render - Render @@ -105,9 +102,6 @@ Render - - Render - Render diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index bad1d7688c..bf58b084b3 100644 --- 
a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -26,11 +26,6 @@ namespace DX12 namespace D3D { -unsigned int AlignValue(unsigned int value, unsigned int alignment) -{ - return (value + (alignment - 1)) & ~(alignment - 1); -} - void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource) { if (state_before == state_after) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index fac639698a..961a583adc 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -22,7 +22,11 @@ extern StateCache gx_state_cache; namespace D3D { -unsigned int AlignValue(unsigned int value, unsigned int alignment); +constexpr unsigned int AlignValue(unsigned int value, unsigned int alignment) +{ + return (value + (alignment - 1)) & ~(alignment - 1); +} + void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource); // Font creation flags diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index 49fe92fa57..6f8698a76c 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -15,8 +15,6 @@ namespace DX12 { -static XFBEncoder s_xfbEncoder; - FramebufferManager::Efb FramebufferManager::m_efb; unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; @@ -133,14 +131,10 @@ FramebufferManager::FramebufferManager() } InitializeEFBAccessCopies(); - - s_xfbEncoder.Init(); } FramebufferManager::~FramebufferManager() { - s_xfbEncoder.Shutdown(); - DestroyEFBAccessCopies(); SAFE_RELEASE(m_efb.color_tex); @@ -153,7 +147,9 @@ 
FramebufferManager::~FramebufferManager() void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float gamma) { u8* dst = Memory::GetPointer(xfbAddr); - s_xfbEncoder.Encode(dst, fbStride/2, fbHeight, sourceRc, gamma); + D3DTexture2D* src_texture = GetResolvedEFBColorTexture(); + TargetRectangle scaled_rect = g_renderer->ConvertEFBRectangle(sourceRc); + g_xfb_encoder->EncodeTextureToRam(dst, fbStride, fbHeight, src_texture, scaled_rect, m_target_width, m_target_height, gamma); } std::unique_ptr FramebufferManager::CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) @@ -412,8 +408,8 @@ void FramebufferManager::DestroyEFBAccessCopies() void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) { - // DX12's XFB decoder does not use this function. - // YUYV data is decoded in Render::Swap. + u8* src = Memory::GetPointer(xfbAddr); + g_xfb_encoder->DecodeToTexture(m_tex, src, fbWidth, fbHeight); } void XFBSource::CopyEFB(float gamma) diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index 2b98788a04..a4f17a84b3 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -29,7 +29,6 @@ #include "VideoBackends/D3D12/ShaderCache.h" #include "VideoBackends/D3D12/ShaderConstantsManager.h" #include "VideoBackends/D3D12/StaticShaderCache.h" -#include "VideoBackends/D3D12/Television.h" #include "VideoBackends/D3D12/TextureCache.h" #include "VideoCommon/AVIDump.h" @@ -50,8 +49,6 @@ static u32 s_last_multisamples = 1; static bool s_last_stereo_mode = false; static bool s_last_xfb_mode = false; -static Television s_television; - enum CLEAR_BLEND_DESC { CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0, @@ -104,8 +101,6 @@ StateCache gx_state_cache; static void SetupDeviceObjects() { - s_television.Init(); - g_framebuffer_manager = std::make_unique(); D3D12_DEPTH_STENCIL_DESC depth_desc; 
@@ -175,8 +170,6 @@ static void TeardownDeviceObjects() s_screenshot_texture = nullptr; } - s_television.Shutdown(); - gx_state_cache.Clear(); } @@ -750,15 +743,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height // activate linear filtering for the buffer copies D3D::SetLinearCopySampler(); - if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB) - { - // EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well. - D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); - - s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height); - s_television.Render(); - } - else if (g_ActiveConfig.bUseXFB) + if (g_ActiveConfig.bUseXFB) { const XFBSource* xfb_source; @@ -768,33 +753,40 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height xfb_source = static_cast(xfb_source_list[i]); TargetRectangle drawRc; - - // use virtual xfb with offset - int xfb_height = xfb_source->srcHeight; - int xfb_width = xfb_source->srcWidth; - int hOffset = (static_cast(xfb_source->srcAddr) - static_cast(xfb_addr)) / (static_cast(fb_stride) * 2); - - drawRc.top = target_rc.top + hOffset * target_rc.GetHeight() / static_cast(fb_height); - drawRc.bottom = target_rc.top + (hOffset + xfb_height) * target_rc.GetHeight() / static_cast(fb_height); - drawRc.left = target_rc.left + (target_rc.GetWidth() - xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; - drawRc.right = target_rc.left + (target_rc.GetWidth() + xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; - - // The following code disables auto stretch. Kept for reference. 
- // scale draw area for a 1 to 1 pixel mapping with the draw target - //float vScale = static_cast(fbHeight) / static_cast(s_backbuffer_height); - //float hScale = static_cast(fbWidth) / static_cast(s_backbuffer_width); - //drawRc.top *= vScale; - //drawRc.bottom *= vScale; - //drawRc.left *= hScale; - //drawRc.right *= hScale; - TargetRectangle source_rc; source_rc.left = xfb_source->sourceRc.left; source_rc.top = xfb_source->sourceRc.top; source_rc.right = xfb_source->sourceRc.right; source_rc.bottom = xfb_source->sourceRc.bottom; - source_rc.right -= Renderer::EFBToScaledX(fb_stride - fb_width); + // use virtual xfb with offset + int xfb_height = xfb_source->srcHeight; + int xfb_width = xfb_source->srcWidth; + int hOffset = (static_cast(xfb_source->srcAddr) - static_cast(xfb_addr)) / (static_cast(fb_stride) * 2); + + if (g_ActiveConfig.bUseRealXFB) + { + drawRc = target_rc; + source_rc.right -= fb_stride - fb_width; + } + else + { + drawRc.top = target_rc.top + hOffset * target_rc.GetHeight() / static_cast(fb_height); + drawRc.bottom = target_rc.top + (hOffset + xfb_height) * target_rc.GetHeight() / static_cast(fb_height); + drawRc.left = target_rc.left + (target_rc.GetWidth() - xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + drawRc.right = target_rc.left + (target_rc.GetWidth() + xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + + // The following code disables auto stretch. Kept for reference. 
+ // scale draw area for a 1 to 1 pixel mapping with the draw target + //float vScale = static_cast(fbHeight) / static_cast(s_backbuffer_height); + //float hScale = static_cast(fbWidth) / static_cast(s_backbuffer_width); + //drawRc.top *= vScale; + //drawRc.bottom *= vScale; + //drawRc.left *= hScale; + //drawRc.right *= hScale; + + source_rc.right -= Renderer::EFBToScaledX(fb_stride - fb_width); + } BlitScreen(source_rc, drawRc, xfb_source->m_tex, xfb_source->texWidth, xfb_source->texHeight, gamma); } diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp index e2bd65aac9..38de296e87 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp @@ -18,6 +18,8 @@ static ID3DBlob* s_depth_matrix_program_blob[2] = {}; static ID3DBlob* s_depth_resolve_to_color_program_blob = {}; static ID3DBlob* s_clear_program_blob = {}; static ID3DBlob* s_anaglyph_program_blob = {}; +static ID3DBlob* s_xfb_encode_shader_blob = {}; +static ID3DBlob* s_xfb_decode_shader_blob = {}; static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {}; static ID3DBlob* s_rgb8_to_rgba6_program_blob[2] = {}; @@ -411,6 +413,93 @@ static constexpr const char s_copy_geometry_shader_hlsl[] = { "}\n" }; +static const char s_xfb_encode_shader_hlsl[] = R"( + +Texture2DArray tex0 : register(t0); +SamplerState samp0 : register(s0); + +cbuffer EncodeParams : register(b0) +{ + float4 srcRect; + float2 texelSize; +} + +// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see +// +static const float3x4 RGB_TO_YCBCR = float3x4( + 0.257, 0.504, 0.098, 16.0/255.0, + -0.148, -0.291, 0.439, 128.0/255.0, + 0.439, -0.368, -0.071, 128.0/255.0 +); + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0, + in float gamma : TEXCOORD1) +{ + // Load three input pixels, emulate clamp sampler by clamping to the source rectangle. 
+ // Subtract 0.5 from the x coordinate because we're doubling the width, and want the pixel center shifted back to 0.5. + // The native resolution is used as a reference here so bilinear filtering works as expected. + float2 baseCoords = lerp(srcRect.xy, srcRect.zw, float2(uv0.x - 0.5 * texelSize.x, uv0.y)); + float3 sampleL = tex0.Sample(samp0, float3(max(srcRect.xy, baseCoords - float2(texelSize.x, 0)), 0)).rgb; + float3 sampleM = tex0.Sample(samp0, float3(baseCoords, 0)).rgb; + float3 sampleR = tex0.Sample(samp0, float3(min(srcRect.zw, baseCoords + float2(texelSize.x, 0)), 0)).rgb; + + // Gamma correction (gamma is already rcp(gamma)) + // abs() here because the HLSL compiler throws a warning otherwise. + sampleL = pow(abs(sampleL), gamma); + sampleM = pow(abs(sampleM), gamma); + sampleR = pow(abs(sampleR), gamma); + + // RGB -> YUV + float3 yuvL = mul(RGB_TO_YCBCR, float4(sampleL,1)); + float3 yuvM = mul(RGB_TO_YCBCR, float4(sampleM,1)); + float3 yuvR = mul(RGB_TO_YCBCR, float4(sampleR,1)); + + // The Y components correspond to two EFB pixels, while the U and V are + // made from a blend of three EFB pixels. + float y0 = yuvM.r; + float y1 = yuvR.r; + float u0 = 0.25*yuvL.g + 0.5*yuvM.g + 0.25*yuvR.g; + float v0 = 0.25*yuvL.b + 0.5*yuvM.b + 0.25*yuvR.b; + ocol0 = float4(y0, u0, y1, v0); +} + +)"; + +static const char s_xfb_decode_shader_hlsl[] = R"( + +Texture2DArray tex0 : register(t0); + +static const float3x3 YCBCR_TO_RGB = float3x3( + 1.164, 0.000, 1.596, + 1.164, -0.392, -0.813, + 1.164, 2.017, 0.000 +); + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0) +{ + // Divide coordinates by 2 due to half-width YUYV texture. + int2 ipos = int2(pos.xy); + int2 texpos = int2(ipos.x >> 1, ipos.y); + float4 yuyv = tex0.Load(int4(texpos, 0, 0)); + + // Select Y0 (red channel) for even pixels, Y1 (blue channel) for odd pixels. 
+ float y = lerp(yuyv.r, yuyv.b, float(ipos.x & 1)); + + // Recover RGB components + float3 yuv_601_sub = float3(y, yuyv.ga) - float3(16.0/255.0, 128.0/255.0, 128.0/255.0); + float3 rgb_601 = mul(YCBCR_TO_RGB, yuv_601_sub); + + ocol0 = float4(rgb_601, 1); +} + +)"; + D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGBA6ToRGB8PixelShader(bool multisampled) { D3D12_SHADER_BYTECODE bytecode = {}; @@ -625,6 +714,28 @@ D3D12_SHADER_BYTECODE StaticShaderCache::GetCopyGeometryShader() return bytecode; } +D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBEncodePixelShader() +{ + D3D12_SHADER_BYTECODE bytecode = + { + s_xfb_encode_shader_blob->GetBufferPointer(), + s_xfb_encode_shader_blob->GetBufferSize() + }; + + return bytecode; +} + +D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBDecodePixelShader() +{ + D3D12_SHADER_BYTECODE bytecode = + { + s_xfb_decode_shader_blob->GetBufferPointer(), + s_xfb_decode_shader_blob->GetBufferSize() + }; + + return bytecode; +} + void StaticShaderCache::Init() { // Compile static pixel shaders @@ -633,6 +744,8 @@ void StaticShaderCache::Init() D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]); D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]); D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]); + D3D::CompilePixelShader(s_xfb_encode_shader_hlsl, &s_xfb_encode_shader_blob); + D3D::CompilePixelShader(s_xfb_decode_shader_hlsl, &s_xfb_decode_shader_blob); // Compile static vertex shaders D3D::CompileVertexShader(s_simple_vertex_shader_hlsl, &s_simple_vertex_shader_blob); @@ -657,7 +770,8 @@ void StaticShaderCache::InvalidateMSAAShaders() void StaticShaderCache::Shutdown() { // Free pixel shader blobs - + SAFE_RELEASE(s_xfb_decode_shader_blob); + SAFE_RELEASE(s_xfb_encode_shader_blob); SAFE_RELEASE(s_clear_program_blob); SAFE_RELEASE(s_anaglyph_program_blob); SAFE_RELEASE(s_depth_resolve_to_color_program_blob); diff --git 
a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h index 4b9f6959a8..492363f0a8 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h @@ -23,6 +23,8 @@ public: static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader(); static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetReinterpRGB8ToRGBA6PixelShader(bool multisampled); + static D3D12_SHADER_BYTECODE GetXFBEncodePixelShader(); + static D3D12_SHADER_BYTECODE GetXFBDecodePixelShader(); // Vertex shaders static D3D12_SHADER_BYTECODE GetSimpleVertexShader(); diff --git a/Source/Core/VideoBackends/D3D12/Television.cpp b/Source/Core/VideoBackends/D3D12/Television.cpp deleted file mode 100644 index 548859a1ac..0000000000 --- a/Source/Core/VideoBackends/D3D12/Television.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D12/D3DBase.h" -#include "VideoBackends/D3D12/D3DShader.h" -#include "VideoBackends/D3D12/D3DState.h" -#include "VideoBackends/D3D12/D3DUtil.h" -#include "VideoBackends/D3D12/Television.h" -#include "VideoCommon/VideoConfig.h" - -// D3D12TODO: Add DX12 path for this file. - -namespace DX12 -{ - -Television::Television() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Init() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Shutdown() -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Submit(u32 xfb_address, u32 stride, u32 width, u32 height) -{ - // D3D12TODO: Add DX12 path for this file. -} - -void Television::Render() -{ - // D3D12TODO: Add DX12 path for this file. 
-} - -} diff --git a/Source/Core/VideoBackends/D3D12/Television.h b/Source/Core/VideoBackends/D3D12/Television.h deleted file mode 100644 index 1bfbbb7c7d..0000000000 --- a/Source/Core/VideoBackends/D3D12/Television.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "VideoCommon/VideoCommon.h" - -// D3D12TODO: Add DX12 path for this file. - -namespace DX12 -{ - -class Television final -{ - -public: - - Television(); - - void Init(); - void Shutdown(); - - // Submit video data to be drawn. This will change the current state of the - // TV. xfbAddr points to YUYV data stored in GameCube/Wii RAM, but the XFB - // may be virtualized when rendering so the RAM may not actually be read. - void Submit(u32 xfb_address, u32 stride, u32 width, u32 height); - - // Render the current state of the TV. - void Render(); - -private: - - -}; - -} diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp index a2933ed268..f7147c0c04 100644 --- a/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp @@ -1,4 +1,4 @@ -// Copyright 2011 Dolphin Emulator Project +// Copyright 2016 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. @@ -6,33 +6,178 @@ #include "Common/MsgHandler.h" #include "Common/Logging/Log.h" #include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" #include "VideoBackends/D3D12/D3DShader.h" #include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" #include "VideoBackends/D3D12/FramebufferManager.h" #include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" #include "VideoBackends/D3D12/XFBEncoder.h" -// D3D12TODO: Convert this file.. 
- namespace DX12 { +// YUYV data is packed into half-width RGBA, with Y values in (R,B) and UV in (G,A) +constexpr size_t XFB_TEXTURE_WIDTH = MAX_XFB_WIDTH / 2; +constexpr size_t XFB_TEXTURE_HEIGHT = MAX_XFB_HEIGHT; + +// Buffer enough space for 2 XFB buffers (our frame latency) +constexpr size_t XFB_UPLOAD_BUFFER_SIZE = D3D::AlignValue(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * XFB_TEXTURE_HEIGHT * 2; +constexpr size_t XFB_ENCODER_PARAMS_BUFFER_SIZE = 64 * 1024; + +std::unique_ptr g_xfb_encoder; + XFBEncoder::XFBEncoder() -{ } - -void XFBEncoder::Init() { - // D3D12TODO: Convert this file.. + ID3D12Resource* texture; + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), + D3D12_RESOURCE_STATE_RENDER_TARGET, + nullptr, + IID_PPV_ARGS(&texture))); + + m_yuyv_texture = new D3DTexture2D(texture, + (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), + DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM); + SAFE_RELEASE(texture); + + CheckHR(D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(D3D::AlignValue(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * MAX_XFB_HEIGHT), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readback_buffer))); + + m_upload_buffer = std::make_unique(XFB_UPLOAD_BUFFER_SIZE, XFB_UPLOAD_BUFFER_SIZE, nullptr); + m_encode_params_buffer = std::make_unique(XFB_ENCODER_PARAMS_BUFFER_SIZE, XFB_ENCODER_PARAMS_BUFFER_SIZE, nullptr); } -void XFBEncoder::Shutdown() +XFBEncoder::~XFBEncoder() { - // D3D12TODO: Convert this file.. 
+ SAFE_RELEASE(m_yuyv_texture); + SAFE_RELEASE(m_readback_buffer); } -void XFBEncoder::Encode(u8* dst, u32 width, u32 height, const EFBRectangle& srcRect, float gamma) +void XFBEncoder::EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height, + D3DTexture2D* src_texture, const TargetRectangle& src_rect, + u32 src_width, u32 src_height, float gamma) { - // D3D12TODO: Convert this file.. + // src_rect is in native coordinates + // dst_pitch is in bytes (each YUYV pixel occupies two bytes) + u32 dst_width = dst_pitch / 2; + u32 dst_texture_width = dst_width / 2; + _assert_msg_(VIDEO, dst_width <= MAX_XFB_WIDTH && dst_height <= MAX_XFB_HEIGHT, "XFB destination does not exceed maximum size"); + + // Encode parameters constant buffer used by shader + struct EncodeParameters + { + float srcRect[4]; + float texelSize[2]; + float pad[2]; + }; + EncodeParameters parameters = + { + { + static_cast<float>(src_rect.left) / static_cast<float>(src_width), + static_cast<float>(src_rect.top) / static_cast<float>(src_height), + static_cast<float>(src_rect.right) / static_cast<float>(src_width), + static_cast<float>(src_rect.bottom) / static_cast<float>(src_height) + }, + { + 1.0f / EFB_WIDTH, + 1.0f / EFB_HEIGHT + }, + { + 0.0f, + 0.0f + } + }; + m_encode_params_buffer->AllocateSpaceInBuffer(sizeof(parameters), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + memcpy(m_encode_params_buffer->GetCPUAddressOfCurrentAllocation(), &parameters, sizeof(parameters)); + + // Convert RGBA texture to YUYV intermediate texture. + // Performs downscaling through a linear filter. Probably not ideal, but it's not going to look perfect anyway. 
+ CD3DX12_RECT src_texture_rect(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom); + D3D12_RESOURCE_STATES src_texture_state = src_texture->GetResourceUsageState(); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &m_yuyv_texture->GetRTV12(), FALSE, nullptr); + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_encode_params_buffer->GetGPUAddressOfCurrentAllocation()); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + D3D::SetViewportAndScissor(0, 0, dst_texture_width, dst_height); + D3D::SetLinearCopySampler(); + D3D::DrawShadedTexQuad( + src_texture, &src_texture_rect, src_rect.GetWidth(), src_rect.GetHeight(), + StaticShaderCache::GetXFBEncodePixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + src_texture->TransitionToResourceState(D3D::current_command_list, src_texture_state); + + // Copy from YUYV intermediate texture to readback buffer. It's likely the pitch here is going to be different to dst_pitch. + u32 readback_pitch = D3D::AlignValue(dst_width * sizeof(u16), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, dst_texture_width, dst_height, 1, readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(m_yuyv_texture->GetTex12(), 0); + CD3DX12_BOX src_box(0, 0, dst_texture_width, dst_height); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + + // Wait until the GPU completes the copy. Resets back to known state automatically. 
+ D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Copy from the readback buffer to dst. + // Can't be done as one memcpy due to pitch difference. + void* readback_texture_map; + CheckHR(m_readback_buffer->Map(0, nullptr, &readback_texture_map)); + + for (u32 row = 0; row < dst_height; row++) + { + const u8* row_src = reinterpret_cast(readback_texture_map) + readback_pitch * row; + u8* row_dst = dst + dst_pitch * row; + memcpy(row_dst, row_src, std::min(dst_pitch, readback_pitch)); + } + + m_readback_buffer->Unmap(0, nullptr); +} + +void XFBEncoder::DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width, u32 src_height) +{ + _assert_msg_(VIDEO, src_width <= MAX_XFB_WIDTH && src_height <= MAX_XFB_HEIGHT, "XFB source does not exceed maximum size"); + + // Copy to XFB upload buffer. Each row has to be done separately due to pitch differences. + u32 buffer_pitch = D3D::AlignValue(src_width / 2 * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + m_upload_buffer->AllocateSpaceInBuffer(buffer_pitch * src_height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + for (u32 row = 0; row < src_height; row++) + { + const u8* row_src = src + (src_width * 2) * row; + u8* row_dst = reinterpret_cast(m_upload_buffer->GetCPUAddressOfCurrentAllocation()) + buffer_pitch * row; + memcpy(row_dst, row_src, src_width * 2); + } + + // Copy from upload buffer to intermediate YUYV texture. 
+ D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = { m_upload_buffer->GetOffsetOfCurrentAllocation(), { DXGI_FORMAT_R8G8B8A8_UNORM, src_width / 2, src_height, 1, buffer_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION src_location(m_upload_buffer->GetBuffer(), src_footprint); + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_yuyv_texture->GetTex12(), 0); + CD3DX12_BOX src_box(0, 0, src_width / 2, src_height); + m_yuyv_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + + // Convert YUYV texture to RGBA texture with pixel shader. + CD3DX12_RECT src_texture_rect(0, 0, src_width / 2, src_height); + dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &dst_texture->GetRTV12(), FALSE, nullptr); + D3D::SetViewportAndScissor(0, 0, src_width, src_height); + D3D::DrawShadedTexQuad( + m_yuyv_texture, &src_texture_rect, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT, + StaticShaderCache::GetXFBDecodePixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + // XFB source textures are expected to be in shader resource state. + dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } } diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.h b/Source/Core/VideoBackends/D3D12/XFBEncoder.h index 68d2cd3839..cb4f712cd6 100644 --- a/Source/Core/VideoBackends/D3D12/XFBEncoder.h +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.h @@ -1,28 +1,42 @@ -// Copyright 2011 Dolphin Emulator Project +// Copyright 2016 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. 
#pragma once +#include +#include + +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DTexture.h" #include "VideoCommon/VideoCommon.h" namespace DX12 { +class D3DTexture2D; + class XFBEncoder { - public: XFBEncoder(); + ~XFBEncoder(); - void Init(); - void Shutdown(); + void EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height, + D3DTexture2D* src_texture, const TargetRectangle& src_rect, + u32 src_width, u32 src_height, float gamma); - void Encode(u8* dst, u32 width, u32 height, const EFBRectangle& src_rect, float gamma); + void DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width, u32 src_height); private: - // D3D12TODO: Implement this class + D3DTexture2D* m_yuyv_texture; + ID3D12Resource* m_readback_buffer; + + std::unique_ptr m_upload_buffer; + std::unique_ptr m_encode_params_buffer; }; +extern std::unique_ptr g_xfb_encoder; + } diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index 1f75854a33..3cd04ba656 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -23,6 +23,7 @@ #include "VideoBackends/D3D12/TextureCache.h" #include "VideoBackends/D3D12/VertexManager.h" #include "VideoBackends/D3D12/VideoBackend.h" +#include "VideoBackends/D3D12/XFBEncoder.h" #include "VideoCommon/BPStructs.h" #include "VideoCommon/CommandProcessor.h" @@ -179,6 +180,7 @@ void VideoBackend::Video_Prepare() g_texture_cache = std::make_unique(); g_vertex_manager = std::make_unique(); g_perf_query = std::make_unique(); + g_xfb_encoder = std::make_unique(); ShaderCache::Init(); ShaderConstantsManager::Init(); StaticShaderCache::Init(); @@ -228,6 +230,7 @@ void VideoBackend::Shutdown() StaticShaderCache::Shutdown(); BBox::Shutdown(); + g_xfb_encoder.reset(); g_perf_query.reset(); g_vertex_manager.reset(); g_texture_cache.reset(); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 
0960bbd5f7..1bf8b8e10e 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1309,6 +1309,11 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co { drawRc = flipped_trc; sourceRc.right -= fbStride - fbWidth; + + // RealXFB doesn't call ConvertEFBRectangle for sourceRc, therefore it is still assuming a top-left origin. + // The top offset is always zero (see FramebufferManagerBase::GetRealXFBSource). + sourceRc.top = sourceRc.bottom; + sourceRc.bottom = 0; } else { diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.cpp b/Source/Core/VideoCommon/FramebufferManagerBase.cpp index c5597dddba..ab095f1020 100644 --- a/Source/Core/VideoCommon/FramebufferManagerBase.cpp +++ b/Source/Core/VideoCommon/FramebufferManagerBase.cpp @@ -67,12 +67,10 @@ const XFBSourceBase* const* FramebufferManagerBase::GetRealXFBSource(u32 xfbAddr m_realXFBSource->texWidth = fbWidth; m_realXFBSource->texHeight = fbHeight; - // OpenGL texture coordinates originate at the lower left, which is why - // sourceRc.top = fbHeight and sourceRc.bottom = 0. m_realXFBSource->sourceRc.left = 0; - m_realXFBSource->sourceRc.top = fbHeight; + m_realXFBSource->sourceRc.top = 0; m_realXFBSource->sourceRc.right = fbWidth; - m_realXFBSource->sourceRc.bottom = 0; + m_realXFBSource->sourceRc.bottom = fbHeight; // Decode YUYV data from GameCube RAM m_realXFBSource->DecodeToTexture(xfbAddr, fbWidth, fbHeight);