From 214aea1aeac5de10b285c288d0e8f01b3a19ba25 Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Sat, 24 Dec 2016 17:34:33 +1000
Subject: [PATCH] DSPHWInterface: Use SSSE3 function targeting

---
 Source/Core/Core/DSP/DSPHWInterface.cpp | 46 ++++++++++++++++---------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp
index 7620f3bee9..5e19da22db 100644
--- a/Source/Core/Core/DSP/DSPHWInterface.cpp
+++ b/Source/Core/Core/DSP/DSPHWInterface.cpp
@@ -252,25 +252,41 @@ static const u8* gdsp_idma_out(u16 dsp_addr, u32 addr, u32 size)
   return nullptr;
 }
 
-#if _M_SSE >= 0x301
+#if defined(_M_X86) || defined(_M_X86_64)
 static const __m128i s_mask = _mm_set_epi32(0x0E0F0C0DL, 0x0A0B0809L, 0x06070405L, 0x02030001L);
+
+FUNCTION_TARGET_SSSE3
+static void gdsp_ddma_in_SSSE3(u16 dsp_addr, u32 addr, u32 size, u8* dst)
+{
+  for (u32 i = 0; i < size; i += 16)
+  {
+    _mm_storeu_si128(
+        (__m128i*)&dst[dsp_addr + i],
+        _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]),
+                         s_mask));
+  }
+}
+
+FUNCTION_TARGET_SSSE3
+static void gdsp_ddma_out_SSSE3(u16 dsp_addr, u32 addr, u32 size, const u8* src)
+{
+  for (u32 i = 0; i < size; i += 16)
+  {
+    _mm_storeu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF],
+                     _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&src[dsp_addr + i]), s_mask));
+  }
+}
 #endif
 
 // TODO: These should eat clock cycles.
 static const u8* gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size)
 {
-  u8* dst = ((u8*)g_dsp.dram);
+  u8* dst = reinterpret_cast<u8*>(g_dsp.dram);
 
-#if _M_SSE >= 0x301
+#if defined(_M_X86) || defined(_M_X86_64)
   if (cpu_info.bSSSE3 && !(size % 16))
   {
-    for (u32 i = 0; i < size; i += 16)
-    {
-      _mm_storeu_si128(
-          (__m128i*)&dst[dsp_addr + i],
-          _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]),
-                           s_mask));
-    }
+    gdsp_ddma_in_SSSE3(dsp_addr, addr, size, dst);
   }
   else
 #endif
@@ -289,16 +305,12 @@ static const u8* gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size)
 
 static const u8* gdsp_ddma_out(u16 dsp_addr, u32 addr, u32 size)
 {
-  const u8* src = ((const u8*)g_dsp.dram);
+  const u8* src = reinterpret_cast<const u8*>(g_dsp.dram);
 
-#if _M_SSE >= 0x301
+#ifdef _M_X86
   if (cpu_info.bSSSE3 && !(size % 16))
   {
-    for (u32 i = 0; i < size; i += 16)
-    {
-      _mm_storeu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF],
-                       _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&src[dsp_addr + i]), s_mask));
-    }
+    gdsp_ddma_out_SSSE3(dsp_addr, addr, size, src);
   }
   else
 #endif