dolphin/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp
Lioncash 8da425b008 Formatting cleanup for VideoCommon.
Block braces on new lines.

Also killed off trailing whitespace and dangling elses.

Spaced some things out to make them more readable (only in places where it looked like a bit of a clusterfuck).
2013-04-24 09:21:54 -04:00

196 lines
5.2 KiB
C++

// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <limits>
#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
#include "VertexLoader_Position.h"
#include "VertexManagerBase.h"
#include "CPUDetect.h"
extern float posScale;
extern TVtxAttr *pVtxAttr;
// Thoughts on the implementation of a vertex loader compiler.
// s_pCurBufferPointer should definitely be in a register.
// Could load the position scale factor in XMM7, for example.
// The pointer used inside DataReadU8 could be kept in another register.
// Let's check out Pos_ReadDirect_UByte(). For Byte, replace MOVZX with MOVSX.
/*
MOVZX(32, R(EAX), MOffset(ESI, 0));
MOVZX(32, R(EBX), MOffset(ESI, 1));
MOVZX(32, R(ECX), MOffset(ESI, 2));
MOVD(XMM0, R(EAX));
MOVD(XMM1, R(EBX));
MOVD(XMM2, R(ECX));
CVTDQ2PS(XMM0, XMM0);
CVTDQ2PS(XMM1, XMM1);
CVTDQ2PS(XMM2, XMM2);
MULSS(XMM0, XMM7);
MULSS(XMM1, XMM7);
MULSS(XMM2, XMM7);
MOVSS(MOffset(EDI, 0), XMM0);
MOVSS(MOffset(EDI, 4), XMM1);
MOVSS(MOffset(EDI, 8), XMM2);
Alternatively, lookup table:
MOVZX(32, R(EAX), MOffset(ESI, 0));
MOVZX(32, R(EBX), MOffset(ESI, 1));
MOVZX(32, R(ECX), MOffset(ESI, 2));
MOV(32, R(EAX), MComplex(LUTREG, EAX, 4));
MOV(32, R(EBX), MComplex(LUTREG, EBX, 4));
MOV(32, R(ECX), MComplex(LUTREG, ECX, 4));
MOV(MOffset(EDI, 0), XMM0);
MOV(MOffset(EDI, 4), XMM1);
MOV(MOffset(EDI, 8), XMM2);
SSE4:
PINSRB(XMM0, MOffset(ESI, 0), 0);
PINSRB(XMM0, MOffset(ESI, 1), 4);
PINSRB(XMM0, MOffset(ESI, 2), 8);
CVTDQ2PS(XMM0, XMM0);
<two unpacks here to sign extend>
MULPS(XMM0, XMM7);
MOVUPS(MOffset(EDI, 0), XMM0);
*/
// Generic position-component conversion: multiplies the raw value by the
// global posScale factor and returns the product as a float. A separate
// specialization handles float input.
template <typename T>
float PosScale(T val)
{
	const auto scaled = val * posScale;
	return scaled;
}
template <>
float PosScale(float val)
{
return val;
}
// Direct position reader.
//   T - component type pulled from the input with DataRead<T>()
//   N - number of components present in the input (at most 3)
// Three floats are always handed to DataWrite: the first N are the input
// components run through PosScale, the remaining ones are 0.
template <typename T, int N>
void LOADERDECL Pos_ReadDirect()
{
	static_assert(N <= 3, "N > 3 is not sane!");

	for (int component = 0; component < 3; ++component)
	{
		if (component < N)
			DataWrite(PosScale(DataRead<T>()));
		else
			DataWrite(0.f);
	}

	LOG_VTX();
}
// Indexed position reader.
//   I - unsigned index type pulled from the input with DataRead<I>()
//   T - component type stored in the position array
//   N - number of components stored per entry (at most 3)
// The index selects an entry at
//   cached_arraybases[ARRAY_POSITION] + index * arraystrides[ARRAY_POSITION]
// whose components are converted with Common::FromBigEndian, scaled with
// PosScale and passed to DataWrite. Components beyond N are written as 0.
template <typename I, typename T, int N>
void LOADERDECL Pos_ReadIndex()
{
	static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
	static_assert(N <= 3, "N > 3 is not sane!");

	auto const index = DataRead<I>();

	// The maximum value of I is not looked up: nothing is written for it.
	if (index >= std::numeric_limits<I>::max())
		return;

	auto const offset = index * arraystrides[ARRAY_POSITION];
	auto const source = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + offset);

	for (int component = 0; component < 3; ++component)
	{
		if (component < N)
			DataWrite(PosScale(Common::FromBigEndian(source[component])));
		else
			DataWrite(0.f);
	}

	LOG_VTX();
}
#if _M_SSE >= 0x301
// Byte-shuffle control masks used with _mm_shuffle_epi8 by
// Pos_ReadIndex_Float_SSSE3. _mm_set_epi32 takes its lanes from highest to
// lowest, so the last argument controls the first 32-bit lane in memory.
// A lane such as 0x00010203 reverses the byte order of source lane 0; a lane
// of 0xFFFFFFFF has the top bit set in every control byte, which makes the
// shuffle produce zero for that lane.

// Byte-swaps lanes 0, 1 and 2; zeroes lane 3.
static const __m128i kMaskSwap32_3 = _mm_set_epi32(
	0xFFFFFFFFL,
	0x08090A0BL,
	0x04050607L,
	0x00010203L);

// Byte-swaps lanes 0 and 1; zeroes lanes 2 and 3.
static const __m128i kMaskSwap32_2 = _mm_set_epi32(
	0xFFFFFFFFL,
	0xFFFFFFFFL,
	0x04050607L,
	0x00010203L);
// SSSE3 variant of the indexed float position reader.
//   I     - unsigned index type pulled from the input with DataRead<I>()
//   three - true when the array entry holds three floats, false for two
// Loads 16 bytes from the selected array entry, byte-swaps the first three
// (or two) 32-bit lanes with a single shuffle and zeroes the rest, then
// stores the result at VertexManager::s_pCurBufferPointer.
template <typename I, bool three>
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{
	// Same requirement as Pos_ReadIndex: the "index < max" test below is only
	// meaningful for an unsigned index type.
	static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");

	auto const index = DataRead<I>();

	// The maximum value of I is not looked up: nothing is written for it.
	if (index < std::numeric_limits<I>::max())
	{
		const u32* pData = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));

		// Unaligned 16-byte load; only the first 8 or 12 bytes are kept by the mask.
		GC_ALIGNED128(const __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pData)));
		GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));

		// A full 16 bytes are stored, but the write pointer only advances by
		// three floats (12 bytes); the last 4 bytes land past the new position.
		_mm_storeu_si128(reinterpret_cast<__m128i*>(VertexManager::s_pCurBufferPointer), b);
		VertexManager::s_pCurBufferPointer += sizeof(float) * 3;

		LOG_VTX();
	}
}
#endif
// Reader lookup table, indexed as [type][format][elements] (see GetFunction).
//   type     - 0: no reader, 1: direct, 2: 8-bit index, 3: 16-bit index
//   format   - 0: u8, 1: s8, 2: u16, 3: s16, 4: float
//              (slots 5..7 are not listed and are therefore null)
//   elements - 0: two components, 1: three components
// Not const: Init() may replace the indexed float entries at runtime.
static TPipelineFunction tableReadPosition[4][8][2] = {
	// type 0: no reader
	{
		{nullptr, nullptr},
		{nullptr, nullptr},
		{nullptr, nullptr},
		{nullptr, nullptr},
		{nullptr, nullptr},
	},
	// type 1: components read directly from the input
	{
		{Pos_ReadDirect<u8, 2>,    Pos_ReadDirect<u8, 3>},
		{Pos_ReadDirect<s8, 2>,    Pos_ReadDirect<s8, 3>},
		{Pos_ReadDirect<u16, 2>,   Pos_ReadDirect<u16, 3>},
		{Pos_ReadDirect<s16, 2>,   Pos_ReadDirect<s16, 3>},
		{Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>},
	},
	// type 2: u8 index into the position array
	{
		{Pos_ReadIndex<u8, u8, 2>,    Pos_ReadIndex<u8, u8, 3>},
		{Pos_ReadIndex<u8, s8, 2>,    Pos_ReadIndex<u8, s8, 3>},
		{Pos_ReadIndex<u8, u16, 2>,   Pos_ReadIndex<u8, u16, 3>},
		{Pos_ReadIndex<u8, s16, 2>,   Pos_ReadIndex<u8, s16, 3>},
		{Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>},
	},
	// type 3: u16 index into the position array
	{
		{Pos_ReadIndex<u16, u8, 2>,    Pos_ReadIndex<u16, u8, 3>},
		{Pos_ReadIndex<u16, s8, 2>,    Pos_ReadIndex<u16, s8, 3>},
		{Pos_ReadIndex<u16, u16, 2>,   Pos_ReadIndex<u16, u16, 3>},
		{Pos_ReadIndex<u16, s16, 2>,   Pos_ReadIndex<u16, s16, 3>},
		{Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>},
	},
};
// Size lookup table, indexed as [type][format][elements] exactly like
// tableReadPosition (see GetSize). The values line up with the number of
// input bytes the matching reader pulls: component size times component
// count for direct data, 1 for a u8 index, 2 for a u16 index.
// Format slots 5..7 are not listed and are therefore 0.
static int tableReadPositionVertexSize[4][8][2] = {
	// type 0: no reader
	{
		{0, 0},   // u8
		{0, 0},   // s8
		{0, 0},   // u16
		{0, 0},   // s16
		{0, 0},   // float
	},
	// type 1: direct
	{
		{2, 3},   // u8
		{2, 3},   // s8
		{4, 6},   // u16
		{4, 6},   // s16
		{8, 12},  // float
	},
	// type 2: u8 index
	{
		{1, 1},   // u8
		{1, 1},   // s8
		{1, 1},   // u16
		{1, 1},   // s16
		{1, 1},   // float
	},
	// type 3: u16 index
	{
		{2, 2},   // u8
		{2, 2},   // s8
		{2, 2},   // u16
		{2, 2},   // s16
		{2, 2},   // float
	},
};
// Patches tableReadPosition at runtime: when the build targets SSSE3 or
// later (_M_SSE >= 0x301) and cpu_info reports SSSE3 support, the indexed
// float readers (u8 and u16 index, two and three components) are replaced
// by their Pos_ReadIndex_Float_SSSE3 counterparts. Otherwise the table is
// left as statically initialized.
void VertexLoader_Position::Init(void)
{
#if _M_SSE >= 0x301
	if (!cpu_info.bSSSE3)
		return;

	// [2] = u8 index, [3] = u16 index; [4] = float format.
	tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3<u8, false>;
	tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3<u8, true>;
	tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3<u16, false>;
	tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3<u16, true>;
#endif
}
// Looks up the size entry for the given type/format/elements combination in
// tableReadPositionVertexSize. The arguments are used as array indices
// without range checking; the table dimensions are [4][8][2].
unsigned int VertexLoader_Position::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements)
{
	const int size = tableReadPositionVertexSize[_type][_format][_elements];
	return size;
}
// Looks up the reader function for the given type/format/elements
// combination in tableReadPosition. The result is null for type 0 and for
// format slots the table does not list. The arguments are used as array
// indices without range checking; the table dimensions are [4][8][2].
TPipelineFunction VertexLoader_Position::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements)
{
	const TPipelineFunction reader = tableReadPosition[_type][_format][_elements];
	return reader;
}