Added OpenCL texture decoding to RGBA usable by DX11 for formats RGB565, RGBA8, RGB5A3 and CMPR

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5765 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Orphis 2010-06-22 03:10:43 +00:00
parent c2e32371f6
commit 2cc5b98f07
3 changed files with 118 additions and 3 deletions

View file

@ -99,6 +99,25 @@ kernel void DecodeRGBA8(global uchar *dst,
}
}
kernel void DecodeRGBA8_RGBA(global uchar *dst,
const global uchar *src, int width)
{
int x = get_global_id(0) * 4, y = get_global_id(1) * 4;
int srcOffset = (x * 2) + (y * width) / 2;
for (int iy = 0; iy < 4; iy++)
{
uchar8 ar = vload8(srcOffset, src);
uchar8 gb = vload8(srcOffset + 4, src);
uchar16 res;
res.even.even = ar.odd;
res.even.odd = gb.odd;
res.odd.even = gb.even;
res.odd.odd = ar.even;
vstore16(res, 0, dst + ((y + iy)*width + x) * 4);
srcOffset++;
}
}
kernel void DecodeRGB565(global ushort *dst,
const global uchar *src, int width)
{
@ -111,6 +130,25 @@ kernel void DecodeRGB565(global ushort *dst,
}
}
kernel void DecodeRGB565_RGBA(global uchar *dst,
const global uchar *src, int width)
{
int x = get_global_id(0) * 4, y = get_global_id(1) * 4;
int srcOffset = x + (y * width) / 4;
for (int iy = 0; iy < 4; iy++)
{
uchar8 val = vload8(srcOffset++, src);
uchar16 res;
res.even.even = bitselect(val.even, val.even >> (uchar4)5, (uchar4)7);
res.odd.even = bitselect((val.odd >> (uchar4)3) | (val.even << (uchar4)5), val.even >> (uchar4)1, (uchar4)3);
res.even.odd = bitselect(val.odd << (uchar4)3, val.odd >> (uchar4)2, (uchar4)7);
res.odd.odd = (uchar4)0xFF;
vstore16(res, 0, dst + ((y + iy)*width + x) * 4);
}
}
kernel void DecodeRGB5A3(global uchar *dst,
const global uchar *src, int width)
{
@ -141,6 +179,36 @@ kernel void DecodeRGB5A3(global uchar *dst,
iterateRGB5A3();
}
kernel void DecodeRGB5A3_RGBA(global uchar *dst,
const global uchar *src, int width)
{
int x = get_global_id(0) * 4, y = get_global_id(1) * 4;
int srcOffset = x + (y * width) / 4;
uchar8 val;
uchar16 resNoAlpha, resAlpha, res, choice;
#define iterateRGB5A3() \
val = vload8(srcOffset++, src); \
resNoAlpha.s048C = val.even << (uchar4)1; \
resNoAlpha.s159D = val.even << (uchar4)6 | val.odd >> (uchar4)2; \
resNoAlpha.s26AE = val.odd << (uchar4)3; \
resNoAlpha = bitselect(resNoAlpha, resNoAlpha >> (uchar16)5, (uchar16)0x3); \
resNoAlpha.s37BF = (uchar4)(0xFF); \
resAlpha.s048C = bitselect(val.even << (uchar4)4, val.even, (uchar4)0xF); \
resAlpha.s159D = bitselect(val.odd, val.odd >> (uchar4)4, (uchar4)0xF); \
resAlpha.s26AE = bitselect(val.odd << (uchar4)4, val.odd, (uchar4)0xF); \
resAlpha.s37BF = bitselect(val.even << (uchar4)1, val.even >> (uchar4)2, (uchar4)0x1C); \
resAlpha.s37BF = bitselect(resAlpha.s37BF, val.even >> (uchar4)5, (uchar4)0x3); \
choice = (uchar16)((uchar4)(val.even.s0), \
(uchar4)(val.even.s1), \
(uchar4)(val.even.s2), \
(uchar4)(val.even.s3)); \
vstore16(select(resAlpha, resNoAlpha, choice), 0, dst + (y * width + x) * 4);
iterateRGB5A3(); dst += width*4;
iterateRGB5A3(); dst += width*4;
iterateRGB5A3(); dst += width*4;
iterateRGB5A3();
}
uint16 unpack(uchar b)
{
return (uint16)((uint4)(b >> 3 & 0x18),
@ -193,3 +261,48 @@ kernel void DecodeCMPR(global uchar *dst,
decodeCMPRBlock(dst + 16 * width, src, width); src += 8;
decodeCMPRBlock(dst + 16 * (width + 1), src, width);
}
kernel void decodeCMPRBlock_RGBA(global uchar *dst,
const global uchar *src, int width)
{
int x = get_global_id(0) * 4, y = get_global_id(1) * 4;
uchar8 val = vload8(0, src);
uchar2 colora565 = (uchar2)(val.s1, val.s3);
uchar2 colorb565 = (uchar2)(val.s0, val.s2);
uchar8 color32 = (uchar8)(bitselect(colorb565, colorb565 >> (uchar2)5, (uchar2)7),
bitselect((colora565 >> (uchar2)3) | (colorb565 << (uchar2)5), colorb565 >> (uchar2)1, (uchar2)3),
bitselect(colora565 << (uchar2)3, colora565 >> (uchar2)2, (uchar2)7),
(uchar2)0xFF);
ushort4 frac2 = convert_ushort4(color32.even & (uchar4)0xFF) - convert_ushort4(color32.odd & (uchar4)0xFF);
uchar4 frac = convert_uchar4((frac2 * (ushort4)3) / (ushort4)8);
ushort4 colorAlpha = upsample((uchar4)0, rhadd(color32.odd, color32.even));
colorAlpha.s3 = 0xFF;
ushort4 colorNoAlpha = upsample(color32.odd + frac, color32.even - frac);
uint4 colors = upsample((upsample(val.s0,val.s1) > upsample(val.s2,val.s3))?colorNoAlpha:colorAlpha,
upsample(color32.odd, color32.even));
uint16 colorsFull = (uint16)(colors, colors, colors, colors);
vstore16(convert_uchar16(colorsFull >> unpack(val.s4)), 0, dst);
vstore16(convert_uchar16(colorsFull >> unpack(val.s5)), 0, dst+=width*4);
vstore16(convert_uchar16(colorsFull >> unpack(val.s6)), 0, dst+=width*4);
vstore16(convert_uchar16(colorsFull >> unpack(val.s7)), 0, dst+=width*4);
}
kernel void DecodeCMPR_RGBA(global uchar *dst,
const global uchar *src, int width)
{
int x = get_global_id(0) * 8, y = get_global_id(1) * 8;
src += x * 4 + (y * width) / 2;
dst += (y * width + x) * 4;
decodeCMPRBlock_RGBA(dst, src, width); src += 8;
decodeCMPRBlock_RGBA(dst + 16, src, width); src += 8;
decodeCMPRBlock_RGBA(dst + 16 * width, src, width); src += 8;
decodeCMPRBlock_RGBA(dst + 16 * (width + 1), src, width);
}

View file

@ -163,7 +163,9 @@ cl_kernel CompileKernel(cl_program program, const char *Function)
cl_kernel kernel = clCreateKernel(program, Function, &err);
if (!kernel || err != CL_SUCCESS)
{
HandleCLError(err, "Failed to create compute kernel!");
char buffer[1024];
sprintf(buffer, "Failed to create compute kernel '%s' !", Function);
HandleCLError(err, buffer);
return NULL;
}
NOTICE_LOG(COMMON, "OpenCL CompileKernel took %.3f seconds", (float)(Common::Timer::GetTimeMs() - compileStart) / 1000.0);

View file

@ -102,11 +102,11 @@ void TexDecoder_OpenCL_Initialize() {
g_program = OpenCL::CompileProgram(code.c_str());
int i = 0;
for(int i = 0; i < GX_TF_CMPR; ++i) {
for(int i = 0; i <= GX_TF_CMPR; ++i) {
if(g_DecodeParametersNative[i].name)
g_DecodeParametersNative[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersNative[i].name);
if(false && g_DecodeParametersRGBA[i].name)
if(g_DecodeParametersRGBA[i].name)
g_DecodeParametersRGBA[i].kernel = OpenCL::CompileKernel(g_program, g_DecodeParametersRGBA[i].name);
}