// Copyright (C) 2003-2008 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#include "PixelShader.h"
#include "BPStructs.h"
#include "XFStructs.h"

/*
   old tev->pixelshader notes

   color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0
   konstant for this stage (alpha, color) is given by bpmem.tevksel   
   inputs are given by bpmem.combiners[0].colorC.a/b/c/d     << could be current chan color
   according to GXTevColorArg table above
   output is given by .outreg
   tevtemp is set according to swapmodetables and 
*/

const float epsilon = 1.0f/255.0f;

const char *tevKSelTableC[] =
{
	"1,1,1",            //KCSEL_1   = 0x00
	"0.875,0.875,0.875",//KCSEL_7_8 = 0x01
	"0.75,0.75,0.75",	//KCSEL_3_4 = 0x02
	"0.625,0.625,0.625",//KCSEL_5_8 = 0x03
	"0.5,0.5,0.5",      //KCSEL_1_2 = 0x04
	"0.375,0.375,0.375",//KCSEL_3_8 = 0x05
	"0.25,0.25,0.25",   //KCSEL_1_4 = 0x06
	"0.125,0.125,0.125",//KCSEL_1_8 = 0x07
	"ERROR", //0x08
	"ERROR", //0x09
	"ERROR", //0x0a
	"ERROR", //0x0b
	"k0.rgb",//KCSEL_K0 = 0x0C
	"k1.rgb",//KCSEL_K1 = 0x0D
	"k2.rgb",//KCSEL_K2 = 0x0E
	"k3.rgb",//KCSEL_K3 = 0x0F
	"k0.rrr",//KCSEL_K0_R = 0x10
	"k1.rrr",//KCSEL_K1_R = 0x11
	"k2.rrr",//KCSEL_K2_R = 0x12
	"k3.rrr",//KCSEL_K3_R = 0x13
	"k0.ggg",//KCSEL_K0_G = 0x14 
	"k1.ggg",//KCSEL_K1_G = 0x15
	"k2.ggg",//KCSEL_K2_G = 0x16
	"k3.ggg",//KCSEL_K3_G = 0x17
	"k0.bbb",//KCSEL_K0_B = 0x18
	"k1.bbb",//KCSEL_K1_B = 0x19
	"k2.bbb",//KCSEL_K2_B = 0x1A
	"k3.bbb",//KCSEL_K3_B = 0x1B
	"k0.aaa",//KCSEL_K0_A = 0x1C
	"k1.aaa",//KCSEL_K1_A = 0x1D
	"k2.aaa",//KCSEL_K2_A = 0x1E
	"k3.aaa",//KCSEL_K3_A = 0x1F
};
const char *tevKSelTableA[] =
{
	"1",    //KASEL_1   = 0x00
	"0.875",//KASEL_7_8 = 0x01
	"0.75",	//KASEL_3_4 = 0x02
	"0.625",//KASEL_5_8 = 0x03
	"0.5",  //KASEL_1_2 = 0x04
	"0.375",//KASEL_3_8 = 0x05
	"0.25", //KASEL_1_4 = 0x06
	"0.125",//KASEL_1_8 = 0x07
	"ERROR",//0x08
	"ERROR",//0x09
	"ERROR",//0x0a
	"ERROR",//0x0b
	"ERROR",//0x0c
	"ERROR",//0x0d
	"ERROR",//0x0e
	"ERROR",//0x0f
	"k0.r", //KASEL_K0_R = 0x10
	"k1.r", //KASEL_K1_R = 0x11
	"k2.r", //KASEL_K2_R = 0x12
	"k3.r", //KASEL_K3_R = 0x13
	"k0.g", //KASEL_K0_G = 0x14
	"k1.g", //KASEL_K1_G = 0x15
	"k2.g", //KASEL_K2_G = 0x16
	"k3.g", //KASEL_K3_G = 0x17
	"k0.b", //KASEL_K0_B = 0x18
	"k1.b", //KASEL_K1_B = 0x19
	"k2.b", //KASEL_K2_B = 0x1A
	"k3.b", //KASEL_K3_B = 0x1B
	"k0.a", //KASEL_K0_A = 0x1C
	"k1.a", //KASEL_K1_A = 0x1D
	"k2.a", //KASEL_K2_A = 0x1E
	"k3.a", //KASEL_K3_A = 0x1F
};

const char *tevScaleTable[] = 
{
	"1",   //SCALE_1
	"2",   //SCALE_2
	"4",   //SCALE_4
	"0.5", //DIVIDE_2
};

const char *tevBiasTable[] = 
{
	"",      //ZERO,
    "+0.5",  //ADD_HALF,
    "-0.5",  //SUB_HALF,
	"",      //WTF? seen in shadow2
};

const char *tevOpTable[] = 
{
	"+",      //ADD = 0,
    "-",      //SUB = 1,
};

const char *tevCompOpTable[] = 
{
	">",
	"==",
};


#define TEV_COMP_R8    0
#define TEV_COMP_GR16  1
#define TEV_COMP_BGR24 2
#define TEV_COMP_RGB8  3

const char *tevCInputTable[] = 
{
	"prev.rgb",           //CPREV,
    "prev.aaa",           //APREV,
    "c0.rgb",             //C0,
    "c0.aaa",             //A0,
    "c1.rgb",             //C1,
    "c1.aaa",             //A1,
    "c2.rgb",             //C2,
    "c2.aaa",             //A2,
    "textemp.rgb",        //TEXC,
    "textemp.aaa",        //TEXA,
    "rastemp.rgb",        //RASC,
    "rastemp.aaa",        //RASA,
    "float3(1,1,1)",      //ONE,
    "float3(.5,.5,.5)",   //HALF,
    "konsttemp.rgb",      //KONST,
    "float3(0,0,0)",      //ZERO
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
};
const char *tevCInputTable2[] = 
{
	"prev",                 //CPREV,
	"(prev.aaa)",           //APREV,
	"c0",                   //C0,
	"(c0.aaa)",             //A0,
	"c1",                   //C1,
	"(c1.aaa)",             //A1,
	"c2",                   //C2,
	"(c2.aaa)",             //A2,
	"textemp",              //TEXC,
	"(textemp.aaa)",        //TEXA,
	"rastemp",              //RASC,
	"(rastemp.aaa)",        //RASA,
	"float3(1,1,1)",        //ONE,
	"float3(.5,.5,.5)",     //HALF,
	"konsttemp",            //KONST,
	"float3(0,0,0)",        //ZERO
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
};

const char *tevAInputTable[] = 
{
    "prev.a",           //APREV,
    "c0.a",             //A0,
    "c1.a",             //A1,
    "c2.a",             //A2,
    "textemp.a",        //TEXA,
    "rastemp.a",        //RASA,
    "konsttemp.a",      //KONST,  (hw1 had quarter)
    "0.0",				//ZERO
	"PADERROR", "PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR", "PADERROR",
};

const char *tevAInputTable1[] = 
{
	"prev.r",           //APREV,
	"c0.r",             //A0,
	"c1.r",             //A1,
	"c2.r",             //A2,
	"textemp.r",        //TEXA,
	"rastemp.r",        //RASA,
	"konsttemp.r",      //KONST,  (hw1 had quarter)
	"0.0",				//ZERO
	"PADERROR", "PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR", "PADERROR",
};	

const char *tevAInputTable2[] = 
{
    "prev",           //APREV,
    "c0",             //A0,
    "c1",             //A1,
    "c2",             //A2,
    "textemp",        //TEXA,
    "rastemp",        //RASA,
    "konsttemp",      //KONST,  (hw1 had quarter)
    "float4(0,0,0,0)",//ZERO
	"PADERROR", "PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR",	"PADERROR",
	"PADERROR",	"PADERROR",	"PADERROR", "PADERROR",
};	

const char *tevRasTable[] = 
{
	"colors[0]",//RAS1_CC_0	0x00000000 /* color channel 0 */
	"colors[1]",//RAS1_CC_1	0x00000001 /* color channel 1 */
	"ERROR", //2
	"ERROR", //3
	"ERROR", //4
	"alphabump", //RAS1_CC_B	0x00000005 /* indirect texture bump alpha */ //green cuz unsupported
	"(alphabump*(255.0f/248.0f))", //RAS1_CC_BN	0x00000006 /* ind tex bump alpha, normalized 0-255 *///green cuz unsupported
	"float4(0,0,0,0)", //RAS1_CC_Z	0x00000007 /* set color value to zero */
};

const char *tevCOutputTable[]  = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
const char *tevAOutputTable[]  = { "prev.a", "c0.a", "c1.a", "c2.a" };
const char *tevIndAlphaSel[]   = {"", "x", "y", "z"};
const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
const char *tevIndBiasField[]  = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
const char *tevIndBiasAdd[]    = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
const char *tevIndWrapStart[]  = {"0", "256", "128", "64", "32", "16", "0.001" };
const char *tevIndFmtScale[]   = {"255.0f", "31.0f", "15.0f", "8.0f" };

const char *tevTexFuncs[] = 
{
	"tex2D",
	"tex2Dproj"
};

const char *alphaRef[2] = 
{
	"alphaRef.x",
	"alphaRef.y"
};


char text[65536];
#define WRITE p+=sprintf


void WriteStage(char *&p, int n);
void WriteAlphaTest(char *&p);


char *swapColors = "rgba";
char swapModeTable[4][5];

void BuildSwapModeTable()
{
	for (int i = 0; i < 4; i++)
	{
		swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
		swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2];
		swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1];
		swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2];
		swapModeTable[i][4] = 0;
	}
}

const char *GeneratePixelShader()
{
	BuildSwapModeTable();
	int numStages = bpmem.genMode.numtevstages + 1;
	int numTexgen = bpmem.genMode.numtexgens;
	int numSamplers = 8;

	char *p = text;
	WRITE(p,"//Pixel Shader for TEV stages\n\
//%i TEV stages, %i texgens, %i IND stages, %i COL channels\n",
		bpmem.genMode.numtevstages,bpmem.genMode.numtexgens,bpmem.genMode.numindstages,bpmem.genMode.numcolchans);

	//write kcolor declarations
	for (int i = 0; i < 4; i++) {
		if(i < 3) {
			WRITE(p,"float4 k%i : register(c%i);\n\
                     float4 color%i : register(c%i);\n",i,PS_CONST_KCOLORS+i, i,PS_CONST_COLORS+i+1);
		} else {
			WRITE(p,"float4 k%i : register(c%i);\n",i,PS_CONST_KCOLORS+i);
		}
	}

	WRITE(p,"float constalpha : register(c%i);\n\
float2 alphaRef : register(c%i);\n\n\
sampler samp[%i] : register(s0);\n\n\
float4 main(in float4 colors[2] : COLOR0",PS_CONST_CONSTALPHA,PS_CONST_ALPHAREF,numSamplers);

	if (numTexgen)
		WRITE(p,", float4 uv[%i] : TEXCOORD0",numTexgen);
	else
		WRITE(p,", float4 uv[1] : TEXCOORD0"); //HACK
	WRITE(p,") : COLOR\n\
{\n\
float4 c0=color0,c1=color1,c2=color2,prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp;\n\
float3 comp16 = float3(1,255,0), comp24 = float3(1,255,255*255);\n\
\n");

	for (int i = 0; i < numStages; i++)
		WriteStage(p,i); //build the equation for this stage

	WriteAlphaTest(p);
	
	/* see GL shader generator - this is Donko's hack
	if (bpmem.dstalpha.enable)
		WRITE(p,"  return float4(prev.rgb,constalpha.x);\n");
	else
	*/
		WRITE(p,"  return prev;\n");

	WRITE(p,"}\n\0");
	
	return text;
}

void WriteStage(char *&p, int n)
{
	const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
	const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];

	int texfun = xfregs.texcoords[n].texmtxinfo.projection;

	WRITE(p,"rastemp=%s.%s;\n",tevRasTable[bpmem.tevorders[n/2].getColorChan(n&1)],rasswap);
	if (bpmem.tevorders[n/2].getEnable(n&1))
		WRITE(p,"textemp=%s(samp[%i],uv[%i]).%s;\n",
			tevTexFuncs[texfun],
			bpmem.tevorders[n/2].getTexMap(n&1),
			bpmem.tevorders[n/2].getTexCoord(n&1),texswap);
	else
		WRITE(p,"textemp=float4(1,1,1,1);\n");

	int kc = bpmem.tevksel[n/2].getKC(n&1);
	int ka = bpmem.tevksel[n/2].getKA(n&1);

	WRITE(p,"konsttemp=float4(%s,%s);\n",tevKSelTableC[kc],tevKSelTableA[ka]);

	TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
	TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;

	WRITE(p,"float4(%s,%s)=", tevCOutputTable[cc.dest], tevAOutputTable[ac.dest]); 

	//////////////////////////////////////////////////////////////////////////
	//start of color
	//////////////////////////////////////////////////////////////////////////
	WRITE(p,"float4(\n"); 

	if (cc.bias != TB_COMPARE)
	{
		//normal color combiner goes here
		WRITE(p,"   %s*(%s%s",tevScaleTable[cc.shift],tevCInputTable[cc.d],tevOpTable[cc.op]);
		WRITE(p,"(lerp(%s,%s,%s)%s)),\n",
			tevCInputTable[cc.a],tevCInputTable[cc.b],
			tevCInputTable[cc.c],tevBiasTable[cc.bias]);
	}
	else
	{
		//compare color combiner goes here
		switch(cc.shift)  // yep comparemode stored here :P
		{
		case TEV_COMP_R8:
			if (cc.op == 0)  //equality check needs tolerance, fp in gpu has drawbacks :(
				WRITE(p,"   %s + ((%s.r > %s.r) ? %s : float3(0,0,0)),\n",
				tevCInputTable[cc.d],tevCInputTable2[cc.a],
				tevCInputTable2[cc.b],tevCInputTable[cc.c]);
			else
				WRITE(p,"   %s + (abs(%s.r - %s.r)<%f ? %s : float3(0,0,0)),\n",
				tevCInputTable[cc.d],tevCInputTable2[cc.a],
				tevCInputTable2[cc.b],epsilon,tevCInputTable[cc.c]);
			break;
		default:
			WRITE(p,"float3(0,0,0),\n");
			break;
		}
	}
	//end of color
	
	//////////////////////////////////////////////////////////////////////////
	//start of alpha
	//////////////////////////////////////////////////////////////////////////
	if (ac.bias != TB_COMPARE)
	{
		//normal alpha combiner goes here
		WRITE(p,"   %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]);
		WRITE(p,"lerp(%s,%s,%s) %s)\n",
			tevAInputTable[ac.a],tevAInputTable[ac.b],
			tevAInputTable[ac.c],tevBiasTable[ac.bias]);
	}
	else
	{
        int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
		const char **inputTable = NULL;
		inputTable = (cmp == TEVCMP_R8_GT || cmp == TEVCMP_R8_EQ) ? tevAInputTable1 : tevAInputTable;
		//compare alpha combiner goes here
        switch(cmp) {
        case TEVCMP_R8_GT:
        case TEVCMP_A8_GT:
			WRITE(p,"   %s + ((%s > %s) ? %s : 0)\n",
				tevAInputTable[ac.d], inputTable[ac.a], inputTable[ac.b], tevAInputTable[ac.c]);
            break;
        case TEVCMP_R8_EQ:
        case TEVCMP_A8_EQ:
            WRITE(p,"   %s + (abs(%s - %s)<%f ? %s : 0)\n",
                tevAInputTable[ac.d], inputTable[ac.a], inputTable[ac.b],epsilon,tevAInputTable[ac.c]);
            break;
        
        case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
        case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
            WRITE(p,"   %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n",
                tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]);
            break;
        case TEVCMP_GR16_EQ:
        case TEVCMP_BGR24_EQ:
            WRITE(p,"   %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n",
                tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_EQ?"16":"24",epsilon,tevAInputTable[ac.c]);
            break;
        default:
            WRITE(p,"0)\n");
            break;
        }
	}
	WRITE(p, ");");
    if (ac.clamp)
        WRITE(p, "%s = clamp(%s, 0.0f, 1.0f);\n", tevAOutputTable[ac.dest], tevAOutputTable[ac.dest]);
	WRITE(p, "\n");
}

void WriteAlphaCompare(char *&p, int num, int comp)
{
	WRITE(p,"  res%i = ",num);
	switch(comp) {
	case ALPHACMP_ALWAYS:  WRITE(p,"0;\n");	break;
	case ALPHACMP_NEVER:   WRITE(p,"1;\n");	break;
	case ALPHACMP_LEQUAL:  WRITE(p,"prev.a - %s.x;\n",alphaRef[num]);	break;
	case ALPHACMP_LESS:    WRITE(p,"prev.a - %s.x + %f;\n",alphaRef[num],epsilon*2);break;
	case ALPHACMP_GEQUAL:  WRITE(p,"%s - prev.a;\n",alphaRef[num]);	break;
	case ALPHACMP_GREATER: WRITE(p,"%s - prev.a + %f;\n",alphaRef[num],epsilon*2);break;
	case ALPHACMP_EQUAL:   WRITE(p,"abs(%s-prev.a)-%f;\n",alphaRef[num],epsilon*2); break;
	case ALPHACMP_NEQUAL:  WRITE(p,"%f-abs(%s-prev.a);\n",epsilon*2,alphaRef[num]); break;
	}
}

void WriteAlphaTest(char *&p)
{
	AlphaOp op = (AlphaOp)bpmem.alphaFunc.logic;
	Compare comp[2] = {(Compare)bpmem.alphaFunc.comp0,(Compare)bpmem.alphaFunc.comp1};

	//first kill all the simple cases
	if (op == ALPHAOP_AND && (comp[0] == COMPARE_ALWAYS && comp[1] == COMPARE_ALWAYS)) return;
	if (op == ALPHAOP_OR  && (comp[0] == COMPARE_ALWAYS || comp[1] == COMPARE_ALWAYS)) return;
	for (int i = 0; i < 2; i++)
	{
		int one = i;
		int other = 1-i;
		switch(op) {
		case ALPHAOP_XOR:
			if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_NEVER) return;
			break;
		case ALPHAOP_XNOR:
			if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_ALWAYS) return;
			if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_NEVER) return;
			break;
		}
	}

	//Ok, didn't get to do the easy way out :P
	// do the general way
	WRITE(p,"float res0, res1;\n");
	WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0);
	WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1);
	WRITE(p,"res0 = max(res0, 0);\n");
	WRITE(p,"res1 = max(res1, 0);\n");
	//probably should use lookup textures for some of these :P
	switch(bpmem.alphaFunc.logic) {
	case ALPHAOP_AND: // if both are 0
		WRITE(p,"clip(-(res0+res1)+%f);\n",epsilon);
		break;
	case ALPHAOP_OR: //if either is 0
		WRITE(p,"clip(-res0*res1+%f);\n",epsilon*epsilon);
		break;
	case ALPHAOP_XOR:
		//hmm, this might work:
		WRITE(p,"res0=(res0>0?1:0)-.5;\n");
		WRITE(p,"res1=(res1>0?1:0)-.5;\n");
		WRITE(p,"clip(-res0*res1);\n",epsilon);
		break;
	case ALPHAOP_XNOR:
		WRITE(p,"res0=(res0>0?1:0)-.5;\n");
		WRITE(p,"res1=(res1>0?1:0)-.5;\n");
		WRITE(p,"clip(res0*res1);\n",epsilon);
		break;
	}
}