GX2: Rework GX2Set*UniformReg

- Use cafeExportRegister() instead of legacy export
- Submit as a single PM4 packet
- Add logging for the special case of the size parameter (not sure if this is used by any game?)
- Add some extra validation and logging which may be helpful to homebrew devs
This commit is contained in:
Exzap 2024-03-10 01:21:04 +01:00
parent 3d0d987d89
commit 0993658c82
5 changed files with 34 additions and 42 deletions

View file

@ -396,16 +396,13 @@ void gx2_load()
osLib_addFunction("gx2", "GX2GetCurrentScanBuffer", gx2Export_GX2GetCurrentScanBuffer);
// shader stuff
//osLib_addFunction("gx2", "GX2SetVertexShader", gx2Export_GX2SetVertexShader);
osLib_addFunction("gx2", "GX2SetPixelShader", gx2Export_GX2SetPixelShader);
osLib_addFunction("gx2", "GX2SetGeometryShader", gx2Export_GX2SetGeometryShader);
osLib_addFunction("gx2", "GX2SetComputeShader", gx2Export_GX2SetComputeShader);
osLib_addFunction("gx2", "GX2SetVertexUniformReg", gx2Export_GX2SetVertexUniformReg);
osLib_addFunction("gx2", "GX2SetVertexUniformBlock", gx2Export_GX2SetVertexUniformBlock);
osLib_addFunction("gx2", "GX2RSetVertexUniformBlock", gx2Export_GX2RSetVertexUniformBlock);
osLib_addFunction("gx2", "GX2SetPixelUniformBlock", gx2Export_GX2SetPixelUniformBlock);
osLib_addFunction("gx2", "GX2SetPixelUniformReg", gx2Export_GX2SetPixelUniformReg);
osLib_addFunction("gx2", "GX2SetGeometryUniformBlock", gx2Export_GX2SetGeometryUniformBlock);
osLib_addFunction("gx2", "GX2SetShaderModeEx", gx2Export_GX2SetShaderModeEx);

View file

@ -18,11 +18,9 @@ void gx2_load();
void gx2Export_GX2SetPixelShader(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetGeometryShader(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetComputeShader(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetVertexUniformReg(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetVertexUniformBlock(PPCInterpreter_t* hCPU);
void gx2Export_GX2RSetVertexUniformBlock(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetPixelUniformBlock(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetPixelUniformReg(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetGeometryUniformBlock(PPCInterpreter_t* hCPU);
void gx2Export_GX2SetShaderModeEx(PPCInterpreter_t* hCPU);
void gx2Export_GX2CalcGeometryShaderInputRingBufferSize(PPCInterpreter_t* hCPU);

View file

@ -417,6 +417,37 @@ namespace GX2
}
}
// Shared implementation of GX2SetVertexUniformReg / GX2SetPixelUniformReg.
// Emits one IT_SET_ALU_CONST PM4 packet that writes sizeInU32s values starting at
// register offset (offsetRegBase + aluRegisterOffset).
//   offsetRegBase     - base added to the caller supplied offset (callers in this file pass 0x400 for vertex and 0 for pixel)
//   aluRegisterOffset - offset passed by the guest; offsets with bit 0x8000 set are not handled and the call is dropped
//   dataWords         - source values, handed to gx2WriteGather_submitU32AsLEArray
//   sizeInU32s        - number of 32-bit values; rounded down to a multiple of 4 if it is not one already
void _GX2SubmitUniformReg(uint32 offsetRegBase, uint32 aluRegisterOffset, uint32be* dataWords, uint32 sizeInU32s)
{
	if (aluRegisterOffset & 0x8000)
	{
		cemuLog_logDebug(LogType::Force, "_GX2SubmitUniformReg(): Unhandled loop const special case or invalid offset");
		return;
	}
	if ((aluRegisterOffset + sizeInU32s) > 0x400)
	{
		// this helper serves both the vertex and the pixel variant, so the message must not name only one of them
		// note: the range violation is only reported, the packet is still submitted below
		cemuLog_logOnce(LogType::APIErrors, "GX2Set*UniformReg values are out of range (offset {} + size {} must be equal or smaller than 0x400)", aluRegisterOffset, sizeInU32s);
	}
	if ((sizeInU32s & 3) != 0)
	{
		cemuLog_logOnce(LogType::APIErrors, "GX2Set*UniformReg must be called with a size that is a multiple of 4 (size: {:})", sizeInU32s);
		// drop the trailing values that do not fill a complete group of 4
		sizeInU32s &= ~3;
	}
	// packet layout: 1 header word + 1 register offset word + sizeInU32s data words
	GX2ReserveCmdSpace(2 + sizeInU32s);
	gx2WriteGather_submit(pm4HeaderType3(IT_SET_ALU_CONST, 1 + sizeInU32s), offsetRegBase + aluRegisterOffset);
	gx2WriteGather_submitU32AsLEArray((uint32*)dataWords, sizeInU32s);
}
// gx2 export: uploads sizeInU32s uniform register values for the vertex shader, starting at the given offset.
void GX2SetVertexUniformReg(uint32 offset, uint32 sizeInU32s, uint32be* values)
{
	// register base used for the vertex variant (the pixel variant uses 0)
	const uint32 vertexRegBase = 0x400;
	_GX2SubmitUniformReg(vertexRegBase, offset, values, sizeInU32s);
}
// gx2 export: uploads sizeInU32s uniform register values for the pixel shader, starting at the given offset.
void GX2SetPixelUniformReg(uint32 offset, uint32 sizeInU32s, uint32be* values)
{
	// register base used for the pixel variant (the vertex variant uses 0x400)
	const uint32 pixelRegBase = 0;
	_GX2SubmitUniformReg(pixelRegBase, offset, values, sizeInU32s);
}
void GX2ShaderInit()
{
cafeExportRegister("gx2", GX2CalcFetchShaderSizeEx, LogType::GX2);
@ -428,5 +459,8 @@ namespace GX2
cafeExportRegister("gx2", GX2GetPixelShaderStackEntries, LogType::GX2);
cafeExportRegister("gx2", GX2SetFetchShader, LogType::GX2);
cafeExportRegister("gx2", GX2SetVertexShader, LogType::GX2);
cafeExportRegister("gx2", GX2SetVertexUniformReg, LogType::GX2);
cafeExportRegister("gx2", GX2SetPixelUniformReg, LogType::GX2);
}
}

View file

@ -270,41 +270,6 @@ void gx2Export_GX2SetComputeShader(PPCInterpreter_t* hCPU)
osLib_returnFromFunction(hCPU, 0);
}
// Legacy uniform register upload: reads `count` 32-bit values from guest memory at virtualAddress
// and writes them to consecutive registers starting at aluRegisterOffset, using as many
// IT_SET_ALU_CONST packets as needed.
void _GX2SubmitUniformReg(uint32 aluRegisterOffset, MPTR virtualAddress, uint32 count)
{
	uint32* srcWords = (uint32*)memory_getPointerFromVirtualOffset(virtualAddress);
	GX2ReserveCmdSpace(2 + (count / 0xFF) * 2 + count);
	// a single command can write at most 0xFF values, so the data is emitted in chunks
	for (uint32 submitted = 0; submitted < count; )
	{
		const uint32 remaining = count - submitted;
		const uint32 chunkSize = (remaining < 0xFFu) ? remaining : 0xFFu;
		gx2WriteGather_submit(pm4HeaderType3(IT_SET_ALU_CONST, 1 + chunkSize),
			aluRegisterOffset + submitted);
		gx2WriteGather_submitU32AsLEArray(srcWords + submitted, chunkSize);
		submitted += chunkSize;
	}
}
// Legacy export handler for GX2SetVertexUniformReg.
// Guest arguments: r3 = register offset, r4 = value count, r5 = address of the values.
void gx2Export_GX2SetVertexUniformReg(PPCInterpreter_t* hCPU)
{
	const auto regOffset = hCPU->gpr[3];
	const auto valueCount = hCPU->gpr[4];
	const auto valuesAddr = hCPU->gpr[5];
	cemuLog_log(LogType::GX2, "GX2SetVertexUniformReg(0x{:08x},0x{:x},0x{:08x})", regOffset, valueCount, valuesAddr);
	// the vertex variant adds 0x400 to the register offset
	_GX2SubmitUniformReg(regOffset + 0x400, valuesAddr, valueCount);
	cemu_assert_debug((regOffset + valueCount) <= 0x400);
	osLib_returnFromFunction(hCPU, 0);
}
// Legacy export handler for GX2SetPixelUniformReg.
// Guest arguments: r3 = register offset, r4 = value count, r5 = address of the values.
void gx2Export_GX2SetPixelUniformReg(PPCInterpreter_t* hCPU)
{
	const auto regOffset = hCPU->gpr[3];
	const auto valueCount = hCPU->gpr[4];
	const auto valuesAddr = hCPU->gpr[5];
	cemuLog_log(LogType::GX2, "GX2SetPixelUniformReg(0x{:08x},0x{:x},0x{:08x})", regOffset, valueCount, valuesAddr);
	// the pixel variant passes the register offset through unchanged
	_GX2SubmitUniformReg(regOffset, valuesAddr, valueCount);
	cemu_assert_debug((regOffset + valueCount) <= 0x400);
	osLib_returnFromFunction(hCPU, 0);
}
void _GX2SubmitUniformBlock(uint32 registerBase, uint32 index, MPTR virtualAddress, uint32 size)
{
GX2ReserveCmdSpace(9);

View file

@ -543,8 +543,6 @@ void nnActExport_GetDefaultAccount(PPCInterpreter_t* hCPU)
void nnActExport_GetSlotNo(PPCInterpreter_t* hCPU)
{
	// uint8 GetSlotNo(void);
	// returns the id of the active account; this implementation always reports the first slot
	constexpr int firstSlot = 1; // 1 is the first slot (0 is invalid)
	cemuLog_logDebug(LogType::Force, "nn_act.GetSlotNo()");
	osLib_returnFromFunction(hCPU, firstSlot);
}