From c3668e179c28dbe769f8a128e780a2269044f962 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sat, 10 Apr 2021 17:41:06 -0700
Subject: [PATCH] Split TevStageIndirect::mid into matrix_index and matrix_id

---
 Source/Core/VideoBackends/Software/Tev.cpp    | 26 +++++-----
 Source/Core/VideoCommon/BPMemory.h            | 52 +++++++++++++++----
 Source/Core/VideoCommon/PixelShaderGen.cpp    | 26 ++++++----
 .../Core/VideoCommon/PixelShaderManager.cpp   |  4 +-
 Source/Core/VideoCommon/UberShaderPixel.cpp   | 11 ++--
 5 files changed, 78 insertions(+), 41 deletions(-)
diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp
index 65227339e4..30258f6cd3 100644
--- a/Source/Core/VideoBackends/Software/Tev.cpp
+++ b/Source/Core/VideoBackends/Software/Tev.cpp
@@ -485,20 +485,16 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
 
   // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits
   // of the result.
-  const int indmtxid = indirect.mid & 3;
-  if (indmtxid)
+  if (indirect.matrix_index != IndMtxIndex::Off)
   {
-    const IND_MTX& indmtx = bpmem.indmtx[indmtxid - 1];
-    const int scale =
-        ((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4);
+    const IND_MTX& indmtx = bpmem.indmtx[static_cast<u32>(indirect.matrix_index.Value()) - 1];
 
-    int shift;
+    const int shift = 17 - indmtx.GetScale();
 
-    switch (indirect.mid & 12)
+    switch (indirect.matrix_id)
     {
-    case 0:
+    case IndMtxId::Indirect:
       // matrix values are S0.10, output format is S17.7, so divide by 8
-      shift = (17 - scale);
       indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] +
                         indmtx.col2.me * indcoord[2]) >>
                        3;
@@ -506,25 +502,29 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
                         indmtx.col2.mf * indcoord[2]) >>
                        3;
       break;
-    case 4:  // s matrix
+    case IndMtxId::S:
       // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO:
       // Maybe, since s is actually stored as S24, we should divide by 256*64?
-      shift = (17 - scale);
       indtevtrans[0] = s * indcoord[0] / 256;
       indtevtrans[1] = t * indcoord[0] / 256;
       break;
-    case 8:  // t matrix
-      shift = (17 - scale);
+    case IndMtxId::T:
       indtevtrans[0] = s * indcoord[1] / 256;
       indtevtrans[1] = t * indcoord[1] / 256;
       break;
     default:
+      PanicAlertFmt("Invalid indirect matrix ID {}", indirect.matrix_id);
       return;
     }
 
     indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
     indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
   }
+  else
+  {
+    // If matrix_index is Off (0), matrix_id should be Indirect (0)
+    ASSERT(indirect.matrix_id == IndMtxId::Indirect);
+  }
 
   if (indirect.fb_addprev)
   {
diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h
index 36ec9ec0a9..569b1747f3 100644
--- a/Source/Core/VideoCommon/BPMemory.h
+++ b/Source/Core/VideoCommon/BPMemory.h
@@ -300,6 +300,31 @@ struct fmt::formatter<IndTexBias> : EnumFormatter<IndTexBias::STU>
   formatter() : EnumFormatter({"None", "S", "T", "ST", "U", "SU", "TU", "STU"}) {}
 };
 
+enum class IndMtxIndex : u32
+{
+  Off = 0,
+  Matrix0 = 1,
+  Matrix1 = 2,
+  Matrix2 = 3,
+};
+template <>
+struct fmt::formatter<IndMtxIndex> : EnumFormatter<IndMtxIndex::Matrix2>
+{
+  formatter() : EnumFormatter({"Off", "Matrix 0", "Matrix 1", "Matrix 2"}) {}
+};
+
+enum class IndMtxId : u32
+{
+  Indirect = 0,
+  S = 1,
+  T = 2,
+};
+template <>
+struct fmt::formatter<IndMtxId> : EnumFormatter<IndMtxId::T>
+{
+  formatter() : EnumFormatter({"Indirect", "S", "T"}) {}
+};
+
 // Indirect texture bump alpha
 enum class IndTexBumpAlpha : u32
 {
@@ -335,7 +360,7 @@ union IND_MTXA
 {
   BitField<0, 11, s32> ma;
   BitField<11, 11, s32> mb;
-  BitField<22, 2, u32> s0;  // bits 0-1 of scale factor
+  BitField<22, 2, u8, u32> s0;  // bits 0-1 of scale factor
   u32 hex;
 };
 
@@ -343,7 +368,7 @@ union IND_MTXB
 {
   BitField<0, 11, s32> mc;
   BitField<11, 11, s32> md;
-  BitField<22, 2, u32> s1;  // bits 2-3 of scale factor
+  BitField<22, 2, u8, u32> s1;  // bits 2-3 of scale factor
   u32 hex;
 };
 
@@ -351,7 +376,7 @@ union IND_MTXC
 {
   BitField<0, 11, s32> me;
   BitField<11, 11, s32> mf;
-  BitField<22, 2, u32> s2;  // bits 4-5 of scale factor
+  BitField<22, 2, u8, u32> s2;  // bits 4-5 of scale factor
   u32 hex;
 };
 
@@ -360,6 +385,7 @@ struct IND_MTX
   IND_MTXA col0;
   IND_MTXB col1;
   IND_MTXC col2;
+  u8 GetScale() const { return (col0.s0 << 0) | (col1.s1 << 2) | (col2.s2 << 4); }
 };
 
 union IND_IMASK
@@ -475,8 +501,12 @@ union TevStageIndirect
   BitField<4, 1, bool, u32> bias_s;
   BitField<5, 1, bool, u32> bias_t;
   BitField<6, 1, bool, u32> bias_u;
-  BitField<7, 2, IndTexBumpAlpha> bs;     // Indicates which coordinate will become the 'bump alpha'
-  BitField<9, 4, u32> mid;                // Matrix ID to multiply offsets with
+  BitField<7, 2, IndTexBumpAlpha> bs;  // Indicates which coordinate will become the 'bump alpha'
+  // Indicates which indirect matrix is used when matrix_id is Indirect.
+  // Also always indicates which indirect matrix to use for the scale factor, even with S or T.
+  BitField<9, 2, IndMtxIndex> matrix_index;
+  // Should be set to Indirect (0) if matrix_index is Off (0)
+  BitField<11, 2, IndMtxId> matrix_id;
   BitField<13, 3, IndTexWrap> sw;         // Wrapping factor for S of regular coord
   BitField<16, 3, IndTexWrap> tw;         // Wrapping factor for T of regular coord
   BitField<19, 1, bool, u32> lb_utclod;   // Use modified or unmodified texture
@@ -492,9 +522,9 @@ union TevStageIndirect
 
   u32 fullhex;
 
-  // If bs and mid are zero, the result of the stage is independent of
+  // If bs and matrix are zero, the result of the stage is independent of
   // the texture sample data, so we can skip sampling the texture.
-  bool IsActive() const { return bs != IndTexBumpAlpha::Off || mid != 0; }
+  bool IsActive() const { return bs != IndTexBumpAlpha::Off || matrix_index != IndMtxIndex::Off; }
 };
 template <>
 struct fmt::formatter<TevStageIndirect>
@@ -508,13 +538,15 @@ struct fmt::formatter<TevStageIndirect>
                      "Format: {}\n"
                      "Bias: {}\n"
                      "Bump alpha: {}\n"
+                     "Offset matrix index: {}\n"
                      "Offset matrix ID: {}\n"
                      "Regular coord S wrapping factor: {}\n"
                      "Regular coord T wrapping factor: {}\n"
                      "Use modified texture coordinates for LOD computation: {}\n"
                      "Add texture coordinates from previous TEV stage: {}",
-                     tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.mid, tevind.sw,
-                     tevind.tw, tevind.lb_utclod ? "Yes" : "No", tevind.fb_addprev ? "Yes" : "No");
+                     tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.matrix_index,
+                     tevind.matrix_id, tevind.sw, tevind.tw, tevind.lb_utclod ? "Yes" : "No",
+                     tevind.fb_addprev ? "Yes" : "No");
   }
 };
 
@@ -1914,7 +1946,7 @@ struct BPMemory
   GenMode genMode;
   u32 display_copy_filter[4];  // 01-04
   u32 unknown;                 // 05
-  // indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::mid)
+  // indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::matrix_index)
   // abc form a 2x3 offset matrix, there's 3 such matrices
   // the 3 offset matrices can either be indirect type, S-type, or T-type
   // 6bit scale factor s is distributed across IND_MTXA/B/C.
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index ab59bfd52a..866b7bacc8 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -991,7 +991,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
       // TODO: Should we reset alphabump to 0 here?
     }
 
-    if (tevind.mid != 0)
+    if (tevind.matrix_index != IndMtxIndex::Off)
     {
       // format
       static constexpr std::array<const char*, 4> tev_ind_fmt_mask{
@@ -1038,11 +1038,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
                   tev_ind_bias_add[u32(tevind.fmt.Value())]);
       }
 
+      // Multiplied by 2 because each matrix has two rows.
+      // Note also that the 4th column of the matrix contains the scale factor.
+      const u32 mtxidx = 2 * (static_cast<u32>(tevind.matrix_index.Value()) - 1);
+
       // multiply by offset matrix and scale - calculations are likely to overflow badly,
       // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
-      if (tevind.mid <= 3)
+      if (tevind.matrix_id == IndMtxId::Indirect)
       {
-        const u32 mtxidx = 2 * (tevind.mid - 1);
         out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
 
         out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX
@@ -1064,10 +1067,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
           out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
         }
       }
-      else if (tevind.mid <= 7 && has_tex_coord)
-      {  // s matrix
-        ASSERT(tevind.mid >= 5);
-        const u32 mtxidx = 2 * (tevind.mid - 5);
+      else if (tevind.matrix_id == IndMtxId::S)
+      {
+        ASSERT(has_tex_coord);
         out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
 
         out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n,
@@ -1086,10 +1088,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
           out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
         }
       }
-      else if (tevind.mid <= 11 && has_tex_coord)
-      {  // t matrix
-        ASSERT(tevind.mid >= 9);
-        const u32 mtxidx = 2 * (tevind.mid - 9);
+      else if (tevind.matrix_id == IndMtxId::T)
+      {
+        ASSERT(has_tex_coord);
         out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
 
         out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n,
@@ -1112,11 +1113,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
       else
       {
         out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
+        ASSERT(false);  // Unknown value for matrix_id
       }
     }
     else
     {
       out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
+      // If matrix_index is Off (0), matrix_id should be Indirect (0)
+      ASSERT(tevind.matrix_id == IndMtxId::Indirect);
     }
 
     // ---------
diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp
index bae48623c7..6d52d8ffc1 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/PixelShaderManager.cpp
@@ -336,9 +336,7 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high)
 
 void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
 {
-  int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
-              ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
-              ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
+  const u8 scale = bpmem.indmtx[matrixidx].GetScale();
 
   // xyz - static matrix
   // w - dynamic matrix scale / 128
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index c35ce2821d..996d3c1d55 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -806,7 +806,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
     out.Write("      uint fmt = {};\n", BitfieldExtract<&TevStageIndirect::fmt>("tevind"));
     out.Write("      uint bias = {};\n", BitfieldExtract<&TevStageIndirect::bias>("tevind"));
     out.Write("      uint bt = {};\n", BitfieldExtract<&TevStageIndirect::bt>("tevind"));
-    out.Write("      uint mid = {};\n", BitfieldExtract<&TevStageIndirect::mid>("tevind"));
+    out.Write("      uint matrix_index = {};\n",
+              BitfieldExtract<&TevStageIndirect::matrix_index>("tevind"));
+    out.Write("      uint matrix_id = {};\n",
+              BitfieldExtract<&TevStageIndirect::matrix_id>("tevind"));
     out.Write("\n");
     out.Write("      int3 indcoord;\n");
     LookupIndirectTexture("indcoord", "bt");
@@ -846,12 +849,12 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
               "\n"
               "      // Matrix multiply\n"
               "      int2 indtevtrans = int2(0, 0);\n"
-              "      if ((mid & 3u) != 0u)\n"
+              "      if (matrix_index != 0u)\n"
               "      {{\n"
-              "        uint mtxidx = 2u * ((mid & 3u) - 1u);\n"
+              "        uint mtxidx = 2u * (matrix_index - 1u);\n"
               "        int shift = " I_INDTEXMTX "[mtxidx].w;\n"
               "\n"
-              "        switch (mid >> 2)\n"
+              "        switch (matrix_id)\n"
               "        {{\n"
               "        case 0u: // 3x2 S0.10 matrix\n"
               "          indtevtrans = int2(idot(" I_INDTEXMTX