diff --git a/src/common/aarch64/oaknut_abi.h b/src/common/aarch64/oaknut_abi.h
index 56bb8b5f3..ec28872aa 100644
--- a/src/common/aarch64/oaknut_abi.h
+++ b/src/common/aarch64/oaknut_abi.h
@@ -78,7 +78,8 @@ inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<64> regs, std::size_t fra
     return ABIFrameInfo{static_cast<u32>(total_size), static_cast<u32>(fprs_base_subtraction)};
 }
 
-inline void ABI_PushRegisters(oaknut::CodeGenerator& code, std::bitset<64> regs,
+template <typename Policy>
+inline void ABI_PushRegisters(oaknut::BasicCodeGenerator<Policy>& code, std::bitset<64> regs,
                               std::size_t frame_size = 0) {
     using namespace oaknut;
     using namespace oaknut::util;
@@ -137,7 +138,8 @@ inline void ABI_PushRegisters(oaknut::CodeGenerator& code, std::bitset<64> regs,
     }
 }
 
-inline void ABI_PopRegisters(oaknut::CodeGenerator& code, std::bitset<64> regs,
+template <typename Policy>
+inline void ABI_PopRegisters(oaknut::BasicCodeGenerator<Policy>& code, std::bitset<64> regs,
                              std::size_t frame_size = 0) {
     using namespace oaknut;
     using namespace oaknut::util;
diff --git a/src/common/aarch64/oaknut_util.h b/src/common/aarch64/oaknut_util.h
index 16cf7dfe3..253d01134 100644
--- a/src/common/aarch64/oaknut_util.h
+++ b/src/common/aarch64/oaknut_util.h
@@ -38,6 +38,16 @@ inline void CallFarFunction(oaknut::CodeGenerator& code, const T f) {
     }
 }
 
+template <typename T>
+inline void CallFarFunction(oaknut::VectorCodeGenerator& code, const T f) {
+    static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
+    // X16(IP0) and X17(IP1) is the standard veneer register
+    // LR is also available as an intermediate register
+    // https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard
+    code.MOVP2R(oaknut::util::X16, reinterpret_cast<const void*>(f));
+    code.BLR(oaknut::util::X16);
+}
+
 } // namespace Common::A64
 
 #endif // CITRA_ARCH(arm64)
diff --git a/src/video_core/shader/shader_jit_a64_compiler.cpp b/src/video_core/shader/shader_jit_a64_compiler.cpp
index 2927a28c5..18793317e 100644
--- a/src/video_core/shader/shader_jit_a64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_a64_compiler.cpp
@@ -942,7 +942,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
     swizzle_data = swizzle_data_;
 
     // Reset flow control state
-    program = xptr<CompiledShader*>();
+    const std::uintptr_t program_offset = offset();
     program_counter = 0;
     loop_depth = 0;
     instruction_labels.fill(Label());
@@ -984,18 +984,28 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
     return_offsets.clear();
     return_offsets.shrink_to_fit();
 
+    // Copy to executable memory
+    const size_t code_size = code_vec.size() * sizeof(u32);
+
+    code_mem = std::make_unique<oaknut::CodeBlock>(code_size);
+    code_mem->unprotect();
+
+    program = reinterpret_cast<CompiledShader*>(reinterpret_cast<std::byte*>(code_mem->ptr()) +
+                                                program_offset);
+
+    // Copy to executable memory
+    std::memcpy(code_mem->ptr(), code_vec.data(), code_vec.size() * sizeof(u32));
+
     // Memory is ready to execute
-    protect();
-    invalidate_all();
+    code_mem->protect();
+    code_mem->invalidate_all();
 
-    const std::size_t code_size = static_cast<std::size_t>(offset());
-
-    ASSERT_MSG(code_size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
-    LOG_DEBUG(HW_GPU, "Compiled shader size={}", code_size);
+    // code_vec is no longer needed
+    code_vec.clear();
+    code_vec.shrink_to_fit();
 }
 
-JitShader::JitShader() : CodeBlock(MAX_SHADER_SIZE), CodeGenerator(CodeBlock::ptr()) {
-    unprotect();
+JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {
     CompilePrelude();
 }
 
@@ -1013,19 +1023,22 @@ Label JitShader::CompilePrelude_Log2() {
     // range. Coefficients for the minimax polynomial.
     // f(x) computes approximately log2(x) / (x - 1).
     // f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0)).
-    align(16);
-    const void* c0 = xptr<const void*>();
+    oaknut::Label c0;
+    // align(16);
+    l(c0);
     dw(0x3d74552f);
 
-    align(16);
-    const void* c14 = xptr<const void*>();
+    // align(16);
+    oaknut::Label c14;
+    l(c14);
     dw(0xbeee7397);
     dw(0x3fbd96dd);
     dw(0xc02153f6);
     dw(0x4038d96c);
 
-    align(16);
-    const void* negative_infinity_vector = xptr<const void*>();
+    // align(16);
+    oaknut::Label negative_infinity_vector;
+    l(negative_infinity_vector);
     dw(0xff800000);
     dw(0xff800000);
     dw(0xff800000);
@@ -1038,19 +1051,19 @@ Label JitShader::CompilePrelude_Log2() {
 
     Label input_is_nan, input_is_zero, input_out_of_range;
 
-    align(16);
+    // align(16);
     l(input_out_of_range);
     B(Cond::EQ, input_is_zero);
-    MOVP2R(XSCRATCH0, default_qnan_vector);
+    ADR(XSCRATCH0, default_qnan_vector);
     LDR(SRC1, XSCRATCH0);
     RET();
 
     l(input_is_zero);
-    MOVP2R(XSCRATCH0, negative_infinity_vector);
+    ADR(XSCRATCH0, negative_infinity_vector);
     LDR(SRC1, XSCRATCH0);
     RET();
 
-    align(16);
+    // align(16);
     l(subroutine);
 
     // Here we handle edge cases: input in {NaN, 0, -Inf, Negative}.
@@ -1078,14 +1091,14 @@ Label JitShader::CompilePrelude_Log2() {
     UCVTF(VSCRATCH1.toS(), VSCRATCH1.toS());
     // VSCRATCH1 now contains the exponent of the input.
 
-    MOVP2R(XSCRATCH0, c0);
+    ADR(XSCRATCH0, c0);
     LDR(XSCRATCH0.toW(), XSCRATCH0);
     MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW());
 
     // Complete computation of polynomial
     // Load C1,C2,C3,C4 into a single scratch register
     const QReg C14 = SRC2;
-    MOVP2R(XSCRATCH0, c14);
+    ADR(XSCRATCH0, c14);
     LDR(C14, XSCRATCH0);
     FMUL(VSCRATCH0.toS(), VSCRATCH0.toS(), SRC1.toS());
     FMLA(VSCRATCH0.toS(), ONE.toS(), C14.Selem()[0]);
@@ -1118,27 +1131,35 @@ Label JitShader::CompilePrelude_Exp2() {
     // polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
     // result into the appropriate range.
 
-    align(16);
-    const void* input_max = xptr<const void*>();
+    // align(16);
+    Label input_max;
+    l(input_max);
     dw(0x43010000);
-    const void* input_min = xptr<const void*>();
+    Label input_min;
+    l(input_min);
     dw(0xc2fdffff);
-    const void* c0 = xptr<const void*>();
+    Label c0;
+    l(c0);
     dw(0x3c5dbe69);
-    const void* half = xptr<const void*>();
+    Label half;
+    l(half);
     dw(0x3f000000);
-    const void* c1 = xptr<const void*>();
+    Label c1;
+    l(c1);
     dw(0x3d5509f9);
-    const void* c2 = xptr<const void*>();
+    Label c2;
+    l(c2);
     dw(0x3e773cc5);
-    const void* c3 = xptr<const void*>();
+    Label c3;
+    l(c3);
     dw(0x3f3168b3);
-    const void* c4 = xptr<const void*>();
+    Label c4;
+    l(c4);
     dw(0x3f800016);
 
     Label ret_label;
 
-    align(16);
+    // align(16);
     l(subroutine);
 
     // Handle edge cases
@@ -1149,15 +1170,15 @@ Label JitShader::CompilePrelude_Exp2() {
     // VSCRATCH0=2^round(input)
     // SRC1=input-round(input) [-0.5, 0.5)
     // Clamp to maximum range since we shift the value directly into the exponent.
-    MOVP2R(XSCRATCH0, input_max);
+    ADR(XSCRATCH0, input_max);
     LDR(VSCRATCH0.toS(), XSCRATCH0);
     FMIN(SRC1.toS(), SRC1.toS(), VSCRATCH0.toS());
 
-    MOVP2R(XSCRATCH0, input_min);
+    ADR(XSCRATCH0, input_min);
     LDR(VSCRATCH0.toS(), XSCRATCH0);
     FMAX(SRC1.toS(), SRC1.toS(), VSCRATCH0.toS());
 
-    MOVP2R(XSCRATCH0, half);
+    ADR(XSCRATCH0, half);
     LDR(VSCRATCH0.toS(), XSCRATCH0);
     FSUB(VSCRATCH0.toS(), SRC1.toS(), VSCRATCH0.toS());
 
diff --git a/src/video_core/shader/shader_jit_a64_compiler.h b/src/video_core/shader/shader_jit_a64_compiler.h
index 9819b6b7e..7accf66ac 100644
--- a/src/video_core/shader/shader_jit_a64_compiler.h
+++ b/src/video_core/shader/shader_jit_a64_compiler.h
@@ -30,20 +30,17 @@ struct ShaderUnit;
 
 namespace Pica::Shader {
 
-/// Memory allocated for each compiled shader
-constexpr std::size_t MAX_SHADER_SIZE = MAX_PROGRAM_CODE_LENGTH * 256;
-
 /**
  * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
  * code that can be executed on the host machine directly.
  */
-class JitShader : private oaknut::CodeBlock, private oaknut::CodeGenerator {
+class JitShader : public oaknut::VectorCodeGenerator {
 public:
     JitShader();
 
     void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const {
         program(&setup.uniforms, &state,
-                reinterpret_cast<std::byte*>(oaknut::CodeBlock::ptr()) +
+                reinterpret_cast<const std::byte*>(code_mem->ptr()) +
                     instruction_labels[offset].offset());
     }
 
@@ -81,6 +78,9 @@ public:
     void Compile_SETE(Instruction instr);
 
 private:
+    std::vector<u32> code_vec;
+    std::unique_ptr<oaknut::CodeBlock> code_mem;
+
     void Compile_Block(u32 end);
     void Compile_NextInstr();