Merge pull request #2567 from Sonicadvance1/remove_armv7

Remove ARMv7 support.
This commit is contained in:
Ryan Houdek 2015-06-08 02:19:11 -04:00
commit 1b6969ca67
36 changed files with 6 additions and 10619 deletions

View file

@ -159,11 +159,7 @@ if(NOT ENABLE_GENERIC)
message(FATAL_ERROR "x86_32 is an unsupported platform. Enable generic build if you really want a JIT-less binary.")
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm")
# This option only applies to 32bit ARM
set(_M_ARM 1)
set(_M_ARM_32 1)
add_definitions(-D_M_ARM=1 -D_M_ARM_32=1)
add_definitions(-marm -march=armv7-a)
message(FATAL_ERROR "ARMv7 is an unsupported platform. Enable generic build if you really want a JIT-less binary.")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
# This option only applies to 64bit ARM
set(_M_ARM 1)

View file

@ -52,16 +52,6 @@ android {
// Define product flavors, which can be split into categories. Common examples
// of product flavors are paid vs. free, ARM vs. x86, etc.
productFlavors {
arm {
// This flavor is mutually exclusive against any flavor in the same dimension.
flavorDimension "abi"
// When building this flavor, only include native libs from the specified folder.
ndk {
abiFilter "armeabi-v7a"
}
}
arm_64 {
flavorDimension "abi"
ndk {

View file

@ -42,11 +42,6 @@ public final class CPUSettingsFragment extends PreferenceFragment
cpuCores.setEntries(R.array.emuCoreEntriesARM64);
cpuCores.setEntryValues(R.array.emuCoreValuesARM64);
}
else if (Build.CPU_ABI.contains("arm"))
{
cpuCores.setEntries(R.array.emuCoreEntriesARM);
cpuCores.setEntryValues(R.array.emuCoreValuesARM);
}
else
{
cpuCores.setEntries(R.array.emuCoreEntriesOther);

File diff suppressed because it is too large Load diff

View file

@ -1,706 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
#pragma once
#include <vector>
#include "Common/ArmCommon.h"
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
#if defined(__SYMBIAN32__) || defined(PANDORA)
#include <signal.h>
#endif
#undef _IP
#undef R0
#undef _SP
#undef _LR
#undef _PC
// VCVT flags (bitmask passed as the `flags` argument of ARMXEmitter::VCVT).
// Each value is parenthesised so the macro expands safely inside larger
// expressions: without the parentheses `ROUND_TO_ZERO + 1` would parse as
// `1 << (2 + 1)`.
#define TO_FLOAT      0
#define TO_INT        (1 << 0)
#define IS_SIGNED     (1 << 1)
#define ROUND_TO_ZERO (1 << 2)
namespace ArmGen
{
// Register identifiers understood by the emitter. Every register bank starts
// at an explicit base value so the numbering can be read off directly.
enum ARMReg
{
	// Core registers R0-R11
	R0 = 0,
	R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11,

	// R12-R15, plus their conventional names (R13-R15 are SP, LR and PC and
	// are almost always referred to by name rather than by number).
	R12 = 12, R13 = 13, R14 = 14, R15 = 15,
	_IP = R12, _SP = R13, _LR = R14, _PC = R15,

	// VFP single precision registers: 16..47
	S0 = 16,
	S1, S2, S3, S4, S5, S6, S7,
	S8, S9, S10, S11, S12, S13, S14, S15,
	S16, S17, S18, S19, S20, S21, S22, S23,
	S24, S25, S26, S27, S28, S29, S30, S31,

	// VFP double precision registers: 48..79
	D0 = 48,
	D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD quad-word registers: 80..95
	Q0 = 80,
	Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,

	INVALID_REG = 0xFFFFFFFF
};
// Shift kinds accepted by Operand2. ST_ASL is an alias of ST_LSL; the
// remaining enumerators count up from it.
enum ShiftType
{
	ST_LSL = 0,
	ST_ASL = ST_LSL,
	ST_LSR, // 1
	ST_ASR, // 2
	ST_ROR, // 3
	ST_RRX  // 4
};
// Register-count constant for this emitter.
// NOTE(review): 13 presumably counts the core registers R0-R12 (everything
// below SP/LR/PC) -- confirm against the users of NUMGPRs.
enum
{
NUMGPRs = 13,
};
class ARMXEmitter;
// Discriminator stored in Operand2 that says how its fields are interpreted.
enum OpType
{
	TYPE_IMM     = 0, // immediate value
	TYPE_REG     = 1, // plain register
	TYPE_IMMSREG = 2, // register shifted by an immediate amount
	TYPE_RSR     = 3, // register shifted by a register
	TYPE_MEM     = 4
};
// This is no longer a proper operand2 class. Need to split up.
class Operand2
{
friend class ARMXEmitter;
protected:
u32 Value;
private:
OpType Type;
// IMM types
u8 Rotation; // Only for u8 values
// Register types
u8 IndexOrShift;
ShiftType Shift;
public:
OpType GetType()
{
return Type;
}
Operand2() {}
Operand2(u32 imm, OpType type = TYPE_IMM)
{
Type = type;
Value = imm;
Rotation = 0;
}
Operand2(ARMReg Reg)
{
Type = TYPE_REG;
Value = Reg;
Rotation = 0;
}
Operand2(u8 imm, u8 rotation)
{
Type = TYPE_IMM;
Value = imm;
Rotation = rotation;
}
Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR
{
Type = TYPE_RSR;
_assert_msg_(DYNA_REC, type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
IndexOrShift = shift;
Shift = type;
Value = base;
}
Operand2(ARMReg base, ShiftType type, u8 shift)// For IMM shifted register
{
if (shift == 32) shift = 0;
switch (type)
{
case ST_LSL:
_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSL %u", shift);
break;
case ST_LSR:
_assert_msg_(DYNA_REC, shift <= 32, "Invalid Operand2: LSR %u", shift);
if (!shift)
type = ST_LSL;
if (shift == 32)
shift = 0;
break;
case ST_ASR:
_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSR %u", shift);
if (!shift)
type = ST_LSL;
if (shift == 32)
shift = 0;
break;
case ST_ROR:
_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ROR %u", shift);
if (!shift)
type = ST_LSL;
break;
case ST_RRX:
_assert_msg_(DYNA_REC, shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
type = ST_ROR;
break;
}
IndexOrShift = shift;
Shift = type;
Value = base;
Type = TYPE_IMMSREG;
}
u32 GetData()
{
switch (Type)
{
case TYPE_IMM:
return Imm12Mod(); // This'll need to be changed later
case TYPE_REG:
return Rm();
case TYPE_IMMSREG:
return IMMSR();
case TYPE_RSR:
return RSR();
default:
_assert_msg_(DYNA_REC, false, "GetData with Invalid Type");
return 0;
}
}
u32 IMMSR() // IMM shifted register
{
_assert_msg_(DYNA_REC, Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
}
u32 RSR() // Register shifted register
{
_assert_msg_(DYNA_REC, Type == TYPE_RSR, "RSR must be RSR Of Course");
return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
}
u32 Rm()
{
_assert_msg_(DYNA_REC, Type == TYPE_REG, "Rm must be with Reg");
return Value;
}
u32 Imm5()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm5 not IMM value");
return ((Value & 0x0000001F) << 7);
}
u32 Imm8()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value");
return Value & 0xFF;
}
u32 Imm8Rot() // IMM8 with Rotation
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value");
_assert_msg_(DYNA_REC, (Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation);
return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
}
u32 Imm12()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12 not IMM");
return (Value & 0x00000FFF);
}
u32 Imm12Mod()
{
// This is a IMM12 with the top four bits being rotation and the
// bottom eight being a IMM. This is for instructions that need to
// expand a 8bit IMM to a 32bit value and gives you some rotation as
// well.
// Each rotation rotates to the right by 2 bits
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12Mod not IMM");
return ((Rotation & 0xF) << 8) | (Value & 0xFF);
}
u32 Imm16()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM");
return ( (Value & 0xF000) << 4) | (Value & 0x0FFF);
}
u32 Imm16Low()
{
return Imm16();
}
u32 Imm16High() // Returns high 16bits
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM");
return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
}
u32 Imm24()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM");
return (Value & 0x0FFFFFFF);
}
// NEON and ASIMD specific
u32 Imm8ASIMD()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8ASIMD not IMM");
return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF);
}
u32 Imm8VFP()
{
_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8VFP not IMM");
return ((Value & 0xF0) << 12) | (Value & 0xF);
}
};
// Use these when you don't know if an imm can be represented as an operand2.
// This lets you generate both an optimal and a fallback solution by checking
// the return value, which will be false if these fail to find a Operand2 that
// represents your 32-bit imm value.
bool TryMakeOperand2(u32 imm, Operand2 &op2);
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);
// Use this only when you know imm can be made into an Operand2.
Operand2 AssumeMakeOperand2(u32 imm);
// Wraps a register as a TYPE_REG operand.
inline Operand2 R(ARMReg Reg)
{
	Operand2 reg_operand(Reg, TYPE_REG);
	return reg_operand;
}

// Wraps a 32-bit value as a TYPE_IMM operand.
inline Operand2 IMM(u32 Imm)
{
	Operand2 imm_operand(Imm, TYPE_IMM);
	return imm_operand;
}

// Wraps a pointer, converted to a 32-bit integer, as a TYPE_IMM operand.
inline Operand2 Mem(void *ptr)
{
	const u32 address = (u32)ptr;
	Operand2 address_operand(address, TYPE_IMM);
	return address_operand;
}
// Byte distance of member `elem` from the start of object `str`, computed by
// subtracting the two addresses after casting each to u32.
// usage: struct {int e;} s; STRUCT_OFF(s,e)
#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))
// Handle for a branch whose target is supplied later: it is returned by the
// ARMXEmitter B()/B_CC()/BL()/BL_CC() overloads that take no destination and
// is consumed by ARMXEmitter::SetJumpTarget().
struct FixupBranch
{
u8 *ptr;
u32 condition; // Remembers our condition at the time
int type; //0 = B 1 = BL
};
// One entry of ARMXEmitter::currentLitPool.
// NOTE(review): field meanings are inferred from their names only -- `val` is
// presumably the 32-bit literal, `ldr_address` the load instruction that refers
// to it and `loc` its location in the pool; confirm against ArmEmitter.cpp.
struct LiteralPool
{
s32 loc;
u8* ldr_address;
u32 val;
};
typedef const u8* JumpTarget;
// Element size/type flags for the `Size` argument of NEON instructions; each
// is a distinct bit so they can be OR-ed together. NEONXEmitter::encodedSize()
// tests I_8, I_16, I_32/F_32 and I_64 to derive the size field.
// XXX: Stop polluting the global namespace
const u32 I_8 = (1 << 0);
const u32 I_16 = (1 << 1);
const u32 I_32 = (1 << 2);
const u32 I_64 = (1 << 3);
const u32 I_SIGNED = (1 << 4);
const u32 I_UNSIGNED = (1 << 5);
const u32 F_32 = (1 << 6);
const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL
u32 EncodeVd(ARMReg Vd);
u32 EncodeVn(ARMReg Vn);
u32 EncodeVm(ARMReg Vm);
// Subtracts the base from the register to give us the real one
ARMReg SubBase(ARMReg Reg);
// Emitter for 32-bit ARM instructions. It keeps a write cursor (`code`) into a
// caller-provided buffer; Write32() stores one instruction word at the cursor
// and advances it by four bytes. `condition` holds the current condition code
// already shifted into bits 28-31 (both constructors set it to CC_AL << 28).
// Only the inline members are defined here; everything else is declared only.
class ARMXEmitter
{
friend struct OpArg; // for Write8 etc
friend class NEONXEmitter;
private:
// Current write position and the position the emitter was constructed with.
u8 *code, *startcode;
u8 *lastCacheFlushEnd;
// Condition code, pre-shifted into the top four bits of an instruction word.
u32 condition;
std::vector<LiteralPool> currentLitPool;
void WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 op2, bool RegAdd);
void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList);
void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);
void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void WriteVFPDataOp6bit(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm, u32 bit6);
void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
// New Ops
void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);
protected:
// Stores one 32-bit word at the cursor and advances the cursor by 4 bytes.
inline void Write32(u32 value) {*(u32*)code = value; code+=4;}
public:
// Creates an emitter with no buffer attached (all pointers null).
ARMXEmitter() : code(nullptr), startcode(nullptr), lastCacheFlushEnd(nullptr) {
condition = CC_AL << 28;
}
// Creates an emitter that starts writing at code_ptr.
ARMXEmitter(u8* code_ptr) {
code = code_ptr;
lastCacheFlushEnd = code_ptr;
startcode = code_ptr;
condition = CC_AL << 28;
}
virtual ~ARMXEmitter() {}
void SetCodePtr(u8 *ptr);
void ReserveCodeSpace(u32 bytes);
const u8 *AlignCode16();
const u8 *AlignCodePage();
const u8 *GetCodePtr() const;
void FlushIcache();
void FlushIcacheSection(u8 *start, u8 *end);
u8 *GetWritableCodePtr();
void FlushLitPool();
void AddNewLit(u32 val);
bool TrySetValue_TwoOp(ARMReg reg, u32 val);
// Returns the current condition code (the stored value shifted back down).
CCFlags GetCC() { return CCFlags(condition >> 28); }
void SetCC(CCFlags cond = CC_AL);
// Special purpose instructions
// Dynamic Endian Switching
void SETEND(bool BE);
// Debug Breakpoint
void BKPT(u16 arg);
// Hint instruction
void YIELD();
// System
void MRC(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2 = 0);
void MCR(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2 = 0);
// Do nothing
// NOTE(review): the AMD/Intel TODO below looks inherited from an x86 emitter.
void NOP(int count = 1); //nop padding - TODO: fast nop slides, for AMD and Intel (check their manuals)
#ifdef CALL
#undef CALL
#endif
// Branching
// The overloads without a destination return a FixupBranch that is resolved
// later with SetJumpTarget().
FixupBranch B();
FixupBranch B_CC(CCFlags Cond);
void B_CC(CCFlags Cond, const void *fnptr);
FixupBranch BL();
FixupBranch BL_CC(CCFlags Cond);
void SetJumpTarget(FixupBranch const &branch);
void B (const void *fnptr);
void B (ARMReg src);
void BL(const void *fnptr);
void BL(ARMReg src);
bool BLInRange(const void *fnptr);
// Variadic: `num` is followed by that many register arguments.
void PUSH(const int num, ...);
void POP(const int num, ...);
// New Data Ops
void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm);
void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm);
void EOR (ARMReg dest, ARMReg src, Operand2 op2);
void EORS(ARMReg dest, ARMReg src, Operand2 op2);
void SUB (ARMReg dest, ARMReg src, Operand2 op2);
void SUBS(ARMReg dest, ARMReg src, Operand2 op2);
void RSB (ARMReg dest, ARMReg src, Operand2 op2);
void RSBS(ARMReg dest, ARMReg src, Operand2 op2);
void ADD (ARMReg dest, ARMReg src, Operand2 op2);
void ADDS(ARMReg dest, ARMReg src, Operand2 op2);
void ADC (ARMReg dest, ARMReg src, Operand2 op2);
void ADCS(ARMReg dest, ARMReg src, Operand2 op2);
void LSL (ARMReg dest, ARMReg src, Operand2 op2);
void LSLS(ARMReg dest, ARMReg src, Operand2 op2);
void LSR (ARMReg dest, ARMReg src, Operand2 op2);
void LSRS(ARMReg dest, ARMReg src, Operand2 op2);
void ASR (ARMReg dest, ARMReg src, Operand2 op2);
void ASRS(ARMReg dest, ARMReg src, Operand2 op2);
void SBC (ARMReg dest, ARMReg src, Operand2 op2);
void SBCS(ARMReg dest, ARMReg src, Operand2 op2);
void RBIT(ARMReg dest, ARMReg src);
void REV (ARMReg dest, ARMReg src);
void REV16 (ARMReg dest, ARMReg src);
void RSC (ARMReg dest, ARMReg src, Operand2 op2);
void RSCS(ARMReg dest, ARMReg src, Operand2 op2);
void TST ( ARMReg src, Operand2 op2);
void TEQ ( ARMReg src, Operand2 op2);
void CMP ( ARMReg src, Operand2 op2);
void CMN ( ARMReg src, Operand2 op2);
void ORR (ARMReg dest, ARMReg src, Operand2 op2);
void ORRS(ARMReg dest, ARMReg src, Operand2 op2);
void MOV (ARMReg dest, Operand2 op2);
void MOVS(ARMReg dest, Operand2 op2);
void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN
void BICS(ARMReg dest, ARMReg src, Operand2 op2);
void MVN (ARMReg dest, Operand2 op2);
void MVNS(ARMReg dest, Operand2 op2);
void MOVW(ARMReg dest, Operand2 op2);
void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false);
// UDIV and SDIV are only available on CPUs that have
// the idiva hardware capacity
void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
void MUL (ARMReg dest, ARMReg src, ARMReg op2);
void MULS(ARMReg dest, ARMReg src, ARMReg op2);
void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
void UMULLS(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
void SXTB(ARMReg dest, ARMReg op2);
void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
void CLZ(ARMReg rd, ARMReg rm);
// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in Dolphin...)
// Just need to put an underscore here, bit annoying.
void _MSR (bool nzcvq, bool g, Operand2 op2);
void _MSR (bool nzcvq, bool g, ARMReg src);
void MRS (ARMReg dest);
// Memory load/store operations
void LDR (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void LDRB (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void LDRH (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void LDRSB(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void LDRSH(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void STR (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void STRB (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
// Exclusive Access operations
void LDREX(ARMReg dest, ARMReg base);
// result contains the result if the instruction managed to store the value
void STREX(ARMReg result, ARMReg base, ARMReg op);
void DMB ();
void SVC(Operand2 op);
// NEON and ASIMD instructions
// None of these will be created with conditional since ARM
// is deprecating conditional execution of ASIMD instructions.
// ASIMD instructions don't even have a conditional encoding.
// VFP Only
void VLDR(ARMReg Dest, ARMReg Base, s16 offset);
void VSTR(ARMReg Src, ARMReg Base, s16 offset);
void VCMP(ARMReg Vd, ARMReg Vm);
void VCMPE(ARMReg Vd, ARMReg Vm);
// Compares against zero
void VCMP(ARMReg Vd);
void VCMPE(ARMReg Vd);
void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSQRT(ARMReg Vd, ARMReg Vm);
// NEON and VFP
void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VABS(ARMReg Vd, ARMReg Vm);
void VNEG(ARMReg Vd, ARMReg Vm);
void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMOV(ARMReg Dest, Operand2 op2);
void VMOV(ARMReg Dest, ARMReg Src, bool high);
void VMOV(ARMReg Dest, ARMReg Src);
// `flags` is a combination of the TO_FLOAT/TO_INT/IS_SIGNED/ROUND_TO_ZERO macros.
void VCVT(ARMReg Dest, ARMReg Src, int flags);
void VMRS(ARMReg Rt);
void VMSR(ARMReg Rt);
void QuickCallFunction(ARMReg scratchreg, void *func);
// Wrapper around MOVT/MOVW with fallbacks.
void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false);
void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
void CMPI2R(ARMReg rs, u32 val, ARMReg scratch);
void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
}; // class ARMXEmitter
// Alignment hint accepted by the NEON VLD1/VLD2/VST1 emitters. The values
// count up from zero in declaration order.
enum NEONAlignment
{
	ALIGN_NONE = 0,
	ALIGN_64,  // 1
	ALIGN_128, // 2
	ALIGN_256  // 3
};
// NEON/ASIMD instruction emitter. It owns no buffer of its own: every word is
// written through the ARMXEmitter passed to the constructor (NEONXEmitter is a
// friend of ARMXEmitter, which is what lets it call the protected Write32).
// Instructions that take a `Size` argument expect a combination of the
// I_8/I_16/I_32/I_64/F_32/... flag constants.
class NEONXEmitter
{
private:
ARMXEmitter *_emit;
// Forwards one instruction word to the wrapped emitter.
inline void Write32(u32 value) { _emit->Write32(value); }
// Maps a size flag set to a 2-bit size value: I_8 -> 0, I_16 -> 1,
// I_32 or F_32 -> 2, I_64 -> 3. The first matching flag wins. When none
// matches, the debug assertion is raised and the trailing `return 0` runs
// (it sits after the if/else chain, not inside the else branch).
inline u32 encodedSize(u32 value)
{
if (value & I_8)
return 0;
else if (value & I_16)
return 1;
else if ((value & I_32) || (value & F_32))
return 2;
else if (value & I_64)
return 3;
else
_dbg_assert_msg_(DYNA_REC, false, "Passed invalid size to integer NEON instruction");
return 0;
}
// NOTE(review): the two size parameters differ only in case (`size` vs
// `Size`); presumably this is the shared worker behind VREV64/32/16 -- confirm.
void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm);
public:
// Wraps an existing emitter; the pointer is stored, not owned.
NEONXEmitter(ARMXEmitter *emit)
: _emit(emit)
{}
void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VABS(u32 Size, ARMReg Vd, ARMReg Vm);
void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm);
void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCGE(u32 Size, ARMReg Vd, ARMReg Vm);
void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCGT(u32 Size, ARMReg Vd, ARMReg Vm);
void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCLE(u32 Size, ARMReg Vd, ARMReg Vm);
void VCLS(u32 Size, ARMReg Vd, ARMReg Vm);
void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VCLT(u32 Size, ARMReg Vd, ARMReg Vm);
void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm);
void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);
void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQABS(u32 Size, ARMReg Vd, ARMReg Vm);
void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm);
void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm);
void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm);
void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VSWP(ARMReg Vd, ARMReg Vm);
void VTRN(u32 Size, ARMReg Vd, ARMReg Vm);
void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VUZP(u32 Size, ARMReg Vd, ARMReg Vm);
void VZIP(u32 Size, ARMReg Vd, ARMReg Vm);
void VREV64(u32 Size, ARMReg Vd, ARMReg Vm);
void VREV32(u32 Size, ARMReg Vd, ARMReg Vm);
void VREV16(u32 Size, ARMReg Vd, ARMReg Vm);
// Loads/stores: Rm defaults to _PC and alignment to ALIGN_NONE.
// NOTE(review): Rm == _PC presumably selects the form without an index
// register -- confirm in the implementation.
void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC);
void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC);
void VST1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC);
};
// Code block specialisation for the ARM emitter. Its only job is to fill the
// region with breakpoint instructions so that stray execution of unwritten
// memory traps instead of running garbage.
class ARMCodeBlock : public CodeBlock<ARMXEmitter>
{
private:
	// Overwrites [region, region + region_size) with BKPT #0.
	void PoisonMemory() override
	{
		u32* ptr = (u32*)region;
		u32* maxptr = (u32*)(region + region_size);
		// If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything
		// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
		//
		// ARM: 0xE1200070 = BKPT 0 with the AL condition in bits 28-31.
		// The bare opcode 0x01200070 leaves the condition field at 0000,
		// which is not a valid BKPT encoding (BKPT must be encoded with AL).
		while (ptr < maxptr)
			*ptr++ = 0xE1200070;
	}
};
// VFP Specific
// Pair of opcode values for a VFP instruction.
// NOTE(review): presumably one opcode per encoding variant of the same
// operation -- confirm which variant each field covers in ArmEmitter.cpp.
struct VFPEnc {
s16 opc1;
s16 opc2;
};
} // namespace

View file

@ -36,17 +36,11 @@ set(SRCS BreakPoints.cpp
Logging/LogManager.cpp)
set(LIBS enet)
if(_M_ARM)
if (_M_ARM_32) #ARMv7
set(SRCS ${SRCS}
ArmEmitter.cpp)
else() #AArch64
set(SRCS ${SRCS}
Arm64Emitter.cpp)
endif()
if(_M_ARM_64)
set(SRCS ${SRCS}
ArmCPUDetect.cpp
GenericFPURoundMode.cpp)
Arm64Emitter.cpp
ArmCPUDetect.cpp
GenericFPURoundMode.cpp)
else()
if(_M_X86) #X86
set(SRCS ${SRCS}

View file

@ -133,10 +133,6 @@ inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) |
inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);}
#elif _M_ARM_32
// 32-bit ARM byte swaps implemented with the REV16 / REV instructions.
// swap16 widens to u32 first so the value can live in a register operand,
// then narrows the result back to u16.
inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;}
// Reverses the four bytes of a 32-bit value in place.
inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;}
// Built from two swap32 calls: the swapped low word becomes the high word and
// the swapped high word becomes the low word.
inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);}
#elif __linux__ && !(ANDROID && _M_ARM_64)
// Android NDK r10c has broken builtin byte swap routines
// Disabled for now.

View file

@ -204,24 +204,6 @@ if(_M_X86)
PowerPC/JitCommon/JitBackpatch.cpp
PowerPC/JitCommon/Jit_Util.cpp
PowerPC/JitCommon/TrampolineCache.cpp)
elseif(_M_ARM_32)
set(SRCS ${SRCS}
PowerPC/JitArm32/Jit.cpp
PowerPC/JitArm32/JitAsm.cpp
PowerPC/JitArm32/JitArm_BackPatch.cpp
PowerPC/JitArm32/JitArm_Tables.cpp
PowerPC/JitArm32/JitArmCache.cpp
PowerPC/JitArm32/JitRegCache.cpp
PowerPC/JitArm32/JitFPRCache.cpp
PowerPC/JitArm32/JitArm_Branch.cpp
PowerPC/JitArm32/JitArm_Integer.cpp
PowerPC/JitArm32/JitArm_LoadStore.cpp
PowerPC/JitArm32/JitArm_FloatingPoint.cpp
PowerPC/JitArm32/JitArm_Paired.cpp
PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
PowerPC/JitArm32/JitArm_SystemRegisters.cpp
PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp
)
elseif(_M_ARM_64)
set(SRCS ${SRCS}
PowerPC/JitArm64/Jit.cpp

View file

@ -418,8 +418,6 @@ void SConfig::LoadCoreSettings(IniFile& ini)
core->Get("HLE_BS2", &m_LocalCoreStartupParameter.bHLE_BS2, false);
#ifdef _M_X86
core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JIT64);
#elif _M_ARM_32
core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JITARM);
#elif _M_ARM_64
core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JITARM64);
#else

View file

@ -113,9 +113,6 @@
#define CTX_REG(x) regs[x]
#define CTX_SP sp
#define CTX_PC pc
#elif _M_ARM_32
// Add others if required.
#define CTX_PC arm_pc
#else
#warning No context definition for OS
#endif

View file

@ -1,513 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <map>
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PatchEngine.h"
#include "Core/HLE/HLE.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
using namespace ArmGen;
// Brings the ARMv7 JIT up: allocates the code buffer, initialises the block
// cache, assembly routines and both register caches, sets the JIT options,
// wires the analyser outputs into the JIT state and finally forces every
// load/store instruction family off.
void JitArm::Init()
{
	AllocCodeSpace(CODE_SIZE);

	// Sub-components
	blocks.Init();
	asm_routines.Init();
	gpr.Init(this);
	fpr.Init(this);

	// JIT options
	jo.enableBlocklink = true;
	jo.optimizeGatherPipe = true;
	UpdateMemoryOptions();

	// Analyser results are written straight into the JIT state
	code_block.m_stats = &js.st;
	code_block.m_gpa = &js.gpa;
	code_block.m_fpa = &js.fpa;
	analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);

	InitBackpatch();

	// Disable all loadstores
	// Ever since the MMU has been optimized for x86, loadstores on ARMv7 have been knackered
	// XXX: Investigate exactly why these are broken
	auto& startup = SConfig::GetInstance().m_LocalCoreStartupParameter;
	startup.bJITLoadStoreOff = true;
	startup.bJITLoadStoreFloatingOff = true;
	startup.bJITLoadStorePairedOff = true;
}
// Discards all generated code: resets the code space, clears the block cache
// and then re-applies the memory options.
void JitArm::ClearCache()
{
ClearCodeSpace();
blocks.Clear();
UpdateMemoryOptions();
}
// Tears the JIT down: frees the code space, then shuts down the block cache
// and the assembly routines (the counterpart of the calls made in Init()).
void JitArm::Shutdown()
{
FreeCodeSpace();
blocks.Shutdown();
asm_routines.Shutdown();
}
// This is only called by FallBackToInterpreter() in this file. It will execute an instruction with the interpreter functions.
// Emitted sequence: flush both register caches, load the raw instruction word
// into R0 and the address of the interpreter handler into R12, then BL to R12.
void JitArm::WriteCallInterpreter(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
// Look up the interpreter routine for this instruction at JIT time.
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
MOVI2R(R0, inst.hex);
MOVI2R(R12, (u32)instr);
BL(R12);
}
// Handler for instructions the JIT does not compile itself: emits a call to
// the interpreter for the given instruction word via WriteCallInterpreter().
void JitArm::FallBackToInterpreter(UGeckoInstruction _inst)
{
WriteCallInterpreter(_inst.hex);
}
// Emits a call to HLE::Execute with the current compiler PC in R0 and the
// instruction word in R1, then exits the block to whatever `npc` holds in the
// PPC state afterwards. Both register caches are flushed before the call.
void JitArm::HLEFunction(UGeckoInstruction _inst)
{
gpr.Flush();
fpr.Flush();
MOVI2R(R0, js.compilerPC);
MOVI2R(R1, _inst.hex);
// R14 is the scratch register handed to QuickCallFunction.
QuickCallFunction(R14, (void*)&HLE::Execute);
// Read npc (addressed relative to R9) and leave the block through it;
// WriteExitDestInR unlocks rA.
ARMReg rA = gpr.GetReg();
LDR(rA, R9, PPCSTATE_OFF(npc));
WriteExitDestInR(rA);
}
// Instruction handler that intentionally emits no code.
void JitArm::DoNothing(UGeckoInstruction _inst)
{
// Yup, just don't do anything.
}
static const bool ImHereDebug = false;
static const bool ImHereLog = false;
static std::map<u32, int> been_here;
// Debug helper: optionally appends the current PC to log32.txt (when
// ImHereLog is set) and writes a DEBUG_LOG line for the PC. A PC that has been
// seen before only logs again when its hit counter reaches a multiple of 1024,
// at which point the counter restarts at 1.
static void ImHere()
{
	static File::IOFile f;
	if (ImHereLog)
	{
		if (!f)
			f.Open("log32.txt", "w");
		fprintf(f.GetHandle(), "%08x\n", PC);
	}

	// Single lookup instead of repeating find() for every access.
	auto seen = been_here.find(PC);
	if (seen != been_here.end())
	{
		++seen->second;
		const bool suppress = (seen->second & 1023) != 0;
		if (suppress)
			return;
	}

	DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR);
	been_here[PC] = 1;
}
// Emitted before leaving a block. When gather-pipe optimisation is enabled and
// this block wrote FIFO bytes, emits a call to GPFifo::FastCheckGatherPipe
// with R0-R3 saved and restored around it.
void JitArm::Cleanup()
{
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
PUSH(4, R0, R1, R2, R3);
QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
POP(4, R0, R1, R2, R3);
}
}
// Emits code that subtracts this block's cycle cost (js.downcountAmount) from
// the `downcount` field of the PPC state, using SUBS for the subtraction.
void JitArm::DoDownCount()
{
ARMReg rA = gpr.GetReg();
LDR(rA, R9, PPCSTATE_OFF(downcount));
// Small amounts are passed to SUBS directly as an immediate operand;
// anything else is first materialised in a second register.
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
{
SUBS(rA, rA, js.downcountAmount);
}
else
{
// rB is requested with GetReg(false) and is never passed to Unlock here.
ARMReg rB = gpr.GetReg(false);
MOVI2R(rB, js.downcountAmount);
SUBS(rA, rA, rB);
}
STR(rA, R9, PPCSTATE_OFF(downcount));
gpr.Unlock(rA);
}
// Emits a block exit whose destination address is in `Reg`: stores Reg to
// `pc`, runs the common exit work (Cleanup, DoDownCount, optional profiling
// epilogue), then reuses Reg to branch to the dispatcher. Reg is unlocked at
// the end, so the caller must not unlock it again.
void JitArm::WriteExitDestInR(ARMReg Reg)
{
STR(Reg, R9, PPCSTATE_OFF(pc));
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
// Reg no longer needs the destination; it now carries the dispatcher address.
MOVI2R(Reg, (u32)asm_routines.dispatcher);
B(Reg);
gpr.Unlock(Reg);
}
// Like WriteExitDestInR, but additionally emits a call to
// PowerPC::CheckExceptions before jumping to the dispatcher: pc is copied to
// npc before the call and npc is copied back to pc after it.
void JitArm::WriteRfiExitDestInR(ARMReg Reg)
{
STR(Reg, R9, PPCSTATE_OFF(pc));
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
ARMReg A = gpr.GetReg(false);
// npc = pc, call CheckExceptions, pc = npc
LDR(A, R9, PPCSTATE_OFF(pc));
STR(A, R9, PPCSTATE_OFF(npc));
QuickCallFunction(A, (void*)&PowerPC::CheckExceptions);
LDR(A, R9, PPCSTATE_OFF(npc));
STR(A, R9, PPCSTATE_OFF(pc));
gpr.Unlock(Reg); // This was locked in the instruction beforehand
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
}
// Emits a block exit that runs PowerPC::CheckExceptions on the pc already
// stored in the PPC state: same sequence as WriteRfiExitDestInR, minus the
// initial store of a destination register.
void JitArm::WriteExceptionExit()
{
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
ARMReg A = gpr.GetReg(false);
// npc = pc, call CheckExceptions, pc = npc
LDR(A, R9, PPCSTATE_OFF(pc));
STR(A, R9, PPCSTATE_OFF(npc));
QuickCallFunction(A, (void*)&PowerPC::CheckExceptions);
LDR(A, R9, PPCSTATE_OFF(npc));
STR(A, R9, PPCSTATE_OFF(pc));
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
}
// Emits a block exit to a destination address known at JIT time and records
// it in the current block's link data. If block linking is enabled and the
// destination is already compiled, a direct branch to that block's
// checkedEntry is emitted; otherwise the destination is stored to pc and
// control goes to the dispatcher.
void JitArm::WriteExit(u32 destination)
{
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
JitBlock::LinkData linkData;
linkData.exitAddress = destination;
// Remember where the exit code starts.
linkData.exitPtrs = GetWritableCodePtr();
linkData.linkStatus = false;
// Link opportunity!
int block;
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{
// It exists! Joy of joy!
B(blocks.GetBlock(block)->checkedEntry);
linkData.linkStatus = true;
}
else
{
ARMReg A = gpr.GetReg(false);
MOVI2R(A, destination);
STR(A, R9, PPCSTATE_OFF(pc));
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
}
b->linkData.push_back(linkData);
}
// Enters generated code by calling asm_routines.enterCode as a function.
void JitArm::Run()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
}
// Same body as Run(): calls asm_routines.enterCode.
// NOTE(review): nothing here limits execution to one instruction; DoJit sets
// blockSize to 1 while the CPU is in the stepping state, which is presumably
// what makes this a single step -- confirm.
void JitArm::SingleStep()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
}
// Writes one DEBUG_LOG line with PC, SRR0, SRR1, FPSCR, MSR and LR. When
// JIT_LOG_GPR / JIT_LOG_FPR are defined, the 32 general-purpose registers
// and/or the 32 floating-point registers (via riPS0) are appended.
void JitArm::Trace()
{
	std::string regs;
	std::string fregs;

#ifdef JIT_LOG_GPR
	for (int i = 0; i < 32; i++)
	{
		regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]);
	}
#endif

#ifdef JIT_LOG_FPR
	for (int i = 0; i < 32; i++)
	{
		// The value is printed with 16 hex digits, i.e. as a 64-bit quantity.
		// Cast and use the `ll` length modifier so the format matches the
		// argument whatever the integer width of riPS0 turns out to be;
		// passing a 64-bit value to a plain %x is undefined behaviour.
		fregs += StringFromFormat("f%02d: %016llx ", i, static_cast<unsigned long long>(riPS0(i)));
	}
#endif

	// NOTE(review): the "JIT64" label below is kept byte-for-byte although
	// this is the ARM JIT; rename it if no tooling depends on the text.
	DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s",
		PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
}
// Compiles the block at the current PowerPC::ppcState.pc. The whole cache is
// flushed first when less than 0x10000 bytes of code space remain, the block
// table is full, or the block cache is disabled in the configuration.
// Note that `em_address` is not read; the block address always comes from
// PowerPC::ppcState.pc.
void JitArm::Jit(u32 em_address)
{
	const bool flush_needed =
		GetSpaceLeft() < 0x10000 ||
		blocks.IsFull() ||
		SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache;
	if (flush_needed)
		ClearCache();

	// Reserve a block slot, generate code into it, then publish the block.
	const int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc);
	JitBlock* const new_block = blocks.GetBlock(block_num);
	const u8* const entry = DoJit(PowerPC::ppcState.pc, &code_buffer, new_block);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink, entry);
}
// Instruction handler that logs an error naming the instruction at JIT time
// and emits a BKPT with the argument 0x4444 into the generated code.
void JitArm::Break(UGeckoInstruction inst)
{
ERROR_LOG(DYNA_REC, "%s called a Break instruction!", PPCTables::GetInstructionName(inst));
BKPT(0x4444);
}
// Resets the block's timing statistics and emits code that (re)programs the
// performance monitoring unit and stores the current cycle counter value in
// b->ticStart. Clobbers R0 and R1 in the generated code.
void JitArm::BeginTimeProfile(JitBlock* b)
{
	b->ticStop = 0;
	b->ticStart = 0;
	b->ticCounter = 0;

	// Performance counters are a bit finicky on ARM.
	// The PMU has to be enabled and programmed before it can be used, and that is a
	// per-core operation: with thread scheduling we may land on a core where the PMU
	// has not been enabled yet (and some ARM CPUs have absurd core counts, 48+!).
	// Since the counters must be reset at the start of every block anyway, simply
	// enable the PMU again at the start of each block.
	//
	// Note that the kernel can block access to these co-processor registers, in
	// which case the instructions below raise SIGILL.
	// Refer to the ARM ARM's description of PMCR for the exact meaning of each bit.
	enum
	{
		PMCR_ENABLE        = (1 << 0),
		PMCR_RESET_CR      = (1 << 1),
		PMCR_RESET_CCR     = (1 << 2),
		PMCR_DIVIDER_MODE  = (1 << 3),
		PMCR_EXPORT_ENABLE = (1 << 4),
	};
	const u32 pmcr_value = PMCR_ENABLE | PMCR_RESET_CR | PMCR_RESET_CCR | PMCR_EXPORT_ENABLE;
	const u32 tic_start_addr = (u32)&b->ticStart;

	// Program the PMCR
	MOVI2R(R0, pmcr_value);
	MCR(15, 0, R0, 9, 12, 0);

	// Enable all counters, then clear all counter overflows (same mask for both)
	MOVI2R(R0, 0x8000000F);
	MCR(15, 0, R0, 9, 12, 1);
	MCR(15, 0, R0, 9, 12, 3);

	// Read the cycle counter and record it as the block's start time
	MRC(15, 0, R1, 9, 13, 0);
	MOVI2R(R0, tic_start_addr);
	STR(R1, R0, 0);
}
// Emits code that stores the current cycle counter value in b->ticStop and then
// calls asm_routines.m_increment_profile_counter with R0 pointing at b->ticStart.
// Clobbers R0, R1 and R14 in the generated code.
void JitArm::EndTimeProfile(JitBlock* b)
{
	const u32 tic_stop_addr = (u32)&b->ticStop;
	const u32 tic_start_addr = (u32)&b->ticStart;
	const u32 accumulate_routine = (u32)asm_routines.m_increment_profile_counter;

	// Read the cycle counter and record it as the block's stop time
	MRC(15, 0, R1, 9, 13, 0);
	MOVI2R(R0, tic_stop_addr);
	STR(R1, R0, 0);

	// Hand the address of ticStart to the shared accumulation routine
	MOVI2R(R0, tic_start_addr);
	MOVI2R(R14, accumulate_routine);
	BL(R14);
}
// Compiles one PowerPC block starting at em_address into ARM code.
//
// em_address - emulated address of the first instruction of the block.
// code_buf   - scratch buffer the analyzer fills with the block's instructions.
// b          - block descriptor; checkedEntry, normalEntry, runCount, codeSize and
//              originalSize are filled in here.
//
// Returns the start of the generated code, which is also b->checkedEntry (the
// entry point that performs the downcount check first).
const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b)
{
int blockSize = code_buf->GetSize();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
// Comment out the following to disable breakpoints (speed-up)
if (!Profiler::g_ProfileBlocks)
{
// While stepping, compile a single instruction per block
if (PowerPC::GetState() == PowerPC::CPU_STEPPING)
blockSize = 1;
Trace();
}
}
// Compiling at address 0 is treated as an error: pause the core and tell the user
if (em_address == 0)
{
Core::SetState(Core::CORE_PAUSE);
PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR);
}
// Reset the per-block compiler state
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.curBlock = b;
u32 nextPC = em_address;
// Analyze the block, collect all instructions it is made of (including inlining,
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
const u8 *start = GetCodePtr();
b->checkedEntry = start;
b->runCount = 0;
// Downcount flag check, Only valid for linked blocks
// If the PL condition holds we skip straight to the normal entry; otherwise the
// block's start address is written to ppcState.pc and we leave through doTiming.
{
FixupBranch no_downcount = B_CC(CC_PL);
ARMReg rA = gpr.GetReg(false);
MOVI2R(rA, js.blockStart);
STR(rA, R9, PPCSTATE_OFF(pc));
MOVI2R(rA, (u32)asm_routines.doTiming);
B(rA);
SetJumpTarget(no_downcount);
}
// Entry point that skips the downcount check above
const u8 *normalEntry = GetCodePtr();
b->normalEntry = normalEntry;
if (ImHereDebug)
QuickCallFunction(R14, (void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
if (js.fpa.any)
{
// This block uses FPU - needs to add FP exception bailout
ARMReg A = gpr.GetReg();
ARMReg C = gpr.GetReg();
Operand2 Shift(2, 10); // 1 << 13
MOVI2R(C, js.blockStart); // C = address of this block
LDR(A, R9, PPCSTATE_OFF(msr));
TST(A, Shift);
// Bit set in MSR: skip the bailout below
FixupBranch no_fpe = B_CC(CC_NEQ);
// Bit clear: flag EXCEPTION_FPU_UNAVAILABLE, let CheckExceptions handle it and
// continue at the resulting npc through the dispatcher
STR(C, R9, PPCSTATE_OFF(pc));
LDR(A, R9, PPCSTATE_OFF(Exceptions));
ORR(A, A, EXCEPTION_FPU_UNAVAILABLE);
STR(A, R9, PPCSTATE_OFF(Exceptions));
QuickCallFunction(A, (void*)&PowerPC::CheckExceptions);
LDR(A, R9, PPCSTATE_OFF(npc));
STR(A, R9, PPCSTATE_OFF(pc));
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
SetJumpTarget(no_fpe);
gpr.Unlock(A, C);
}
// Conditionally add profiling code.
if (Profiler::g_ProfileBlocks)
{
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
MOVI2R(rA, (u32)&b->runCount); // Load in to register
LDR(rB, rA); // Load the current run count
ADD(rB, rB, 1); // Add one to the value
STR(rB, rA); // Now store it back in the memory location
BeginTimeProfile(b);
gpr.Unlock(rA, rB);
}
// Set up both register caches from the analysis results
gpr.Start(js.gpa);
fpr.Start(js.fpa);
js.downcountAmount = 0;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
js.skipInstructions = 0;
js.compilerPC = nextPC;
// Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++)
{
js.compilerPC = ops[i].address;
js.op = &ops[i];
js.instructionNumber = i;
const GekkoOPInfo *opinfo = ops[i].opinfo;
// Accumulate the cycle cost of the block
js.downcountAmount += opinfo->numCycles;
if (i == (code_block.m_num_instructions - 1))
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
}
// Once 32 bytes of FIFO writes have been accumulated, emit a call to the gather
// pipe check; R0-R3 are saved around the call
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
PUSH(4, R0, R1, R2, R3);
QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
POP(4, R0, R1, R2, R3);
}
if (!ops[i].skip)
{
if (jo.memcheck && (opinfo->flags & FL_USE_FPU))
{
// Don't do this yet
// (memcheck is not implemented for FPU instructions: trap instead)
BKPT(0x7777);
}
JitArmTables::CompileInstruction(ops[i]);
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreFromRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreFromRegister(j);
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
// Don't do this yet
// (memcheck is not implemented for load/store instructions: trap instead)
BKPT(0x666);
}
}
}
// Memory exceptions are not handled by this JIT: trap
if (code_block.m_memory_exception)
BKPT(0x500);
// The analyzer flagged the block as broken, so no instruction wrote an exit for
// it; write one to the address the analyzer returned
if (code_block.m_broken)
{
printf("Broken Block going to 0x%08x\n", nextPC);
WriteExit(nextPC);
}
b->codeSize = (u32)(GetCodePtr() - start);
b->originalSize = code_block.m_num_instructions;
// Make the freshly written code visible to instruction fetch
FlushIcache();
return start;
}

View file

@ -1,248 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// ========================
// See comments in Jit.cpp.
// ========================
// Mystery: Capcom vs SNK 800aa278
// CR flags approach:
// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3".
// * All flag altering instructions flush this
// * A flush simply does a conditional write to the appropriate CRx.
// * If flag available, branch code can become absolutely trivial.
// Settings
// ----------
#pragma once
#include "Core/PowerPC/CPUCoreBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm32/JitArmCache.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Byte offset of a PowerPC::ppcState member measured from ppcState.spr[0]
// instead of from the start of the struct, so the result may be negative.
// The JIT uses it as the immediate offset for loads/stores based on R9
// (presumably R9 holds &ppcState.spr[0] while generated code runs - confirm in
// the asm routines).
#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0]))
// Some asserts to make sure we will be able to load everything
// LDR/STR immediate offsets must stay within +/-4095 bytes
static_assert(PPCSTATE_OFF(spr[1023]) > -4096 && PPCSTATE_OFF(spr[1023]) < 4096, "LDR can't reach all of the SPRs");
// VLDR/VSTR immediate offsets must stay within +/-1020 bytes and be word aligned
static_assert(PPCSTATE_OFF(ps[0][0]) >= -1020 && PPCSTATE_OFF(ps[0][0]) <= 1020, "VLDR can't reach all of the FPRs");
static_assert((PPCSTATE_OFF(ps[0][0]) % 4) == 0, "VLDR requires FPRs to be 4 byte aligned");
// PowerPC -> 32-bit ARM recompiler core.
// Combines the common JIT interface (JitBase) with an ARM code emitter/code
// block (ArmGen::ARMCodeBlock), so the emitter functions can be called directly
// from the member functions below.
class JitArm : public JitBase, public ArmGen::ARMCodeBlock
{
private:
// Cache of compiled blocks
JitArmBlockCache blocks;
// Generated helper routines (dispatcher, enterCode, doTiming, ...)
JitArmAsmRoutineManager asm_routines;
// TODO: Make arm specific versions of these, shouldn't be too hard to
// make it so we allocate some space at the start(?) of code generation
// and keep the registers in a cache. Will burn this bridge when we get to
// it.
// Register caches for the guest general purpose and floating point registers
ArmRegCache gpr;
ArmFPRCache fpr;
// Buffer the analyzer fills with the instructions of the block being compiled
PPCAnalyst::CodeBuffer code_buffer;
// The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info;
void DoDownCount();
void Helper_UpdateCR1(ArmGen::ARMReg fpscr, ArmGen::ARMReg temp);
void SetFPException(ArmGen::ARMReg Reg, u32 Exception);
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
// Emit the code that starts/stops cycle counting for a block (block profiling)
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
// Rewrites the faulting fastmem access at ctx's PC into its slowmem equivalent
bool BackPatch(SContext* ctx);
// Decodes the load/store at ptr into backpatch flags and the registers involved
bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1);
// Initializes the information that backpatching needs
// This is required so we know the backpatch routine sizes and trouble offsets
void InitBackpatch();
// Returns the trouble instruction offset
// Zero if it isn't a fastmem routine
u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG);
public:
// The analyzer's code buffer is sized for 32000 entries
JitArm() : code_buffer(32000) {}
~JitArm() {}
void Init();
void Shutdown();
// Jit!
void Jit(u32 em_address);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
JitBaseBlockCache *GetBlockCache() { return &blocks; }
// Entry point for memory faults raised by generated code
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
// Logs the current emulated CPU state (debugging aid)
void Trace();
void ClearCache();
CommonAsmRoutinesBase *GetAsmRoutines()
{
return &asm_routines;
}
// Name identifying this CPU core
const char *GetName()
{
return "JITARM";
}
// Run!
void Run();
void SingleStep();
// Utilities for use by opcodes
void WriteExit(u32 destination);
void WriteExitDestInR(ArmGen::ARMReg Reg);
void WriteRfiExitDestInR(ArmGen::ARMReg Reg);
void WriteExceptionExit();
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
void ComputeRC(ArmGen::ARMReg value, int cr = 0);
void ComputeRC(s32 value, int cr);
void ComputeCarry();
void ComputeCarry(bool Carry);
void GetCarryAndClear(ArmGen::ARMReg reg);
void FinalizeCarry(ArmGen::ARMReg reg);
void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update);
// OPCODES
// Each handler below emits the ARM code for one PowerPC instruction (or family)
void FallBackToInterpreter(UGeckoInstruction _inst);
void DoNothing(UGeckoInstruction _inst);
void HLEFunction(UGeckoInstruction _inst);
void DynaRunTable4(UGeckoInstruction _inst);
void DynaRunTable19(UGeckoInstruction _inst);
void DynaRunTable31(UGeckoInstruction _inst);
void DynaRunTable59(UGeckoInstruction _inst);
void DynaRunTable63(UGeckoInstruction _inst);
// Handler for instructions that must not be compiled: logs and emits a breakpoint
void Break(UGeckoInstruction _inst);
// Branch
void bx(UGeckoInstruction _inst);
void bcx(UGeckoInstruction _inst);
void bclrx(UGeckoInstruction _inst);
void sc(UGeckoInstruction _inst);
void rfi(UGeckoInstruction _inst);
void bcctrx(UGeckoInstruction _inst);
// Integer
void arith(UGeckoInstruction _inst);
void addex(UGeckoInstruction _inst);
void subfic(UGeckoInstruction _inst);
void cntlzwx(UGeckoInstruction _inst);
void cmp (UGeckoInstruction _inst);
void cmpl(UGeckoInstruction _inst);
void cmpi(UGeckoInstruction _inst);
void cmpli(UGeckoInstruction _inst);
void negx(UGeckoInstruction _inst);
void mulhwux(UGeckoInstruction _inst);
void rlwimix(UGeckoInstruction _inst);
void rlwinmx(UGeckoInstruction _inst);
void rlwnmx(UGeckoInstruction _inst);
void srawix(UGeckoInstruction _inst);
void extshx(UGeckoInstruction inst);
void extsbx(UGeckoInstruction inst);
// System Registers
void mtmsr(UGeckoInstruction _inst);
void mfmsr(UGeckoInstruction _inst);
void mtspr(UGeckoInstruction _inst);
void mfspr(UGeckoInstruction _inst);
void mftb(UGeckoInstruction _inst);
void mcrf(UGeckoInstruction _inst);
void mtsr(UGeckoInstruction _inst);
void mfsr(UGeckoInstruction _inst);
void twx(UGeckoInstruction _inst);
// LoadStore
void stX(UGeckoInstruction _inst);
void lXX(UGeckoInstruction _inst);
void lmw(UGeckoInstruction _inst);
void stmw(UGeckoInstruction _inst);
void icbi(UGeckoInstruction _inst);
void dcbst(UGeckoInstruction _inst);
// Floating point
void fabsx(UGeckoInstruction _inst);
void fnabsx(UGeckoInstruction _inst);
void fnegx(UGeckoInstruction _inst);
void faddsx(UGeckoInstruction _inst);
void faddx(UGeckoInstruction _inst);
void fsubsx(UGeckoInstruction _inst);
void fsubx(UGeckoInstruction _inst);
void fmulsx(UGeckoInstruction _inst);
void fmulx(UGeckoInstruction _inst);
void fmrx(UGeckoInstruction _inst);
void fmaddsx(UGeckoInstruction _inst);
void fmaddx(UGeckoInstruction _inst);
void fctiwx(UGeckoInstruction _inst);
void fctiwzx(UGeckoInstruction _inst);
void fnmaddx(UGeckoInstruction _inst);
void fnmaddsx(UGeckoInstruction _inst);
void fresx(UGeckoInstruction _inst);
void fselx(UGeckoInstruction _inst);
void frsqrtex(UGeckoInstruction _inst);
// Floating point loadStore
void lfXX(UGeckoInstruction _inst);
void stfXX(UGeckoInstruction _inst);
// Paired Singles
void ps_add(UGeckoInstruction _inst);
void ps_div(UGeckoInstruction _inst);
void ps_res(UGeckoInstruction _inst);
void ps_sum0(UGeckoInstruction _inst);
void ps_sum1(UGeckoInstruction _inst);
void ps_madd(UGeckoInstruction _inst);
void ps_nmadd(UGeckoInstruction _inst);
void ps_msub(UGeckoInstruction _inst);
void ps_nmsub(UGeckoInstruction _inst);
void ps_madds0(UGeckoInstruction _inst);
void ps_madds1(UGeckoInstruction _inst);
void ps_sub(UGeckoInstruction _inst);
void ps_mul(UGeckoInstruction _inst);
void ps_muls0(UGeckoInstruction _inst);
void ps_muls1(UGeckoInstruction _inst);
void ps_merge00(UGeckoInstruction _inst);
void ps_merge01(UGeckoInstruction _inst);
void ps_merge10(UGeckoInstruction _inst);
void ps_merge11(UGeckoInstruction _inst);
void ps_mr(UGeckoInstruction _inst);
void ps_neg(UGeckoInstruction _inst);
void ps_abs(UGeckoInstruction _inst);
void ps_nabs(UGeckoInstruction _inst);
void ps_rsqrte(UGeckoInstruction _inst);
void ps_sel(UGeckoInstruction _inst);
// LoadStore paired
void psq_l(UGeckoInstruction _inst);
void psq_lx(UGeckoInstruction _inst);
void psq_st(UGeckoInstruction _inst);
void psq_stx(UGeckoInstruction _inst);
};

View file

@ -1,36 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// Enable define below to enable oprofile integration. For this to work,
// it requires at least oprofile version 0.9.4, and changing the build
// system to link the Dolphin executable against libopagent. Since the
// dependency is a little inconvenient and this is possibly a slight
// performance hit, it's not enabled by default, but it's useful for
// locating performance issues.
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArmCache.h"
using namespace ArmGen;
// Links a block exit: overwrites the code at `location` with a direct branch to
// `address` and flushes the instruction cache for the patched code.
void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address)
{
	ARMXEmitter patcher(location);
	patcher.B(address);
	patcher.FlushIcache();
}
void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address)
{
ARMXEmitter emit((u8 *)location);
emit.MOVI2R(R11, address);
emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher);
emit.STR(R11, R9, PPCSTATE_OFF(pc));
emit.B(R12);
emit.FlushIcache();
}

View file

@ -1,17 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/JitCommon/JitCache.h"
// Signature of a generated-code entry point: no arguments, no return value.
typedef void (*CompiledCode)();
// Block cache for the ARM JIT. Supplies the ARM-specific code patching used when
// blocks are linked together or destroyed.
class JitArmBlockCache : public JitBaseBlockCache
{
private:
// Patches the code at `location` to branch directly to `address`
void WriteLinkBlock(u8* location, const u8* address);
// Patches the code at `location` to set the PC to `address` and return to the dispatcher
void WriteDestroyBlock(const u8* location, u32 address);
};

View file

@ -1,707 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <string>
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitArm32/Jit.h"
using namespace ArmGen;
// This generates some fairly heavy trampolines, but:
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
// Decodes the ARM load/store instruction at `ptr` (the instruction that faulted)
// into the BackPatchInfo flags describing the access, and works out which
// register(s) hold the value being loaded or stored.
//
// ptr   - address of the faulting instruction; neighbouring instructions
//         (ptr - 8 .. ptr + 12) are inspected as well.
// flags - in/out: access kind and size bits are OR'd into *flags.
// rD    - out: register holding the stored value / receiving the loaded value.
// V1    - out: second destination register, only written for 32-bit VLD1 loads.
//
// Returns false (after printing the opcode) when the instruction is not one of
// the recognised load/store forms.
bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1)
{
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
// Bits 27..20 select the instruction form; bits 15..12 are the default data register
u8 op = (inst >> 20) & 0xFF;
*rD = (ARMReg)((inst >> 12) & 0xF);
switch (op)
{
case 0b01011000: // STR(imm)
case 0b01111000: // STR(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
// The stored value is a temporary; the real source register is taken from the
// low four bits of the preceding instruction
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
case 0b01011001: // LDR(imm)
case 0b01111001: // LDR(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
// REV
// No REV after the load means the value is not byte swapped: mark it as reversed
if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0b00011101: // LDRH(imm)
case 0b00011001: // LDRH(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
// REV16
// No REV16 after the load means the value is not byte swapped: mark it as reversed
if((next_inst & 0x0FFF0FF0) != 0x06BF0FB0)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0b01011101: // LDRB(imm)
case 0b01111101: // LDRB(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
}
break;
case 0b01011100: // STRB(imm)
case 0b01111100: // STRB(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
// Byte stores need no byte swap, so the source register is in the store itself
*rD = (ARMReg)((inst >> 12) & 0xF);
}
break;
case 0b00011100: // STRH(imm)
case 0b00011000: // STRH(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
// As for STR: the real source register comes from the preceding instruction
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
default:
{
// Could be a floating point loadstore
// These are told apart by bits 27..24 instead
u8 op2 = (inst >> 24) & 0xF;
switch (op2)
{
case 0xD: // VLDR/VSTR
{
// Bit 20 distinguishes load from store, bit 8 single from double precision
bool load = (inst >> 20) & 1;
bool single = !((inst >> 8) & 1);
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (single)
*flags |= BackPatchInfo::FLAG_SIZE_F32;
else
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (single)
{
if (!load)
{
// Single precision stores go through a temporary: the source register is
// read from the instruction two slots earlier (PC-8)
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
}
}
break;
case 0x4: // VST1/VLD1
{
// Bits 7..6 hold the element size, bit 21 distinguishes load from store
u32 size = (inst >> 6) & 0x3;
bool load = (inst >> 21) & 1;
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 2) // 32bit
{
if (load)
{
// For 32bit loads we are loading to a temporary
// So we need to read PC+8,PC+12 to get the two destination registers
u32 vcvt_1 = *(u32*)(ptr + 8);
u32 vcvt_2 = *(u32*)(ptr + 12);
u32 dest_register_1 = (vcvt_1 >> 12) & 0xF;
dest_register_1 |= (vcvt_1 >> 18) & 0x10;
u32 dest_register_2 = (vcvt_2 >> 12) & 0xF;
dest_register_2 |= (vcvt_2 >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register_1 + D0);
*V1 = (ARMReg)(dest_register_2 + D0);
}
else
{
// For 32bit stores we are storing from a temporary
// So we need to check the VCVT at PC-8 for the source register
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
else if (size == 3) // 64bit
{
if (load)
{
// For 64bit loads we load directly in to the VFP register
u32 dest_register = (inst >> 12) & 0xF;
dest_register |= (inst >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register + D0);
}
else
{
// For 64bit stores we are storing from a temporary
// Check the previous VREV64 instruction for the real register
u32 src_register = prev_inst & 0xF;
src_register |= (prev_inst >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F64;
}
// NOTE(review): any other element size leaves *flags without a size bit
}
break;
default:
// Not a load/store form we know how to backpatch
printf("Op is 0x%02x\n", op);
return false;
break;
}
}
}
return true;
}
// Fault handler entry point for memory accesses made by generated code.
// Alerts the user when the faulting address lies below the emulated memory
// space, then tries to backpatch the faulting instruction.
// Returns whatever BackPatch() reports.
bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
{
	const uintptr_t memory_base = (uintptr_t)Memory::physical_base;
	if (memory_base > access_address)
	{
		PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address);
	}

	return BackPatch(ctx);
}
// Replaces the fastmem access sequence that faulted at ctx's PC with the
// equivalent slowmem sequence and redirects ctx's PC to the start of the
// rewritten sequence so it gets executed on return from the fault handler.
//
// Returns true when the code was patched. Returns false when the faulting
// instruction cannot be decoded or when no backpatch information was registered
// for the decoded access type; the caller then gets to treat the fault as
// unhandled instead of the process terminating with a success status.
bool JitArm::BackPatch(SContext* ctx)
{
	// TODO: This ctx needs to be filled with our information

	// We need to get the destination register before we start
	u8* codePtr = (u8*)ctx->CTX_PC;
	u32 Value = *(u32*)codePtr;
	ARMReg rD = INVALID_REG;
	ARMReg V1 = INVALID_REG;
	u32 flags = 0;

	if (!DisasmLoadStore(codePtr, &flags, &rD, &V1))
	{
		WARN_LOG(DYNA_REC, "Invalid backpatch at location 0x%08lx(0x%08x)", ctx->CTX_PC, Value);
		return false;
	}

	// Look the entry up without inserting: operator[] would silently create a
	// zero-sized entry for a flag combination InitBackpatch() never registered,
	// and the padding computation in EmitBackpatchRoutine() would then be bogus.
	const auto info_it = m_backpatch_info.find(flags);
	if (info_it == m_backpatch_info.end())
	{
		WARN_LOG(DYNA_REC, "No backpatch routine for flags 0x%08x at location 0x%08lx", flags, ctx->CTX_PC);
		return false;
	}
	const BackPatchInfo& info = info_it->second;

	// The faulting instruction sits m_fastmem_trouble_inst_offset instructions into
	// the fastmem sequence; rewind to the first instruction of that sequence.
	ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4);
	u32 new_pc = (u32)emitter.GetCodePtr();

	// Emit the slowmem variant, padded to the size of the larger variant
	EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1);
	emitter.FlushIcache();

	ctx->CTX_PC = new_pc;
	return true;
}
// Emits the code sequence for one guest memory access.
//
// emit       - emitter to write the sequence with.
// flags      - BackPatchInfo flags describing the access (load/store, size,
//              reverse, sign extend).
// fastmem    - true: emit the direct memory access variant;
//              false: emit the variant that calls the PowerPC::Read_*/Write_* helpers.
// do_padding - pad the sequence with NOPs up to the size of the larger of the two
//              variants recorded in m_backpatch_info for these flags.
// RS         - register holding the value to store / receiving the loaded value.
// V1         - second destination register, used by 32-bit float loads only.
//
// The guest address is expected in R12; R11 (and R10 for fastmem stores) are used
// as scratch registers.
//
// Returns the position, counted in instructions from the start of the sequence,
// of the instruction that performs the memory access in the fastmem variant;
// zero for the slowmem variant.
u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1)
{
ARMReg addr = R12;
ARMReg temp = R11;
u32 trouble_offset = 0;
const u8* code_base = emit->GetCodePtr();
if (fastmem)
{
ARMReg temp2 = R10;
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
// temp = guest address with the mask bits cleared
emit->BIC(temp, addr, mask);
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
// Floating point store
// R8 is added to the masked address (presumably the host memory base - confirm)
emit->ADD(temp, temp, R8);
NEONXEmitter nemit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
// Convert to single precision, byte swap, then store
emit->VCVT(S0, RS, 0);
nemit.VREV32(I_8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
emit->VSTR(S0, temp, 0);
}
else
{
// Byte swap the double in to D0, then store
nemit.VREV64(I_8, D0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
nemit.VST1(I_64, D0, temp);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
// Floating point load
emit->ADD(temp, temp, R8);
NEONXEmitter nemit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
nemit.VLD1(F_32, D0, temp);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
// The converted value is written to both destination registers
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
nemit.VLD1(I_64, RS, temp);
nemit.VREV64(I_8, RS, RS); // Byte swap to result
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
// Integer store: byte swap in to a scratch register first (not needed for bytes)
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(temp2, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(temp2, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->STR(temp2, R8, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(temp2, R8, temp);
else
emit->STRB(RS, R8, temp);
}
else
{
// Integer load
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->LDR(RS, R8, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->LDRH(RS, R8, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->LDRB(RS, R8, temp);
// Byte swap the result unless a reversed load was requested
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, RS);
}
if (flags & BackPatchInfo::FLAG_EXTEND)
emit->SXTH(RS, RS);
}
}
else
{
// Slowmem: call the memory access helpers, preserving R0-R3 around the call
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
// Write_U32(value in R0, address in R1) with the value converted to single precision
emit->MOV(R1, addr);
emit->VCVT(S0, RS, 0);
emit->VMOV(R0, S0);
emit->MOVI2R(temp, (u32)&PowerPC::Write_U32);
emit->BL(temp);
}
else
{
emit->MOVI2R(temp, (u32)&PowerPC::Write_F64);
// Where the double and the address go depends on the float calling convention
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(R0, RS);
emit->MOV(R2, addr);
#else
emit->VMOV(D0, RS);
emit->MOV(R0, addr);
#endif
emit->BL(temp);
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
// Read_U32 result arrives in R0; convert it and write both destination registers
emit->MOVI2R(temp, (u32)&PowerPC::Read_U32);
emit->BL(temp);
emit->VMOV(S0, R0);
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
emit->MOVI2R(temp, (u32)&PowerPC::Read_F64);
emit->BL(temp);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(RS, R0);
#else
emit->VMOV(RS, D0);
#endif
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
// Integer store: Write_U*(value in R0, address in R1)
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, RS);
emit->MOV(R1, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&PowerPC::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&PowerPC::Write_U16);
else
emit->MOVI2R(temp, (u32)&PowerPC::Write_U8);
emit->BL(temp);
emit->POP(4, R0, R1, R2, R3);
}
else
{
// Integer load: Read_U*(address in R0), result in R0
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U8);
emit->BL(temp);
// Park the result in the scratch register so the POP below cannot clobber it
emit->MOV(temp, R0);
emit->POP(4, R0, R1, R2, R3);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
emit->MOV(RS, temp);
}
else
{
// Reversed loads get the helper's result byte swapped
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, temp);
}
}
}
if (do_padding)
{
// Pad with NOPs so the fastmem and slowmem variants occupy the same space
BackPatchInfo& info = m_backpatch_info[flags];
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
u32 code_size = emit->GetCodePtr() - code_base;
code_size /= 4;
emit->NOP(num_insts_max - code_size);
}
return trouble_offset;
}
void JitArm::InitBackpatch()
{
u32 flags = 0;
BackPatchInfo info;
u8* code_base = GetWritableCodePtr();
u8* code_end;
// Writes
{
// 8bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
// Loads
{
// 8bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - sign extend
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_EXTEND;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
}

View file

@ -1,309 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// The branches are known good, or at least reasonably good.
// No need for a disable-mechanism.
using namespace ArmGen;
// sc (system call): writes back all cached registers, sets the PC to the
// instruction following the sc, raises EXCEPTION_SYSCALL in ppcState.Exceptions
// and leaves the block through the exception exit.
void JitArm::sc(UGeckoInstruction inst)
{
	INSTRUCTION_START

	gpr.Flush();
	fpr.Flush();

	ARMReg scratch = gpr.GetReg();

	// pc = address of the next instruction
	const u32 resume_pc = js.compilerPC + 4;
	MOVI2R(scratch, resume_pc);
	STR(scratch, R9, PPCSTATE_OFF(pc));

	// Exceptions |= EXCEPTION_SYSCALL
	LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
	ORR(scratch, scratch, EXCEPTION_SYSCALL);
	STR(scratch, R9, PPCSTATE_OFF(Exceptions));

	gpr.Unlock(scratch);

	WriteExceptionExit();
}
// rfi (return from interrupt): writes back all cached registers, then emits
//   MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13
// followed by an exit to the address held in SRR0.
// See the Interpreter's rfi for details.
void JitArm::rfi(UGeckoInstruction inst)
{
	INSTRUCTION_START

	gpr.Flush();
	fpr.Flush();

	const u32 mask = 0x87C0FFFF;
	const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
	const u32 msr_keep_bits = (~mask) & clearMSR13;
	const u32 srr1_take_bits = mask & clearMSR13;

	// Allocation order matters for which host registers are handed out
	ARMReg dest_reg = gpr.GetReg();
	ARMReg srr1_reg = gpr.GetReg();
	ARMReg mask_reg = gpr.GetReg();
	ARMReg msr_reg = gpr.GetReg();

	MOVI2R(srr1_reg, msr_keep_bits);
	MOVI2R(mask_reg, srr1_take_bits);

	// msr_reg = the bits of MSR that survive
	LDR(msr_reg, R9, PPCSTATE_OFF(msr));
	AND(msr_reg, msr_reg, srr1_reg);

	// srr1_reg = the bits taken over from SRR1
	LDR(srr1_reg, R9, PPCSTATE_OFF(spr[SPR_SRR1]));
	AND(srr1_reg, srr1_reg, mask_reg);

	// Combine both halves and write the new MSR back
	ORR(srr1_reg, msr_reg, srr1_reg);
	STR(srr1_reg, R9, PPCSTATE_OFF(msr));

	// Execution continues at SRR0
	LDR(dest_reg, R9, PPCSTATE_OFF(spr[SPR_SRR0]));

	gpr.Unlock(srr1_reg, mask_reg, msr_reg);
	WriteRfiExitDestInR(dest_reg); // dest_reg gets unlocked here
}
// bx: unconditional branch. Optionally stores the return address (compilerPC + 4) in LR,
// and, when this is the last instruction of the block, emits the exit to the branch target.
// A branch whose target is its own address is treated as an idle loop.
void JitArm::bx(UGeckoInstruction inst)
{
INSTRUCTION_START
// The link-register update below must always be emitted,
// even if the blocks are merged by PPCAnalyst::Flatten().
if (inst.LK)
{
// NOTE(review): GetReg(false) is never paired with an Unlock here; presumably the
// argument requests an unlocked temporary - confirm against the register cache.
ARMReg rA = gpr.GetReg(false);
u32 Jumpto = js.compilerPC + 4;
MOVI2R(rA, Jumpto);
STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR]));
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
// If this is not the last instruction of a block,
// the rest of the processing is skipped,
// because PPCAnalyst::Flatten() merged the blocks.
if (!js.isLastInstruction)
{
return;
}
gpr.Flush();
fpr.Flush();
// Target is the sign-extended LI field (shifted left by 2), either absolute (AA set)
// or relative to the address of this instruction.
u32 destination;
if (inst.AA)
destination = SignExt26(inst.LI << 2);
else
destination = js.compilerPC + SignExt26(inst.LI << 2);
if (destination == js.compilerPC)
{
//PanicAlert("Idle loop detected at %08x", destination);
// CALL(ProtectFunction(&CoreTiming::Idle, 0));
// JMP(Asm::testExceptions, true);
// make idle loops go faster
// Call CoreTiming::Idle, store this instruction's address as the PC and take the exception exit.
MOVI2R(R14, (u32)&CoreTiming::Idle);
BL(R14);
MOVI2R(R14, js.compilerPC);
STR(R14, R9, PPCSTATE_OFF(pc));
WriteExceptionExit();
}
// NOTE(review): this exit is also emitted after the idle-loop sequence above.
WriteExit(destination);
}
// bcx: conditional branch. Depending on the BO field it decrements and tests CTR and/or
// tests a CR bit; if every enabled test passes, the emitted code exits to the branch target,
// otherwise execution continues at the not-taken path resolved at the bottom.
void JitArm::bcx(UGeckoInstruction inst)
{
INSTRUCTION_START
// USES_CR
// NOTE(review): rA is acquired and unlocked but never otherwise used in this function.
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
// Forward branch for "CTR test failed"; only assigned, and only resolved below,
// when BO_DONT_DECREMENT_FLAG is clear.
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR]));
// SUBS sets the flags consumed by the B_CC below.
SUBS(rB, rB, 1);
STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR]));
//SUB(32, M(&CTR), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = B_CC(CC_NEQ);
else
pCTRDontBranch = B_CC(CC_EQ);
}
// Forward branch for "CR condition failed"; only assigned, and only resolved below,
// when BO_DONT_CHECK_CONDITION is clear.
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
// Taken path: optionally store the return address (compilerPC + 4) in LR.
if (inst.LK)
{
u32 Jumpto = js.compilerPC + 4;
MOVI2R(rB, Jumpto);
STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR]));
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); // Careful, destroys R14, R12
}
gpr.Unlock(rA, rB);
// Target is the sign-extended BD field (shifted left by 2), absolute when AA is set,
// otherwise relative to the address of this instruction.
u32 destination;
if (inst.AA)
destination = SignExt16(inst.BD << 2);
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
// Flush for the taken exit with FLUSH_MAINTAIN_STATE, since code generation continues
// below for the not-taken path.
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExit(destination);
// Not-taken path starts here: resolve whichever skip branches were emitted above.
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch );
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
SetJumpTarget( pCTRDontBranch );
// Unless the analyzer has OPTION_CONDITIONAL_CONTINUE set, end the block here and
// exit to the next instruction.
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
// bcctrx: branch to the address in CTR (low two bits cleared), either unconditionally or
// depending on a CR bit. Optionally stores the return address (compilerPC + 4) in LR.
void JitArm::bcctrx(UGeckoInstruction inst)
{
INSTRUCTION_START
// bcctrx doesn't decrement and/or test CTR
_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");
if (inst.BO_2 & BO_DONT_CHECK_CONDITION)
{
// BO_2 == 1z1zz -> b always
//NPC = CTR & 0xfffffffc;
gpr.Flush();
fpr.Flush();
// rA is first used as scratch for the LR update, then holds the exit destination.
ARMReg rA = gpr.GetReg();
if (inst.LK_3)
{
u32 Jumpto = js.compilerPC + 4;
MOVI2R(rA, Jumpto);
STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR]));
// ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
// destination = CTR & ~3
LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR]));
BIC(rA, rA, 0x3);
WriteExitDestInR(rA);
}
else
{
// Rare condition seen in (just some versions of?) Nintendo's NES Emulator
// BO_2 == 001zy -> b if false
// BO_2 == 011zy -> b if true
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
// b skips the taken path when the CR condition does not hold; resolved after the exit below.
FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
// destination = CTR & ~3
LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR]));
BIC(rA, rA, 0x3);
if (inst.LK_3)
{
u32 Jumpto = js.compilerPC + 4;
MOVI2R(rB, Jumpto);
STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR]));
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
// FLUSH_MAINTAIN_STATE because code generation continues for the not-taken path.
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA);
// Not-taken path starts here.
SetJumpTarget(b);
// Unless the analyzer has OPTION_CONDITIONAL_CONTINUE set, end the block here and
// exit to the next instruction.
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
}
// bclrx: conditional branch to the address in LR (low two bits cleared). Depending on the BO
// field it decrements and tests CTR and/or tests a CR bit first. Optionally stores the return
// address (compilerPC + 4) in LR after the old LR value has been read as the destination.
void JitArm::bclrx(UGeckoInstruction inst)
{
INSTRUCTION_START
// rA holds the exit destination; rB is scratch for the CTR and LR updates.
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
// Forward branch for "CTR test failed"; only assigned, and only resolved below,
// when BO_DONT_DECREMENT_FLAG is clear.
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR]));
// SUBS sets the flags consumed by the B_CC below.
SUBS(rB, rB, 1);
STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR]));
//SUB(32, M(&CTR), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = B_CC(CC_NEQ);
else
pCTRDontBranch = B_CC(CC_EQ);
}
// Forward branch for "CR condition failed"; only assigned, and only resolved below,
// when BO_DONT_CHECK_CONDITION is clear.
FixupBranch pConditionDontBranch;
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit
{
pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
//MOV(32, R(EAX), M(&LR));
//AND(32, R(EAX), Imm32(0xFFFFFFFC));
// destination = LR & ~3 (read before LR is overwritten below)
LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR]));
BIC(rA, rA, 0x3);
if (inst.LK)
{
u32 Jumpto = js.compilerPC + 4;
MOVI2R(rB, Jumpto);
STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR]));
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
// FLUSH_MAINTAIN_STATE because code generation continues for the not-taken path.
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA);
// Not-taken path starts here: resolve whichever skip branches were emitted above.
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch );
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
SetJumpTarget( pCTRDontBranch );
// Unless the analyzer has OPTION_CONDITIONAL_CONTINUE set, end the block here and
// exit to the next instruction.
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}

View file

@ -1,69 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// Saturation bounds for float -> s32 conversion: [0] = -2^31, [1] = 2^31 - 1.
static const double minmaxFloat[2] = {-(double)0x80000000, (double)0x7FFFFFFF};
// Raw 64-bit pattern that is loaded by address and ORed into a conversion result.
// This is stored as an integer on purpose: initializing a `double` from the literal
// converts the number 0xfff8000000000000 to floating point (about 1.8e19), which
// does not leave the bit pattern 0xfff8000000000000 in memory.
static const unsigned long long doublenum = 0xfff8000000000000ull;
// Exception masks
// Each Operand2(imm8, rot) is taken to be the ARM modified-immediate form, i.e. the value
// imm8 rotated right by (2 * rot) bits; the trailing comment on each line is the resulting
// 32-bit mask. Two entries did not encode the value their comment documents and are fixed:
//   FRFIMask        was (5, 0x8) -> 0x50000,  now (6, 0x8) -> 0x60000
//   VXSQRTException was (2, 5)   -> 0x800000, now (2, 0xC) -> 0x200
// NOTE(review): Operand2 is declared outside this file - confirm the (imm8, rot) reading.
static ArmGen::Operand2 FRFIMask(6, 0x8); // 0x60000
static ArmGen::Operand2 FIMask(2, 8); // 0x20000
static ArmGen::Operand2 FRMask(4, 8); // 0x40000
static ArmGen::Operand2 FXMask(2, 1); // 0x80000000
static ArmGen::Operand2 VEMask(0x40, 0); // 0x40
static ArmGen::Operand2 XXException(2, 4); // 0x2000000
static ArmGen::Operand2 CVIException(1, 0xC); // 0x100
static ArmGen::Operand2 NANException(1, 4); // 0x1000000
static ArmGen::Operand2 VXVCException(8, 8); // 0x80000
static ArmGen::Operand2 ZXException(1, 3); // 0x4000000
static ArmGen::Operand2 VXSQRTException(2, 0xC); // 0x200
// Emits code that ORs the mask for the given FPSCR exception into Reg (a host register
// holding an FPSCR value). If that changed Reg, FXMask is ORed in as well; afterwards the
// bits in FRFIMask are cleared unconditionally.
// Exception must be one of the FPSCR_* values handled below; anything else asserts and
// emits nothing.
inline void JitArm::SetFPException(ArmGen::ARMReg Reg, u32 Exception)
{
    ArmGen::Operand2* mask = nullptr;
    switch (Exception)
    {
    case FPSCR_VXCVI:
        mask = &CVIException;
        break;
    case FPSCR_XX:
        mask = &XXException;
        break;
    case FPSCR_VXSNAN:
        mask = &NANException;
        break;
    case FPSCR_VXVC:
        mask = &VXVCException;
        break;
    case FPSCR_ZX:
        mask = &ZXException;
        break;
    case FPSCR_VXSQRT:
        mask = &VXSQRTException;
        break;
    default:
        _assert_msg_(DYNA_REC, false, "Passed unsupported FPexception: 0x%08x", Exception);
        return;
    }

    // Keep a copy of the old value so the emitted code can tell whether the OR changed it.
    const ArmGen::ARMReg previous = gpr.GetReg();
    MOV(previous, Reg);
    ORR(Reg, Reg, *mask);
    CMP(previous, Reg);

    // Only when the value changed:
    SetCC(CC_NEQ);
    ORR(Reg, Reg, FXMask); // If exception is set, set exception bit
    SetCC();

    BIC(Reg, Reg, FRFIMask);
    gpr.Unlock(previous);
}

View file

@ -1,536 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Empty stub: emits no code and ignores both register arguments.
// The callers visible in this file invoke it when inst.Rc is set.
void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp)
{
}
// fctiwx: convert FB to a signed 32-bit integer using the rounding mode from the guest FPSCR,
// saturating against minmaxFloat and recording FPSCR exception bits.
// NOTE: FALLBACK_IF(true) below means this instruction currently always takes the fallback
// path; the emitter code after it is not reached.
void JitArm::fctiwx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(true);
u32 b = inst.FB;
u32 d = inst.FD;
ARMReg vB = fpr.R0(b);
ARMReg vD = fpr.R0(d);
ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg();
ARMReg V2 = fpr.GetReg();
ARMReg rA = gpr.GetReg();
// fpscrReg carries the guest FPSCR value through the function; it is stored back at the end.
ARMReg fpscrReg = gpr.GetReg();
FixupBranch DoneMax, DoneMin;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
// rA = address of the {min, max} table
MOVI2R(rA, (u32)minmaxFloat);
// Check if greater than max float
{
VLDR(V0, rA, 8); // Load Max
VCMPE(vB, V0);
VMRS(_PC); // Loads in to APSR
FixupBranch noException = B_CC(CC_LE);
VMOV(vD, V0); // Set to max
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMax = B();
SetJumpTarget(noException);
}
// Check if less than min float
{
VLDR(V0, rA, 0);
VCMPE(vB, V0);
VMRS(_PC);
FixupBranch noException = B_CC(CC_GE);
VMOV(vD, V0);
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMin = B();
SetJumpTarget(noException);
}
// Within ranges, convert to integer
// Set rounding mode first
// PPC <-> ARM rounding modes
// 0, 1, 2, 3 <-> 0, 3, 1, 2
ARMReg rB = gpr.GetReg();
// rA = host FP status/control value with the rounding-mode field cleared
VMRS(rA);
// Bits 22-23
BIC(rA, rA, Operand2(3, 5));
LDR(rB, R9, PPCSTATE_OFF(fpscr));
AND(rB, rB, 0x3); // Get the FPSCR rounding bits
// Chain of conditionally executed compares: each later CMP only runs when the previous
// one did not match, and the matching case ORs in the translated mode.
CMP(rB, 1);
SetCC(CC_EQ); // zero
ORR(rA, rA, Operand2(3, 5));
SetCC(CC_NEQ);
CMP(rB, 2); // +inf
SetCC(CC_EQ);
ORR(rA, rA, Operand2(1, 5));
SetCC(CC_NEQ);
CMP(rB, 3); // -inf
SetCC(CC_EQ);
ORR(rA, rA, Operand2(2, 5));
SetCC();
VMSR(rA);
// NOTE(review): rA is modified again here and written back after the conversion;
// presumably this is meant to restore the host rounding mode - confirm.
ORR(rA, rA, Operand2(3, 5));
VCVT(vD, vB, TO_INT | IS_SIGNED);
VMSR(rA);
gpr.Unlock(rB);
// Compare the converted result with the input to classify the conversion.
VCMPE(vD, vB);
VMRS(_PC);
SetCC(CC_EQ);
BIC(fpscrReg, fpscrReg, FRFIMask);
// Emitted while the EQ condition is active (SetCC(CC_EQ) above).
FixupBranch DoneEqual = B();
SetCC();
SetFPException(fpscrReg, FPSCR_XX);
ORR(fpscrReg, fpscrReg, FIMask);
// Set FRMask when |result| > |input|.
VABS(V1, vB);
VABS(V2, vD);
VCMPE(V2, V1);
VMRS(_PC);
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, FRMask);
SetCC();
SetJumpTarget(DoneEqual);
SetJumpTarget(DoneMax);
SetJumpTarget(DoneMin);
// OR the 64-bit constant doublenum into the result register.
MOVI2R(rA, (u32)&doublenum);
VLDR(V0, rA, 0);
NEONXEmitter nemit(this);
nemit.VORR(vD, vD, V0);
if (inst.Rc)
Helper_UpdateCR1(fpscrReg, rA);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(rA);
gpr.Unlock(fpscrReg);
fpr.Unlock(V0);
fpr.Unlock(V1);
fpr.Unlock(V2);
}
// fctiwzx: convert FB to a signed 32-bit integer with round-toward-zero, saturating against
// minmaxFloat and recording FPSCR exception bits.
// NOTE: FALLBACK_IF(true) below means this instruction currently always takes the fallback
// path; the emitter code after it is not reached.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(true);
u32 b = inst.FB;
u32 d = inst.FD;
ARMReg vB = fpr.R0(b);
ARMReg vD = fpr.R0(d);
ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg();
ARMReg V2 = fpr.GetReg();
ARMReg rA = gpr.GetReg();
// fpscrReg carries the guest FPSCR value through the function; it is stored back at the end.
ARMReg fpscrReg = gpr.GetReg();
FixupBranch DoneMax, DoneMin;
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
// rA = address of the {min, max} table
MOVI2R(rA, (u32)minmaxFloat);
// Check if greater than max float
{
VLDR(V0, rA, 8); // Load Max
VCMPE(vB, V0);
VMRS(_PC); // Loads in to APSR
FixupBranch noException = B_CC(CC_LE);
VMOV(vD, V0); // Set to max
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMax = B();
SetJumpTarget(noException);
}
// Check if less than min float
{
VLDR(V0, rA, 0);
VCMPE(vB, V0);
VMRS(_PC);
FixupBranch noException = B_CC(CC_GE);
VMOV(vD, V0);
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMin = B();
SetJumpTarget(noException);
}
// Within ranges, convert to integer
VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
// Compare the converted result with the input to classify the conversion.
VCMPE(vD, vB);
VMRS(_PC);
SetCC(CC_EQ);
BIC(fpscrReg, fpscrReg, FRFIMask);
// Emitted while the EQ condition is active (SetCC(CC_EQ) above).
FixupBranch DoneEqual = B();
SetCC();
SetFPException(fpscrReg, FPSCR_XX);
ORR(fpscrReg, fpscrReg, FIMask);
// Set FRMask when |result| > |input|.
VABS(V1, vB);
VABS(V2, vD);
VCMPE(V2, V1);
VMRS(_PC);
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, FRMask);
SetCC();
SetJumpTarget(DoneEqual);
SetJumpTarget(DoneMax);
SetJumpTarget(DoneMin);
// OR the 64-bit constant doublenum into the result register.
MOVI2R(rA, (u32)&doublenum);
VLDR(V0, rA, 0);
NEONXEmitter nemit(this);
nemit.VORR(vD, vD, V0);
if (inst.Rc)
Helper_UpdateCR1(fpscrReg, rA);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(rA);
gpr.Unlock(fpscrReg);
fpr.Unlock(V0);
fpr.Unlock(V1);
fpr.Unlock(V2);
}
// fabsx: writes the absolute value of FB's R0 register into FD's R0 register.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fabsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg src = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VABS(dst, src);
}
// fnabsx: writes the negated absolute value of FB's R0 register into FD's R0 register.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fnabsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg src = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    // dst = -|src|, done in two steps through dst
    VABS(dst, src);
    VNEG(dst, dst);
}
// fnegx: writes the negation of FB's R0 register into FD's R0 register.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fnegx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg src = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VNEG(dst, src);
}
// faddsx: FD.R0 = FA.R0 + FB.R0, then the result is copied into FD.R1 as well.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::faddsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FB);
    const ARMReg dst0 = fpr.R0(inst.FD, false);
    const ARMReg dst1 = fpr.R1(inst.FD, false);

    VADD(dst0, lhs, rhs);
    VMOV(dst1, dst0); // duplicate the result into the second register
}
// faddx: FD.R0 = FA.R0 + FB.R0.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::faddx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VADD(dst, lhs, rhs);
}
// fsubsx: FD.R0 = FA.R0 - FB.R0, then the result is copied into FD.R1 as well.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fsubsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FB);
    const ARMReg dst0 = fpr.R0(inst.FD, false);
    const ARMReg dst1 = fpr.R1(inst.FD, false);

    VSUB(dst0, lhs, rhs);
    VMOV(dst1, dst0); // duplicate the result into the second register
}
// fsubx: FD.R0 = FA.R0 - FB.R0.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fsubx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VSUB(dst, lhs, rhs);
}
// fmulsx: FD.R0 = FA.R0 * FC.R0, then the result is copied into FD.R1 as well.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fmulsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FC);
    const ARMReg dst0 = fpr.R0(inst.FD, false);
    const ARMReg dst1 = fpr.R1(inst.FD, false);

    VMUL(dst0, lhs, rhs);
    VMOV(dst1, dst0); // duplicate the result into the second register
}
// fmulx: FD.R0 = FA.R0 * FC.R0.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fmulx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg lhs = fpr.R0(inst.FA);
    const ARMReg rhs = fpr.R0(inst.FC);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VMUL(dst, lhs, rhs);
}
// fmrx: copies FB's R0 register into FD's R0 register.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fmrx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const ARMReg src = fpr.R0(inst.FB);
    const ARMReg dst = fpr.R0(inst.FD, false);

    VMOV(dst, src);
}
// fmaddsx: computes FB.R0 + FA.R0 * FC.R0 in a temporary (VMOV then VMLA) and writes the
// result to both FD.R0 and FD.R1.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fmaddsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const u32 a = inst.FA;
    const u32 b = inst.FB;
    const u32 c = inst.FC;
    const u32 d = inst.FD;

    const ARMReg mul_lhs = fpr.R0(a);
    const ARMReg addend = fpr.R0(b);
    const ARMReg mul_rhs = fpr.R0(c);
    const ARMReg dst0 = fpr.R0(d, false);
    const ARMReg dst1 = fpr.R1(d, false);
    const ARMReg acc = fpr.GetReg();

    // Accumulate in a temporary so the sources are not overwritten.
    VMOV(acc, addend);
    VMLA(acc, mul_lhs, mul_rhs);

    VMOV(dst0, acc);
    VMOV(dst1, acc);

    fpr.Unlock(acc);
}
// fmaddx: computes FB.R0 + FA.R0 * FC.R0 in a temporary (VMOV then VMLA) and writes the
// result to FD.R0.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fmaddx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const u32 a = inst.FA;
    const u32 b = inst.FB;
    const u32 c = inst.FC;
    const u32 d = inst.FD;

    const ARMReg mul_lhs = fpr.R0(a);
    const ARMReg addend = fpr.R0(b);
    const ARMReg mul_rhs = fpr.R0(c);
    const ARMReg dst = fpr.R0(d, false);
    const ARMReg acc = fpr.GetReg();

    // Accumulate in a temporary so the sources are not overwritten.
    VMOV(acc, addend);
    VMLA(acc, mul_lhs, mul_rhs);
    VMOV(dst, acc);

    fpr.Unlock(acc);
}
// fnmaddx: computes FB.R0 + FA.R0 * FC.R0 in a temporary (VMOV then VMLA) and writes the
// negated result to FD.R0.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fnmaddx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const u32 a = inst.FA;
    const u32 b = inst.FB;
    const u32 c = inst.FC;
    const u32 d = inst.FD;

    const ARMReg mul_lhs = fpr.R0(a);
    const ARMReg addend = fpr.R0(b);
    const ARMReg mul_rhs = fpr.R0(c);
    const ARMReg dst = fpr.R0(d, false);
    const ARMReg acc = fpr.GetReg();

    // Accumulate in a temporary so the sources are not overwritten.
    VMOV(acc, addend);
    VMLA(acc, mul_lhs, mul_rhs);
    VNEG(dst, acc);

    fpr.Unlock(acc);
}
// fnmaddsx: computes FB.R0 + FA.R0 * FC.R0 in a temporary (VMOV then VMLA) and writes the
// negated result to both FD.R0 and FD.R1.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fnmaddsx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const u32 a = inst.FA;
    const u32 b = inst.FB;
    const u32 c = inst.FC;
    const u32 d = inst.FD;

    const ARMReg mul_lhs = fpr.R0(a);
    const ARMReg addend = fpr.R0(b);
    const ARMReg mul_rhs = fpr.R0(c);
    const ARMReg dst0 = fpr.R0(d, false);
    const ARMReg dst1 = fpr.R1(d, false);
    const ARMReg acc = fpr.GetReg();

    // Accumulate in a temporary so the sources are not overwritten.
    VMOV(acc, addend);
    VMLA(acc, mul_lhs, mul_rhs);

    VNEG(dst0, acc);
    VNEG(dst1, acc);

    fpr.Unlock(acc);
}
// XXX: Messes up Super Mario Sunshine title screen
// fresx: writes 1.0 / FB.R0 into both FD.R1 and FD.R0 using a full division.
// Currently disabled: the unconditional FALLBACK_IF(true) below bails out before any of
// the emitter code is reached.
void JitArm::fresx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);
    // FIXME
    FALLBACK_IF(true);

    const u32 b = inst.FB;
    const u32 d = inst.FD;

    const ARMReg divisor = fpr.R0(b);
    const ARMReg dst0 = fpr.R0(d, false);
    const ARMReg dst1 = fpr.R1(d, false);
    const ARMReg one = fpr.GetReg();

    MOVI2R(one, 1.0, INVALID_REG); // temp reg isn't needed for 1.0
    VDIV(dst1, one, divisor);
    VDIV(dst0, one, divisor);

    fpr.Unlock(one);
}
// fselx: floating-point select on the R0 registers.
// The emitted code compares FA with VCMP (single-operand form, presumably a compare against
// zero - confirm in the emitter) and then writes FC to FD when the GE condition holds,
// otherwise FB.
// Not compiled here when the Rc bit is set (FALLBACK_IF).
void JitArm::fselx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    // fselx is a scalar floating-point instruction, so it is gated by the same debug switch
    // as the other scalar FP instructions in this file (was bJITPairedOff).
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
    ARMReg vA0 = fpr.R0(a);
    ARMReg vB0 = fpr.R0(b);
    ARMReg vC0 = fpr.R0(c);
    ARMReg vD0 = fpr.R0(d, false);

    VCMP(vA0);
    VMRS(_PC); // transfer the VFP comparison flags to the APSR

    FixupBranch GT0 = B_CC(CC_GE);
    // Condition failed: select FB.
    VMOV(vD0, vB0);
    FixupBranch EQ0 = B();

    SetJumpTarget(GT0);
    // Condition held: select FC.
    VMOV(vD0, vC0);

    SetJumpTarget(EQ0);
}
// frsqrtex: reciprocal square root estimate of FB.R0 into FD.R0 using NEON VRSQRTE,
// with FPSCR exception bookkeeping.
// Currently disabled: the unconditional FALLBACK_IF(true) below bails out before any of
// the emitter code is reached.
void JitArm::frsqrtex(UGeckoInstruction inst)
{
INSTRUCTION_START
// NOTE(review): the other scalar FP instructions in this file use bJITFloatingPointOff here.
JITDISABLE(bJITPairedOff);
FALLBACK_IF(true);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
ARMReg vB0 = fpr.R0(b);
ARMReg vD0 = fpr.R0(d, false);
ARMReg fpscrReg = gpr.GetReg();
// D1 is used directly as a scratch register rather than one taken from the FPR cache.
ARMReg V0 = D1;
// NOTE(review): rA is acquired and unlocked but never otherwise used.
ARMReg rA = gpr.GetReg();
// V0 = PPC_NAN (fpscrReg is used as a temporary address register here)
MOVI2R(fpscrReg, (u32)&PPC_NAN);
VLDR(V0, fpscrReg, 0);
// From here on fpscrReg holds the guest FPSCR value.
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
VCMP(vB0);
VMRS(_PC);
// NOTE(review): the NaN result and FPSCR_VXSQRT are emitted on the path where the LT branch
// is NOT taken, which looks inverted relative to the branch name - confirm intended sense.
FixupBranch Less0 = B_CC(CC_LT);
VMOV(vD0, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr0 = B();
SetJumpTarget(Less0);
FixupBranch noException = B_CC(CC_EQ);
SetFPException(fpscrReg, FPSCR_ZX);
SetJumpTarget(noException);
SetJumpTarget(SkipOrr0);
// Convert the input into S0, estimate with VRSQRTE on D0, then convert S0 back into FD.
VCVT(S0, vB0, 0);
NEONXEmitter nemit(this);
nemit.VRSQRTE(F_32, D0, D0);
VCVT(vD0, S0, 0);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, rA);
}

File diff suppressed because it is too large Load diff

View file

@ -1,547 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emits a store of guest register `value` with the given access size (32, 16, anything else
// is treated as 8 bits).
// The effective address is formed from:
//   dest      - guest base register index, or -1 for "no base register"
//   regOffset - guest index register, or -1 to use the immediate `offset` instead
//   offset    - immediate displacement, only used when regOffset == -1
// If every contributing register holds a known immediate, the address is computed at
// compile time (is_immediate / imm_addr); otherwise it is computed into R12 at run time.
void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (regOffset == -1)
{
// Displacement form: address = dest + offset (or just offset without a base register).
if (dest != -1)
{
if (gpr.IsImm(dest))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + offset;
}
else
{
// Use the offset as an immediate operand when it is encodable, otherwise
// materialize it in rA first.
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(dest));
}
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
// Indexed form: address = dest + regOffset (or just regOffset without a base register).
if (dest != -1)
{
if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
}
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(dest), off))
{
ADD(rA, gpr.R(regOffset), off);
}
else
{
MOVI2R(rA, gpr.GetImm(dest));
ADD(rA, rA, gpr.R(regOffset));
}
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(regOffset), off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, gpr.GetImm(regOffset));
ADD(rA, rA, gpr.R(dest));
}
}
else
{
ADD(rA, gpr.R(dest), gpr.R(regOffset));
}
}
else
{
if (gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(regOffset);
}
else
{
MOV(rA, gpr.R(regOffset));
}
}
}
// Host register holding the value to store.
ARMReg RS = gpr.R(value);
u32 flags = BackPatchInfo::FLAG_STORE;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (is_immediate)
{
if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
{
// Gather-pipe write: append the value directly to GPFifo::m_gatherPipe at the current
// GPFifo::m_gatherPipeCount and advance the count by the access size in bytes.
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
if (accessSize == 32)
{
// Byte-swap for the store, then swap back so RS keeps its original value.
REV(RS, RS);
STR(RS, R10, R11);
REV(RS, RS);
}
else if (accessSize == 16)
{
REV16(RS, RS);
STRH(RS, R10, R11);
REV16(RS, RS);
}
else
{
STRB(RS, R10, R11);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS);
}
else
{
// Known address that is not optimizable RAM: never use fastmem for it.
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, RS);
}
}
else
{
// Run-time address already in rA (R12).
EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS);
}
}
// stX: integer stores (stw/sth/stb, their update forms and their indexed forms).
// Decodes the access size, whether the form is indexed (regOffset = RB) and whether it is an
// update form, emits the store through SafeStoreFromReg, and for update forms writes the
// effective address back to RA unless EXCEPTION_DSI is pending.
void JitArm::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
u32 a = inst.RA, b = inst.RB, s = inst.RS;
s32 offset = inst.SIMM_16;
u32 accessSize = 0;
s32 regOffset = -1;
bool update = false;
// The update variants set `update` and intentionally fall through to the matching
// non-update case.
switch (inst.OPCD)
{
case 45: // sthu
update = true;
case 44: // sth
accessSize = 16;
break;
case 31:
switch (inst.SUBOP10)
{
case 183: // stwux
update = true;
case 151: // stwx
accessSize = 32;
regOffset = b;
break;
case 247: // stbux
update = true;
case 215: // stbx
accessSize = 8;
regOffset = b;
break;
case 439: // sthux
update = true;
case 407: // sthx
accessSize = 16;
regOffset = b;
break;
}
break;
case 37: // stwu
update = true;
case 36: // stw
accessSize = 32;
break;
case 39: // stbu
update = true;
case 38: // stb
accessSize = 8;
break;
}
// For non-update forms RA == 0 means "no base register" and is passed as -1;
// update forms always use RA.
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
if (update)
{
ARMReg rA = gpr.GetReg();
// RB is only initialized, and only used, for the indexed forms.
ARMReg RB;
ARMReg RA = gpr.R(a);
if (regOffset != -1)
RB = gpr.R(regOffset);
// Check for DSI exception prior to writing back address
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch has_exception = B_CC(CC_NEQ);
if (regOffset == -1)
{
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, RB);
}
SetJumpTarget(has_exception);
gpr.Unlock(rA);
}
}
// Emits a load into host register `dest` with the given access size (32, 16, anything else
// is treated as 8 bits).
// The effective address is formed from:
//   addr      - guest base register index, or -1 for "no base register"
//   offsetReg - guest index register, or -1 to use the immediate `offset` instead
//   offset    - immediate displacement, only used when offsetReg == -1
// signExtend / reverse add FLAG_EXTEND / FLAG_REVERSE to the backpatch flags.
// When `update` is set, the effective address (left in R12) is copied into guest register
// `addr` after the load.
void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update)
{
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (offsetReg == -1)
{
// Displacement form: address = addr + offset (or just offset without a base register).
if (addr != -1)
{
if (gpr.IsImm(addr))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + offset;
}
else
{
// Use the offset as an immediate operand when it is encodable, otherwise
// materialize it in rA first.
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(addr));
}
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
// Indexed form: address = addr + offsetReg (or just offsetReg without a base register).
if (addr != -1)
{
if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
}
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(addr), off))
{
ADD(rA, gpr.R(offsetReg), off);
}
else
{
MOVI2R(rA, gpr.GetImm(addr));
ADD(rA, rA, gpr.R(offsetReg));
}
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(offsetReg), off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, gpr.GetImm(offsetReg));
ADD(rA, rA, gpr.R(addr));
}
}
else
{
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
}
}
else
{
if (gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else
{
MOV(rA, gpr.R(offsetReg));
}
}
}
// A compile-time address is still materialized in rA for the backpatch routine.
if (is_immediate)
MOVI2R(rA, imm_addr);
u32 flags = BackPatchInfo::FLAG_LOAD;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (reverse)
flags |= BackPatchInfo::FLAG_REVERSE;
if (signExtend)
flags |= BackPatchInfo::FLAG_EXTEND;
EmitBackpatchRoutine(this, flags,
jo.fastmem,
true, dest);
// Update forms: write the effective address back to the base register.
if (update)
MOV(gpr.R(addr), rA);
}
// lXX: integer loads (lwz/lhz/lha/lbz, their update forms, their indexed forms, and the
// byte-reversed loads lwbrx/lhbrx).
// Decodes access size, sign extension, byte reversal, update and indexed addressing from the
// opcode, then emits the load through SafeLoadToReg, skipped at run time when EXCEPTION_DSI
// is already pending. Also recognises a specific lwz-based idle loop and emits a call to
// PowerPC::OnIdle for it when bSkipIdle is enabled.
void JitArm::lXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);

    u32 a = inst.RA, b = inst.RB, d = inst.RD;
    s32 offset = inst.SIMM_16;
    u32 accessSize = 0;
    s32 offsetReg = -1;
    bool update = false;
    bool signExtend = false;
    bool reverse = false;

    // The update variants set `update` and intentionally fall through to the matching
    // non-update case.
    switch (inst.OPCD)
    {
    case 31:
        // Every opcode-31 load is an indexed (register + register) form, so each case
        // selects RB as the offset register.
        switch (inst.SUBOP10)
        {
        case 55: // lwzux
            update = true;
        case 23: // lwzx
            accessSize = 32;
            offsetReg = b;
            break;
        case 119: //lbzux
            update = true;
        case 87: // lbzx
            accessSize = 8;
            offsetReg = b;
            break;
        case 311: // lhzux
            update = true;
        case 279: // lhzx
            accessSize = 16;
            offsetReg = b;
            break;
        case 375: // lhaux
            update = true;
        case 343: // lhax
            accessSize = 16;
            signExtend = true;
            offsetReg = b;
            break;
        case 534: // lwbrx
            accessSize = 32;
            reverse = true;
            // Previously missing: without it the address was built from the SIMM_16 bits
            // of the instruction word instead of RB.
            offsetReg = b;
            break;
        case 790: // lhbrx
            accessSize = 16;
            reverse = true;
            // Previously missing, see lwbrx above.
            offsetReg = b;
            break;
        }
        break;
    case 33: // lwzu
        update = true;
    case 32: // lwz
        accessSize = 32;
        break;
    case 35: // lbzu
        update = true;
    case 34: // lbz
        accessSize = 8;
        break;
    case 41: // lhzu
        update = true;
    case 40: // lhz
        accessSize = 16;
        break;
    case 43: // lhau
        update = true;
    case 42: // lha
        signExtend = true;
        accessSize = 16;
        break;
    }

    // Check for exception before loading
    ARMReg rA = gpr.GetReg(false);
    ARMReg RD = gpr.R(d);
    LDR(rA, R9, PPCSTATE_OFF(Exceptions));
    TST(rA, EXCEPTION_DSI);
    FixupBranch DoNotLoad = B_CC(CC_NEQ);

    // For non-update forms RA == 0 means "no base register" and is passed as -1;
    // update forms always use RA.
    SafeLoadToReg(RD, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse, update);

    SetJumpTarget(DoNotLoad);

    // LWZ idle skipping
    // Matches an lwz whose upper 16 instruction bits are 0x800D, followed by the instruction
    // word 0x28000000 (or 0x2C000000 on Wii) and then 0x4182fff8.
    if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
        inst.OPCD == 32 &&
        (inst.hex & 0xFFFF0000) == 0x800D0000 &&
        (PowerPC::HostRead_U32(js.compilerPC + 4) == 0x28000000 ||
        (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && PowerPC::HostRead_U32(js.compilerPC + 4) == 0x2C000000)) &&
        PowerPC::HostRead_U32(js.compilerPC + 8) == 0x4182fff8)
    {
        // if it's still 0, we can wait until the next event
        TST(RD, RD);
        FixupBranch noIdle = B_CC(CC_NEQ);

        gpr.Flush(FLUSH_MAINTAIN_STATE);
        fpr.Flush(FLUSH_MAINTAIN_STATE);

        rA = gpr.GetReg();
        MOVI2R(rA, (u32)&PowerPC::OnIdle);
        BL(rA);
        gpr.Unlock(rA);

        WriteExceptionExit();

        SetJumpTarget(noIdle);
        //js.compilerPC += 8;
        return;
    }
}
// Some games use this heavily in video codecs
// We make the assumption that this pulls from main RAM at /all/ times
// lmw: loads guest registers RD..31 from consecutive 32-bit words starting at
// SIMM_16 (+ RA when RA != 0), byte-swapping each word after the load.
// Only compiled when fastmem is enabled (FALLBACK_IF otherwise).
void JitArm::lmw(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);
    FALLBACK_IF(!jo.fastmem);

    const u32 base_reg = inst.RA;
    const ARMReg host_addr = gpr.GetReg();

    // Guest effective address
    MOVI2R(host_addr, inst.SIMM_16);
    if (base_reg)
        ADD(host_addr, host_addr, gpr.R(base_reg));

    // Mask the guest address and add R8 to form the host pointer
    // (R8 presumably holds the memory base - confirm).
    Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
    BIC(host_addr, host_addr, mask);
    ADD(host_addr, host_addr, R8);

    for (int reg = inst.RD; reg < 32; ++reg)
    {
        const ARMReg dst = gpr.R(reg);
        LDR(dst, host_addr, (reg - inst.RD) * 4);
        REV(dst, dst);
    }

    gpr.Unlock(host_addr);
}
// stmw: stores guest registers RD..31 to consecutive 32-bit words starting at
// SIMM_16 (+ RA when RA != 0). Each value is byte-swapped into a temporary before the store,
// so the guest registers themselves are left unchanged.
// Only compiled when fastmem is enabled (FALLBACK_IF otherwise).
void JitArm::stmw(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);
    FALLBACK_IF(!jo.fastmem);

    const u32 base_reg = inst.RA;
    const ARMReg host_addr = gpr.GetReg();
    const ARMReg swapped = gpr.GetReg();

    // Guest effective address
    MOVI2R(host_addr, inst.SIMM_16);
    if (base_reg)
        ADD(host_addr, host_addr, gpr.R(base_reg));

    // Mask the guest address and add R8 to form the host pointer
    // (R8 presumably holds the memory base - confirm).
    Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
    BIC(host_addr, host_addr, mask);
    ADD(host_addr, host_addr, R8);

    for (int reg = inst.RD; reg < 32; ++reg)
    {
        const ARMReg src = gpr.R(reg);
        REV(swapped, src);
        STR(swapped, host_addr, (reg - inst.RD) * 4);
    }

    gpr.Unlock(host_addr, swapped);
}
// dcbst: emits no code of its own. It is only handled here when the preceding instruction
// word matches the dcbt pattern; in every other case it falls back (FALLBACK_IF).
void JitArm::dcbst(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);

    // If the dcbst instruction is preceded by dcbt, it is flushing a prefetched
    // memory location. Do not invalidate the JIT cache in this case as the memory
    // will be the same.
    // dcbt = 0x7c00022c
    const u32 dcbt_pattern = 0x7c00022c;
    const bool preceded_by_dcbt =
        (PowerPC::HostRead_U32(js.compilerPC - 4) & dcbt_pattern) == dcbt_pattern;
    FALLBACK_IF(!preceded_by_dcbt);
}
// icbi: always handled through FallBackToInterpreter, after which the block is ended with
// an exit to the next instruction (compilerPC + 4).
void JitArm::icbi(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
WriteExit(js.compilerPC + 4);
}

View file

@ -1,403 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// lfXX: floating-point loads (lfs/lfd, their update forms and their indexed forms).
// Decodes the access size (FLAG_SIZE_F32 / FLAG_SIZE_F64), update and indexed addressing
// from the opcode, computes the effective address into R12 (at compile time when every
// contributing register holds a known immediate), and emits the load through
// EmitBackpatchRoutine. The load, and the RA write-back of update forms, are skipped at run
// time when EXCEPTION_DSI is pending.
void JitArm::lfXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreFloatingOff);

    ARMReg RA;
    u32 a = inst.RA, b = inst.RB;
    s32 offset = inst.SIMM_16;
    u32 flags = BackPatchInfo::FLAG_LOAD;
    bool update = false;
    s32 offsetReg = -1;

    switch (inst.OPCD)
    {
    case 31:
        switch (inst.SUBOP10)
        {
        case 567: // lfsux
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            update = true;
            offsetReg = b;
            break;
        case 535: // lfsx
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            offsetReg = b;
            break;
        case 631: // lfdux
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            update = true;
            offsetReg = b;
            break;
        case 599: // lfdx
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            offsetReg = b;
            break;
        }
        break;
    case 49: // lfsu
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        update = true;
        break;
    case 48: // lfs
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        break;
    case 51: // lfdu
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        update = true;
        break;
    case 50: // lfd
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        break;
    }

    // 32-bit loads also target the register's second half (R1); 64-bit loads only R0.
    ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG;
    if (flags & BackPatchInfo::FLAG_SIZE_F32)
        v1 = fpr.R1(inst.FD, false);

    ARMReg rA = R11;
    ARMReg addr = R12;

    u32 imm_addr = 0;
    bool is_immediate = false;
    if (update)
    {
        // Always uses RA
        if (gpr.IsImm(a) && offsetReg == -1)
        {
            is_immediate = true;
            imm_addr = offset + gpr.GetImm(a);
        }
        else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
        {
            is_immediate = true;
            imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
        }
        else
        {
            if (offsetReg == -1)
            {
                // Use the offset as an immediate operand when it is encodable, otherwise
                // materialize it in addr first.
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                ADD(addr, gpr.R(offsetReg), gpr.R(a));
            }
        }
    }
    else
    {
        // Non-update forms: RA == 0 means "no base register".
        if (offsetReg == -1)
        {
            if (a && gpr.IsImm(a))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + offset;
            }
            else if (a)
            {
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                is_immediate = true;
                imm_addr = offset;
            }
        }
        else
        {
            if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
            }
            else if (!a && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(offsetReg);
            }
            else if (a)
            {
                ADD(addr, gpr.R(a), gpr.R(offsetReg));
            }
            else
            {
                MOV(addr, gpr.R(offsetReg));
            }
        }
    }

    if (update)
        RA = gpr.R(a);

    // A compile-time address is still materialized in addr for the backpatch routine.
    if (is_immediate)
        MOVI2R(addr, imm_addr);

    // Skip the load (and the write-back) when a DSI exception is pending.
    // Exceptions is tested as a bit field (TST + NEQ), matching lXX and stX; the previous
    // CMP + EQ only matched when EXCEPTION_DSI was the sole bit set.
    LDR(rA, R9, PPCSTATE_OFF(Exceptions));
    TST(rA, EXCEPTION_DSI);
    FixupBranch DoNotLoad = B_CC(CC_NEQ);

    if (update)
        MOV(RA, addr);

    EmitBackpatchRoutine(this, flags,
        jo.fastmem,
        !(is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)), v0, v1);

    SetJumpTarget(DoNotLoad);
}
// Emits host code for the PowerPC floating-point store family:
//   stfs / stfsu / stfd / stfdu           (OPCD 52..55, displacement form)
//   stfsx / stfsux / stfdx / stfdux       (OPCD 31, indexed form)
//
// The effective address is built in R12 (`addr`); R11 (`rA`) is used as a
// scratch register. When the address is fully known at compile time it is
// tracked in `imm_addr` so that gather-pipe and RAM stores can be specialised.
//
// Fix relative to the previous revision: stfsux (SUBOP10 695) is an update
// form, like stfsu/stfdu/stfdux, but it did not set `update`, so rA was never
// written back with the effective address.
void JitArm::stfXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreFloatingOff);

    ARMReg RA; // Only assigned (and only used) when `update` is set.
    u32 a = inst.RA, b = inst.RB;
    s32 offset = inst.SIMM_16;
    u32 flags = BackPatchInfo::FLAG_STORE;
    bool update = false;
    s32 offsetReg = -1; // -1: displacement form, otherwise the index register (rB)

    // Decode access size, update behaviour and addressing form.
    switch (inst.OPCD)
    {
    case 31:
        switch (inst.SUBOP10)
        {
        case 663: // stfsx
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            offsetReg = b;
            break;
        case 695: // stfsux
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            update = true; // update form: rA receives the effective address
            offsetReg = b;
            break;
        case 727: // stfdx
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            offsetReg = b;
            break;
        case 759: // stfdux
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            update = true;
            offsetReg = b;
            break;
        }
        break;
    case 53: // stfsu
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        update = true;
        break;
    case 52: // stfs
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        break;
    case 55: // stfdu
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        update = true;
        break;
    case 54: // stfd
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        break;
    }

    ARMReg v0 = fpr.R0(inst.FS);
    ARMReg rA = R11;
    ARMReg addr = R12;
    u32 imm_addr = 0;
    bool is_immediate = false;

    // Compute the effective address, folding it to a constant when every
    // contributing register holds a known immediate.
    if (update)
    {
        // Always uses RA
        if (gpr.IsImm(a) && offsetReg == -1)
        {
            is_immediate = true;
            imm_addr = offset + gpr.GetImm(a);
        }
        else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
        {
            is_immediate = true;
            imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
        }
        else
        {
            if (offsetReg == -1)
            {
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    // Displacement does not fit an Operand2: materialise it first.
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                ADD(addr, gpr.R(offsetReg), gpr.R(a));
            }
        }
    }
    else
    {
        if (offsetReg == -1)
        {
            if (a && gpr.IsImm(a))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + offset;
            }
            else if (a)
            {
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                // rA field of 0 means "no base register": address is the displacement.
                is_immediate = true;
                imm_addr = offset;
            }
        }
        else
        {
            if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
            }
            else if (!a && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(offsetReg);
            }
            else if (a)
            {
                ADD(addr, gpr.R(a), gpr.R(offsetReg));
            }
            else
            {
                MOV(addr, gpr.R(offsetReg));
            }
        }
    }

    if (is_immediate)
        MOVI2R(addr, imm_addr);

    if (update)
    {
        // Write the effective address back to rA, but only when the Exceptions
        // word loaded from the PPC state does not compare equal to EXCEPTION_DSI.
        RA = gpr.R(a);
        LDR(rA, R9, PPCSTATE_OFF(Exceptions));
        CMP(rA, EXCEPTION_DSI);

        SetCC(CC_NEQ);
        MOV(RA, addr);
        SetCC();
    }

    if (is_immediate)
    {
        if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
        {
            // Known gather-pipe address: append the byte-swapped value directly
            // to GPFifo::m_gatherPipe and bump GPFifo::m_gatherPipeCount.
            int accessSize;
            if (flags & BackPatchInfo::FLAG_SIZE_F64)
                accessSize = 64;
            else
                accessSize = 32;

            MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
            MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
            LDR(R11, R14);
            ADD(R10, R10, R11); // R10 = write position inside the gather pipe

            NEONXEmitter nemit(this);
            if (accessSize == 64)
            {
                PUSH(2, R0, R1);
                nemit.VREV64(I_8, D0, v0);
                VMOV(R0, D0);
                STR(R0, R10, 0);
                STR(R1, R10, 4);
                POP(2, R0, R1);
            }
            else if (accessSize == 32)
            {
                VCVT(S0, v0, 0);
                nemit.VREV32(I_8, D0, D0);
                VMOV(addr, S0); // addr is free to reuse as a scratch here
                STR(addr, R10);
            }
            ADD(R11, R11, accessSize >> 3);
            STR(R11, R14);
            jit->js.fifoBytesThisBlock += accessSize >> 3;
        }
        else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
        {
            MOVI2R(addr, imm_addr);
            EmitBackpatchRoutine(this, flags, jo.fastmem, false, v0);
        }
        else
        {
            MOVI2R(addr, imm_addr);
            EmitBackpatchRoutine(this, flags, false, false, v0);
        }
    }
    else
    {
        EmitBackpatchRoutine(this, flags, jo.fastmem, true, v0);
    }
}

View file

@ -1,218 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emits host code for psq_l (OPCD 56) and psq_lu (OPCD 57): a quantized
// paired-single load using a 12-bit signed displacement.
//
// Register roles in the emitted code (as set up below):
//   R12 = GQR load-type field, shifted left by 2 (used to index the routine table)
//   R11 = GQR load-scale field, shifted left by 3
//   R10 = effective address
//   R14 = address of the selected load routine
void JitArm::psq_l(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStorePairedOff);

    FALLBACK_IF(jo.memcheck || !jo.fastmem);

    const bool isUpdateForm = inst.OPCD == 57;
    const s32 displacement = inst.SIMM_12;
    // rA contributes when its field is non-zero; the update form always uses it.
    const bool addBaseReg = inst.RA || isUpdateForm;

    // Split the selected GQR into type (R12) and scale (R11).
    LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
    UBFX(R12, R11, 16, 3); // Type
    LSL(R12, R12, 2);
    UBFX(R11, R11, 24, 6); // Scale
    LSL(R11, R11, 3);

    // Effective address -> R10.
    Operand2 dispOperand;
    if (!TryMakeOperand2(displacement, dispOperand))
    {
        // Displacement is not encodable as an Operand2: load it, then add rA.
        MOVI2R(R10, (u32)displacement);
        if (addBaseReg)
            ADD(R10, R10, gpr.R(inst.RA));
    }
    else if (addBaseReg)
    {
        ADD(R10, gpr.R(inst.RA), dispOperand);
    }
    else
    {
        MOV(R10, dispOperand);
    }

    if (isUpdateForm)
        MOV(gpr.R(inst.RA), R10);

    // Pick the routine for this type; the W variant lives 8 entries further on.
    MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
    ADD(R14, R14, R12);
    LDR(R14, R14, inst.W ? 8 * 4 : 0);

    // The routine hands the loaded values back in S0 and S1.
    BL(R14);

    ARMReg dst0 = fpr.R0(inst.RS, false);
    ARMReg dst1 = fpr.R1(inst.RS, false);
    VCVT(dst0, S0, 0);
    if (inst.W)
        MOVI2F(dst1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f
    else
        VCVT(dst1, S1, 0);
}
// Emits host code for psq_lx / psq_lux: the register-indexed quantized
// paired-single load.
//
// Register roles in the emitted code (as set up below):
//   R12 = GQR load-type field, shifted left by 2 (used to index the routine table)
//   R11 = GQR load-scale field, shifted left by 3
//   R10 = effective address
//   R14 = address of the selected load routine, later reused as a scratch
//
// Fix relative to the previous revision: the update form was detected with
// `inst.SUBOP10 == 38`. For the indexed paired-single encoding the upper four
// bits of the 10-bit extended-opcode field carry W and I, so that comparison
// only matched when both were zero. Only the low six bits are compared now.
void JitArm::psq_lx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStorePairedOff);

    FALLBACK_IF(jo.memcheck || !jo.fastmem);

    // Low 6 bits of SUBOP10 are the actual extended opcode (6 = psq_lx, 38 = psq_lux).
    bool update = (inst.SUBOP10 & 0x3F) == 38;

    // Split the selected GQR into type (R12) and scale (R11).
    LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.Ix]));
    UBFX(R12, R11, 16, 3); // Type
    LSL(R12, R12, 2);
    UBFX(R11, R11, 24, 6); // Scale
    LSL(R11, R11, 3);

    if (inst.RA || update) // Always uses the register on update
    {
        ADD(R10, gpr.R(inst.RB), gpr.R(inst.RA));
    }
    else
    {
        MOV(R10, gpr.R(inst.RB));
    }

    if (update)
        MOV(gpr.R(inst.RA), R10);

    // Pick the routine for this type; the W variant lives 8 entries further on.
    MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
    ADD(R14, R14, R12);
    LDR(R14, R14, inst.Wx ? 8 * 4 : 0);

    // Values returned in S0, S1
    BL(R14); // Jump to the quantizer Load

    ARMReg vD0 = fpr.R0(inst.RS, false);
    ARMReg vD1 = fpr.R1(inst.RS, false);

    // Only commit the result when the Exceptions word does not compare equal
    // to EXCEPTION_DSI: everything between the SetCC calls is conditional.
    LDR(R14, R9, PPCSTATE_OFF(Exceptions));
    CMP(R14, EXCEPTION_DSI);
    SetCC(CC_NEQ);

    VCVT(vD0, S0, 0);
    if (!inst.Wx)
        VCVT(vD1, S1, 0);
    else
        MOVI2F(vD1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f
    SetCC();
}
// Emits host code for psq_st (OPCD 60) and psq_stu (OPCD 61): a quantized
// paired-single store using a 12-bit signed displacement.
//
// Register roles in the emitted code (as set up below):
//   R12 = GQR store-type field, shifted left by 2 (used to index the routine table)
//   R11 = GQR store-scale field, shifted left by 3
//   R10 = effective address
//   R14 = address of the selected store routine
void JitArm::psq_st(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStorePairedOff);

    FALLBACK_IF(jo.memcheck || !jo.fastmem);

    const bool isUpdateForm = inst.OPCD == 61;
    const s32 displacement = inst.SIMM_12;
    // rA contributes when its field is non-zero; the update form always uses it.
    const bool addBaseReg = inst.RA || isUpdateForm;

    // Split the selected GQR into type (R12) and scale (R11).
    LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
    UBFX(R12, R11, 0, 3); // Type
    LSL(R12, R12, 2);
    UBFX(R11, R11, 8, 6); // Scale
    LSL(R11, R11, 3);

    // Effective address -> R10.
    Operand2 dispOperand;
    if (!TryMakeOperand2(displacement, dispOperand))
    {
        // Displacement is not encodable as an Operand2: load it, then add rA.
        MOVI2R(R10, (u32)displacement);
        if (addBaseReg)
            ADD(R10, R10, gpr.R(inst.RA));
    }
    else if (addBaseReg)
    {
        ADD(R10, gpr.R(inst.RA), dispOperand);
    }
    else
    {
        MOV(R10, dispOperand);
    }

    if (isUpdateForm)
        MOV(gpr.R(inst.RA), R10);

    // Pick the routine for this type; the W variant lives 8 entries further on.
    MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized);
    ADD(R14, R14, R12);
    LDR(R14, R14, inst.W ? 8 * 4 : 0);

    // Narrow the source values into S0 (and S1 for the paired case).
    ARMReg src0 = fpr.R0(inst.RS);
    VCVT(S0, src0, 0);
    if (!inst.W)
    {
        ARMReg src1 = fpr.R1(inst.RS);
        VCVT(S1, src1, 0);
    }

    // floats passed through D0
    BL(R14); // Jump to the quantizer Store
}
// Emits host code for psq_stx / psq_stux: the register-indexed quantized
// paired-single store.
//
// Register roles in the emitted code (as set up below):
//   R12 = GQR store-type field, shifted left by 2 (used to index the routine table)
//   R11 = GQR store-scale field, shifted left by 3
//   R10 = effective address
//   R14 = address of the selected store routine
//
// Fixes relative to the previous revision:
//  * The GQR index and W flag were read from inst.I / inst.W, which are the
//    fields of the displacement form. The indexed form keeps them in
//    inst.Ix / inst.Wx, as psq_lx already does.
//  * The update form was detected with `inst.SUBOP10 == 39`, which only
//    matched when the W and I bits (upper four bits of SUBOP10) were zero.
void JitArm::psq_stx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStorePairedOff);

    FALLBACK_IF(jo.memcheck || !jo.fastmem);

    // Low 6 bits of SUBOP10 are the actual extended opcode (7 = psq_stx, 39 = psq_stux).
    bool update = (inst.SUBOP10 & 0x3F) == 39;

    // Split the selected GQR into type (R12) and scale (R11).
    LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.Ix]));
    UBFX(R12, R11, 0, 3); // Type
    LSL(R12, R12, 2);
    UBFX(R11, R11, 8, 6); // Scale
    LSL(R11, R11, 3);

    if (inst.RA || update) // Always uses the register on update
    {
        ADD(R10, gpr.R(inst.RA), gpr.R(inst.RB));
    }
    else
    {
        MOV(R10, gpr.R(inst.RB));
    }

    if (update)
        MOV(gpr.R(inst.RA), R10);

    // Pick the routine for this type; the W variant lives 8 entries further on.
    MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized);
    ADD(R14, R14, R12);
    LDR(R14, R14, inst.Wx ? 8 * 4 : 0);

    // Narrow the source values into S0 (and S1 for the paired case).
    ARMReg vD0 = fpr.R0(inst.RS);
    VCVT(S0, vD0, 0);
    if (!inst.Wx)
    {
        ARMReg vD1 = fpr.R1(inst.RS);
        VCVT(S1, vD1, 0);
    }

    // floats passed through D0
    BL(R14); // Jump to the quantizer Store
}

View file

@ -1,618 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emitter for ps_rsqrte (paired-single reciprocal square root estimate).
//
// NOTE(review): the first FALLBACK_IF below is unconditional, so this
// instruction presumably always goes to the interpreter and the emitter code
// after it never runs — confirm against the FALLBACK_IF macro definition.
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(true);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
ARMReg fpscrReg = gpr.GetReg();
ARMReg V0 = D1;
// NOTE(review): rA is only ever ORR'd and compared below; nothing visible
// here zeroes it first, so the CMP(rA, 0) further down reads whatever the
// register held before. Fix this before removing the unconditional fallback.
ARMReg rA = gpr.GetReg();
// V0 <- the PPC_NAN constant, loaded through fpscrReg as a temporary pointer.
MOVI2R(fpscrReg, (u32)&PPC_NAN);
VLDR(V0, fpscrReg, 0);
// fpscrReg now holds the guest FPSCR; it is stored back at the end.
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
// First element: test vB0 and branch on the resulting flags.
VCMP(vB0);
VMRS(_PC);
FixupBranch Less0 = B_CC(CC_LT);
// Branch not taken: result is the NaN constant and FPSCR_VXSQRT is raised.
VMOV(vD0, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr0 = B();
SetJumpTarget(Less0);
// Branch taken: record bit 0 in rA when the flags also say "equal".
SetCC(CC_EQ);
ORR(rA, rA, 1);
SetCC();
SetJumpTarget(SkipOrr0);
// Second element: same sequence for vB1, recording bit 1 in rA.
VCMP(vB1);
VMRS(_PC);
FixupBranch Less1 = B_CC(CC_LT);
VMOV(vD1, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr1 = B();
SetJumpTarget(Less1);
SetCC(CC_EQ);
ORR(rA, rA, 2);
SetCC();
SetJumpTarget(SkipOrr1);
// Raise FPSCR_ZX if either element set a bit in rA.
CMP(rA, 0);
FixupBranch noException = B_CC(CC_EQ);
SetFPException(fpscrReg, FPSCR_ZX);
SetJumpTarget(noException);
// Narrow both inputs into S0/S1, run the NEON estimate on D0, widen back.
VCVT(S0, vB0, 0);
VCVT(S1, vB1, 0);
NEONXEmitter nemit(this);
nemit.VRSQRTE(F_32, D0, D0);
VCVT(vD0, S0, 0);
VCVT(vD1, S1, 0);
// Write the (possibly updated) FPSCR back and release the temporaries.
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, rA);
}
// Emits ps_sel: for each element, the destination receives the C operand when
// the branch on CC_GE after testing the A operand is taken, and the B operand
// otherwise.
void JitArm::ps_sel(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg selector0 = fpr.R0(a);
    ARMReg selector1 = fpr.R1(a);
    ARMReg ifLess0 = fpr.R0(b);
    ARMReg ifLess1 = fpr.R1(b);
    ARMReg ifGreaterEq0 = fpr.R0(c);
    ARMReg ifGreaterEq1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    // Element 0
    VCMP(selector0);
    VMRS(_PC);
    FixupBranch takeC0 = B_CC(CC_GE);
    VMOV(dst0, ifLess0);
    FixupBranch done0 = B();
    SetJumpTarget(takeC0);
    VMOV(dst0, ifGreaterEq0);
    SetJumpTarget(done0);

    // Element 1
    VCMP(selector1);
    VMRS(_PC);
    FixupBranch takeC1 = B_CC(CC_GE);
    VMOV(dst1, ifLess1);
    FixupBranch done1 = B();
    SetJumpTarget(takeC1);
    VMOV(dst1, ifGreaterEq1);
    SetJumpTarget(done1);
}
// Emits ps_add: element-wise d = a + b on both halves of the pair.
void JitArm::ps_add(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg lhs0 = fpr.R0(a);
    ARMReg lhs1 = fpr.R1(a);
    ARMReg rhs0 = fpr.R0(b);
    ARMReg rhs1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VADD(dst0, lhs0, rhs0);
    VADD(dst1, lhs1, rhs1);
}
// Emits ps_div: element-wise d = a / b on both halves of the pair.
void JitArm::ps_div(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg numerator0 = fpr.R0(a);
    ARMReg numerator1 = fpr.R1(a);
    ARMReg denominator0 = fpr.R0(b);
    ARMReg denominator1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VDIV(dst0, numerator0, denominator0);
    VDIV(dst1, numerator1, denominator1);
}
// Emits ps_res: element-wise d = 1.0 / b on both halves of the pair, computed
// with a full divide.
//
// Fix relative to the previous revision: the constant 1.0 was loaded with
// MOVI2R, the integer-immediate helper, although the target is a temporary
// obtained from the floating-point register cache. MOVI2F is the helper the
// paired loads in this JIT use for the same 1.0f constant.
void JitArm::ps_res(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg vB0 = fpr.R0(b);
    ARMReg vB1 = fpr.R1(b);
    ARMReg vD0 = fpr.R0(d, false);
    ARMReg vD1 = fpr.R1(d, false);

    // Temporary holding the numerator so both divides can share it.
    ARMReg V0 = fpr.GetReg();
    MOVI2F(V0, 1.0f, INVALID_REG); // temp reg not needed for 1.0

    VDIV(vD0, V0, vB0);
    VDIV(vD1, V0, vB1);

    fpr.Unlock(V0);
}
// Emits ps_nmadd: element-wise d = -((a * c) + b).
// The products go through temporaries so the destination is written only
// after every source element has been read.
void JitArm::ps_nmadd(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg addend0 = fpr.R0(b);
    ARMReg addend1 = fpr.R1(b);
    ARMReg mulRhs0 = fpr.R0(c);
    ARMReg mulRhs1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, mulRhs0);
    VMUL(product1, mulLhs1, mulRhs1);
    VADD(dst0, product0, addend0);
    VADD(dst1, product1, addend1);
    VNEG(dst0, dst0);
    VNEG(dst1, dst1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_madd: element-wise d = (a * c) + b.
// The products go through temporaries so the destination is written only
// after every source element has been read.
void JitArm::ps_madd(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg addend0 = fpr.R0(b);
    ARMReg addend1 = fpr.R1(b);
    ARMReg mulRhs0 = fpr.R0(c);
    ARMReg mulRhs1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, mulRhs0);
    VMUL(product1, mulLhs1, mulRhs1);
    VADD(dst0, product0, addend0);
    VADD(dst1, product1, addend1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_nmsub: element-wise d = -((a * c) - b).
// The products go through temporaries so the destination is written only
// after every source element has been read.
void JitArm::ps_nmsub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg subtrahend0 = fpr.R0(b);
    ARMReg subtrahend1 = fpr.R1(b);
    ARMReg mulRhs0 = fpr.R0(c);
    ARMReg mulRhs1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, mulRhs0);
    VMUL(product1, mulLhs1, mulRhs1);
    VSUB(dst0, product0, subtrahend0);
    VSUB(dst1, product1, subtrahend1);
    VNEG(dst0, dst0);
    VNEG(dst1, dst1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_msub: element-wise d = (a * c) - b.
// The products go through temporaries so the destination is written only
// after every source element has been read.
void JitArm::ps_msub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg subtrahend0 = fpr.R0(b);
    ARMReg subtrahend1 = fpr.R1(b);
    ARMReg mulRhs0 = fpr.R0(c);
    ARMReg mulRhs1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, mulRhs0);
    VMUL(product1, mulLhs1, mulRhs1);
    VSUB(dst0, product0, subtrahend0);
    VSUB(dst1, product1, subtrahend1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_madds0: both elements of a are multiplied by element 0 of c and
// then added to the matching element of b.
//   d0 = a0 * c0 + b0
//   d1 = a1 * c0 + b1
void JitArm::ps_madds0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg addend0 = fpr.R0(b);
    ARMReg addend1 = fpr.R1(b);
    ARMReg scalar = fpr.R0(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, scalar);
    VMUL(product1, mulLhs1, scalar);
    VADD(dst0, product0, addend0);
    VADD(dst1, product1, addend1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_madds1: both elements of a are multiplied by element 1 of c and
// then added to the matching element of b.
//   d0 = a0 * c1 + b0
//   d1 = a1 * c1 + b1
void JitArm::ps_madds1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg mulLhs0 = fpr.R0(a);
    ARMReg mulLhs1 = fpr.R1(a);
    ARMReg addend0 = fpr.R0(b);
    ARMReg addend1 = fpr.R1(b);
    ARMReg scalar = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, mulLhs0, scalar);
    VMUL(product1, mulLhs1, scalar);
    VADD(dst0, product0, addend0);
    VADD(dst1, product1, addend1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_sum0:
//   d0 = a0 + b1
//   d1 = c1
void JitArm::ps_sum0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg sumLhs = fpr.R0(a);
    ARMReg sumRhs = fpr.R1(b);
    ARMReg passThrough = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VADD(dst0, sumLhs, sumRhs);
    VMOV(dst1, passThrough);
}
// Emits ps_sum1:
//   d0 = c0
//   d1 = a0 + b1
//
// Fix relative to the previous revision: d0 was written before the sum was
// computed. When FD == FA the destination's element 0 is presumably the same
// host register as a0 (the register cache is asked for the same PPC register),
// so the sum read c0 instead of a0. Computing the sum first avoids that; the
// VADD only writes element 1 of d, which the following VMOV does not read.
void JitArm::ps_sum1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg vA0 = fpr.R0(a);
    ARMReg vB1 = fpr.R1(b);
    ARMReg vC0 = fpr.R0(c);
    ARMReg vD0 = fpr.R0(d, false);
    ARMReg vD1 = fpr.R1(d, false);

    // Order matters: consume a0 before element 0 of d is overwritten.
    VADD(vD1, vA0, vB1);
    VMOV(vD0, vC0);
}
// Emits ps_sub: element-wise d = a - b on both halves of the pair.
void JitArm::ps_sub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg minuend0 = fpr.R0(a);
    ARMReg minuend1 = fpr.R1(a);
    ARMReg subtrahend0 = fpr.R0(b);
    ARMReg subtrahend1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VSUB(dst0, minuend0, subtrahend0);
    VSUB(dst1, minuend1, subtrahend1);
}
// Emits ps_mul: element-wise d = a * c on both halves of the pair.
void JitArm::ps_mul(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, c = inst.FC, d = inst.FD;

    ARMReg lhs0 = fpr.R0(a);
    ARMReg lhs1 = fpr.R1(a);
    ARMReg rhs0 = fpr.R0(c);
    ARMReg rhs1 = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VMUL(dst0, lhs0, rhs0);
    VMUL(dst1, lhs1, rhs1);
}
// Emits ps_muls0: both elements of a are multiplied by element 0 of c.
//   d0 = a0 * c0
//   d1 = a1 * c0
// The products are formed in temporaries and copied to the destination
// afterwards, so c0 is still intact for the second multiply.
void JitArm::ps_muls0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, c = inst.FC, d = inst.FD;

    ARMReg lhs0 = fpr.R0(a);
    ARMReg lhs1 = fpr.R1(a);
    ARMReg scalar = fpr.R0(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, lhs0, scalar);
    VMUL(product1, lhs1, scalar);
    VMOV(dst0, product0);
    VMOV(dst1, product1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_muls1: both elements of a are multiplied by element 1 of c.
//   d0 = a0 * c1
//   d1 = a1 * c1
// The products are formed in temporaries and copied to the destination
// afterwards, so c1 is still intact for the second multiply.
void JitArm::ps_muls1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, c = inst.FC, d = inst.FD;

    ARMReg lhs0 = fpr.R0(a);
    ARMReg lhs1 = fpr.R1(a);
    ARMReg scalar = fpr.R1(c);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg product0 = fpr.GetReg();
    ARMReg product1 = fpr.GetReg();

    VMUL(product0, lhs0, scalar);
    VMUL(product1, lhs1, scalar);
    VMOV(dst0, product0);
    VMOV(dst1, product1);

    fpr.Unlock(product0);
    fpr.Unlock(product1);
}
// Emits ps_merge00:
//   d0 = a0
//   d1 = b0
// Element 1 is written first, so b0 is read before element 0 of the
// destination is overwritten.
void JitArm::ps_merge00(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg firstOfA = fpr.R0(a);
    ARMReg firstOfB = fpr.R0(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VMOV(dst1, firstOfB);
    VMOV(dst0, firstOfA);
}
// Emits ps_merge01:
//   d0 = a0
//   d1 = b1
void JitArm::ps_merge01(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg firstOfA = fpr.R0(a);
    ARMReg secondOfB = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VMOV(dst0, firstOfA);
    VMOV(dst1, secondOfB);
}
// Emits ps_merge10:
//   d0 = a1
//   d1 = b0
// b0 is parked in a temporary first, so it is read before either element of
// the destination is written.
void JitArm::ps_merge10(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg secondOfA = fpr.R1(a);
    ARMReg firstOfB = fpr.R0(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    ARMReg savedB0 = fpr.GetReg();

    VMOV(savedB0, firstOfB);
    VMOV(dst0, secondOfA);
    VMOV(dst1, savedB0);

    fpr.Unlock(savedB0);
}
// Emits ps_merge11:
//   d0 = a1
//   d1 = b1
void JitArm::ps_merge11(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, d = inst.FD;

    ARMReg secondOfA = fpr.R1(a);
    ARMReg secondOfB = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VMOV(dst0, secondOfA);
    VMOV(dst1, secondOfB);
}
// Emits ps_mr: copies both elements of b into d.
void JitArm::ps_mr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg src0 = fpr.R0(b);
    ARMReg src1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VMOV(dst0, src0);
    VMOV(dst1, src1);
}
// Emits ps_neg: element-wise d = -b.
void JitArm::ps_neg(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg src0 = fpr.R0(b);
    ARMReg src1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VNEG(dst0, src0);
    VNEG(dst1, src1);
}
// Emits ps_abs: element-wise d = |b|.
void JitArm::ps_abs(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg src0 = fpr.R0(b);
    ARMReg src1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    VABS(dst0, src0);
    VABS(dst1, src1);
}
// Emits ps_nabs: element-wise d = -|b|, built from an absolute value followed
// by a negate on each element.
void JitArm::ps_nabs(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg src0 = fpr.R0(b);
    ARMReg src1 = fpr.R1(b);
    ARMReg dst0 = fpr.R0(d, false);
    ARMReg dst1 = fpr.R1(d, false);

    // Element 0
    VABS(dst0, src0);
    VNEG(dst0, dst0);

    // Element 1
    VABS(dst1, src1);
    VNEG(dst1, dst1);
}

View file

@ -1,217 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emits a test of one bit of a condition-register field followed by a
// conditional forward branch, and returns the branch so the caller can set its
// target later.
//
// field:       index into cr_val in the PPC state.
// bit:         one of CR_SO_BIT / CR_EQ_BIT / CR_GT_BIT / CR_LT_BIT.
// jump_if_set: true to branch when the bit is set, false to branch when clear.
//
// Each cr_val entry is read as two 32-bit words: the one at the field's offset
// and the one at offset + sizeof(u32) (presumably the low and high halves of a
// 64-bit value — confirm against the PPC state declaration).
//
// NOTE(review): for an unrecognised `bit` only the assert fires and `branch`
// is returned without having been assigned.
FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
ARMReg RA = gpr.GetReg();
// NOTE(review): the previous comments gave these masks as 0x10000000 and
// 0x80000000. Assuming Operand2(imm, rot) follows the ARM modified-immediate
// rule (imm rotated right by 2 * rot), they are 0x20000000 and 0x40000000,
// i.e. bits 61 and 62 of the 64-bit value, matching the case comments below.
Operand2 SOBit(2, 2); // mask for the SO test (applied to the second word)
Operand2 LTBit(1, 1); // mask for the LT test (applied to the second word)
FixupBranch branch;
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
TST(RA, SOBit);
branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ);
break;
case CR_EQ_BIT: // check bits 31-0 == 0
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]));
CMP(RA, 0);
branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ);
break;
case CR_GT_BIT: // check val > 0
// Two-word signed compare against 1: CMP on the first word produces the
// carry/borrow that SBCS consumes on the second word, so the final flags
// describe (value - 1) and GE means value >= 1.
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]));
CMP(RA, 1);
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
SBCS(RA, RA, 0);
branch = B_CC(jump_if_set ? CC_GE : CC_LT);
break;
case CR_LT_BIT: // check bit 62 set
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
TST(RA, LTBit);
branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ);
break;
default:
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
}
// The scratch register is only needed for the test emitted above.
gpr.Unlock(RA);
return branch;
}
// Emits mtspr: copies a general-purpose register into a special-purpose
// register slot of the PPC state.
//
// A whitelist of SPRs is handled by a plain store into spr[]. XER is
// additionally split into the separate xer_stringctrl / xer_ca / xer_so_ov
// fields. Every other SPR takes the FALLBACK_IF(true) path.
void JitArm::mtspr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    // The SPR number is split across two 5-bit instruction fields.
    u32 sprNumber = (inst.SPRU << 5) | (inst.SPRL & 0x1F);

    switch (sprNumber)
    {
    case SPR_DMAU:
    case SPR_SPRG0:
    case SPR_SPRG1:
    case SPR_SPRG2:
    case SPR_SPRG3:
    case SPR_SRR0:
    case SPR_SRR1:
    case SPR_LR:
    case SPR_CTR:
    case SPR_GQR0:
    case SPR_GQR0 + 1:
    case SPR_GQR0 + 2:
    case SPR_GQR0 + 3:
    case SPR_GQR0 + 4:
    case SPR_GQR0 + 5:
    case SPR_GQR0 + 6:
    case SPR_GQR0 + 7:
        // Nothing special to do: the generic store after the switch covers these.
        break;

    case SPR_XER:
    {
        ARMReg source = gpr.R(inst.RD);
        ARMReg scratch = gpr.GetReg();
        ARMReg maskReg = gpr.GetReg();

        // String-control part: the source masked with 0xFF7F, stored as a halfword.
        MOVI2R(maskReg, 0xFF7F);
        AND(scratch, source, maskReg);
        STRH(scratch, R9, PPCSTATE_OFF(xer_stringctrl));

        // Carry: a single bit taken from XER_CA_SHIFT.
        LSR(scratch, source, XER_CA_SHIFT);
        AND(scratch, scratch, 1);
        STRB(scratch, R9, PPCSTATE_OFF(xer_ca));

        // SO/OV: everything from XER_OV_SHIFT upwards, stored as a byte.
        LSR(scratch, source, XER_OV_SHIFT);
        STRB(scratch, R9, PPCSTATE_OFF(xer_so_ov));

        gpr.Unlock(scratch, maskReg);
        break;
    }

    default:
        FALLBACK_IF(true);
    }

    // Generic path: store the register into its spr[] slot.
    ARMReg valueReg = gpr.R(inst.RD);
    STR(valueReg, R9, PPCSTATE_OFF(spr) + sprNumber * 4);
}
// Emits mftb by delegating to the mfspr emitter, which decodes the register
// number from the same instruction fields.
void JitArm::mftb(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    // No time-base specific handling here; mfspr decides what to do.
    mfspr(inst);
}
// Emits mfspr: loads a special-purpose register into a general-purpose
// register.
//
// XER is reassembled from the separate xer_stringctrl / xer_ca / xer_so_ov
// fields. WPAR, DEC, TL and TU take the FALLBACK_IF(true) path. Every other
// SPR is read directly from its spr[] slot.
void JitArm::mfspr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    // The SPR number is split across two 5-bit instruction fields.
    u32 sprNumber = (inst.SPRU << 5) | (inst.SPRL & 0x1F);

    switch (sprNumber)
    {
    case SPR_XER:
    {
        gpr.BindToRegister(inst.RD, false);
        ARMReg dest = gpr.R(inst.RD);
        ARMReg scratch = gpr.GetReg();

        // Start from the string-control halfword...
        LDRH(dest, R9, PPCSTATE_OFF(xer_stringctrl));

        // ...merge in the carry byte at XER_CA_SHIFT...
        LDRB(scratch, R9, PPCSTATE_OFF(xer_ca));
        LSL(scratch, scratch, XER_CA_SHIFT);
        ORR(dest, dest, scratch);

        // ...and the SO/OV byte at XER_OV_SHIFT.
        LDRB(scratch, R9, PPCSTATE_OFF(xer_so_ov));
        LSL(scratch, scratch, XER_OV_SHIFT);
        ORR(dest, dest, scratch);

        gpr.Unlock(scratch);
        break;
    }

    case SPR_WPAR:
    case SPR_DEC:
    case SPR_TL:
    case SPR_TU:
        FALLBACK_IF(true);
        // No break here, as in the original: control continues into default.
    default:
    {
        gpr.BindToRegister(inst.RD, false);
        ARMReg dest = gpr.R(inst.RD);
        LDR(dest, R9, PPCSTATE_OFF(spr) + sprNumber * 4);
        break;
    }
    }
}
// Emits mtsr: stores general-purpose register RS into segment register
// sr[inst.SR] of the PPC state.
void JitArm::mtsr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    ARMReg source = gpr.R(inst.RS);
    STR(source, R9, PPCSTATE_OFF(sr[inst.SR]));
}
// Emits mfsr: loads segment register sr[inst.SR] from the PPC state into
// general-purpose register RD.
void JitArm::mfsr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    // Bind the destination first, then load into whatever host register it got.
    gpr.BindToRegister(inst.RD, false);
    ARMReg dest = gpr.R(inst.RD);
    LDR(dest, R9, PPCSTATE_OFF(sr[inst.SR]));
}
// Emits mtmsr: stores general-purpose register RS into the msr field of the
// PPC state, flushes both register caches and ends the block with an exit to
// the following instruction.
void JitArm::mtmsr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    // Don't interpret this, if we do we get thrown out
    //JITDISABLE(bJITSystemRegistersOff);

    ARMReg source = gpr.R(inst.RS);
    STR(source, R9, PPCSTATE_OFF(msr));

    gpr.Flush();
    fpr.Flush();

    // Leave the block, continuing at the next instruction.
    WriteExit(js.compilerPC + 4);
}
// Emits mfmsr: loads the msr field of the PPC state into general-purpose
// register RD.
void JitArm::mfmsr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    // Bind the destination first, then load into whatever host register it got.
    gpr.BindToRegister(inst.RD, false);
    ARMReg dest = gpr.R(inst.RD);
    LDR(dest, R9, PPCSTATE_OFF(msr));
}
// Emits mcrf: copies condition-register field CRFS to field CRFD. Each cr_val
// entry is moved as two 32-bit words. No copy is emitted when source and
// destination are the same field.
void JitArm::mcrf(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITSystemRegistersOff);

    ARMReg scratch = gpr.GetReg();

    const bool fieldsDiffer = inst.CRFS != inst.CRFD;
    if (fieldsDiffer)
    {
        // First word of the entry.
        LDR(scratch, R9, PPCSTATE_OFF(cr_val[inst.CRFS]));
        STR(scratch, R9, PPCSTATE_OFF(cr_val[inst.CRFD]));

        // Second word of the entry.
        LDR(scratch, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32));
        STR(scratch, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32));
    }

    gpr.Unlock(scratch);
}

View file

@ -1,483 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
// Should be moved in to the Jit class

// Pointer to a JitArm member function that compiles one PowerPC instruction.
typedef void (JitArm::*_Instruction) (UGeckoInstruction instCode);

// Dispatch tables of instruction compilers.
// dynaOpTable is indexed by the 6-bit primary opcode (OPCD); the numbered
// tables are indexed by the sub-opcode field that the matching
// DynaRunTableN function reads (SUBOP10, or SUBOP5 for table 59).
static _Instruction dynaOpTable[64];
static _Instruction dynaOpTable4[1024];
static _Instruction dynaOpTable19[1024];
static _Instruction dynaOpTable31[1024];
static _Instruction dynaOpTable59[32];
static _Instruction dynaOpTable63[1024];
// Second-level dispatchers: each one looks up the compiler for the
// instruction's sub-opcode in the table belonging to its primary opcode and
// invokes it on this JitArm instance.

// Primary opcode 4, indexed by SUBOP10.
void JitArm::DynaRunTable4(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable4[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 19, indexed by SUBOP10.
void JitArm::DynaRunTable19(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable19[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 31, indexed by SUBOP10.
void JitArm::DynaRunTable31(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable31[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 59, indexed by SUBOP5.
void JitArm::DynaRunTable59(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable59[_inst.SUBOP5];
    (this->*handler)(_inst);
}

// Primary opcode 63, indexed by SUBOP10.
void JitArm::DynaRunTable63(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable63[_inst.SUBOP10];
    (this->*handler)(_inst);
}
// One row of the static opcode tables below: pairs an opcode (or sub-opcode)
// number with the JitArm member function that compiles it.
struct GekkoOPTemplate
{
int opcode; // primary opcode or sub-opcode, depending on the table
_Instruction Inst; // compiler routine for that opcode
//GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out
};
// Compilers keyed by primary opcode (OPCD). The first five rows forward to the
// DynaRunTableN dispatchers for opcodes that have a sub-opcode table.
// Primary opcodes not listed here are noted in the trailing comment.
static GekkoOPTemplate primarytable[] =
{
{4, &JitArm::DynaRunTable4}, // RunTable4
{19, &JitArm::DynaRunTable19}, // RunTable19
{31, &JitArm::DynaRunTable31}, // RunTable31
{59, &JitArm::DynaRunTable59}, // RunTable59
{63, &JitArm::DynaRunTable63}, // RunTable63
{16, &JitArm::bcx}, // bcx
{18, &JitArm::bx}, // bx
{3, &JitArm::twx}, // twi
{17, &JitArm::sc}, // sc
{7, &JitArm::arith}, // mulli
{8, &JitArm::subfic}, // subfic
{10, &JitArm::cmpli}, // cmpli
{11, &JitArm::cmpi}, // cmpi
{12, &JitArm::arith}, // addic
{13, &JitArm::arith}, // addic_rc
{14, &JitArm::arith}, // addi
{15, &JitArm::arith}, // addis
{20, &JitArm::rlwimix}, // rlwimix
{21, &JitArm::rlwinmx}, // rlwinmx
{23, &JitArm::rlwnmx}, // rlwnmx
{24, &JitArm::arith}, // ori
{25, &JitArm::arith}, // oris
{26, &JitArm::arith}, // xori
{27, &JitArm::arith}, // xoris
{28, &JitArm::arith}, // andi_rc
{29, &JitArm::arith}, // andis_rc
{32, &JitArm::lXX}, // lwz
{33, &JitArm::lXX}, // lwzu
{34, &JitArm::lXX}, // lbz
{35, &JitArm::lXX}, // lbzu
{40, &JitArm::lXX}, // lhz
{41, &JitArm::lXX}, // lhzu
{42, &JitArm::lXX}, // lha
{43, &JitArm::lXX}, // lhau
{44, &JitArm::stX}, // sth
{45, &JitArm::stX}, // sthu
{36, &JitArm::stX}, // stw
{37, &JitArm::stX}, // stwu
{38, &JitArm::stX}, // stb
{39, &JitArm::stX}, // stbu
{46, &JitArm::lmw}, // lmw
{47, &JitArm::stmw}, // stmw
{48, &JitArm::lfXX}, // lfs
{49, &JitArm::lfXX}, // lfsu
{50, &JitArm::lfXX}, // lfd
{51, &JitArm::lfXX}, // lfdu
{52, &JitArm::stfXX}, // stfs
{53, &JitArm::stfXX}, // stfsu
{54, &JitArm::stfXX}, // stfd
{55, &JitArm::stfXX}, // stfdu
{56, &JitArm::psq_l}, // psq_l
{57, &JitArm::psq_l}, // psq_lu
{60, &JitArm::psq_st}, // psq_st
{61, &JitArm::psq_st}, // psq_stu
//missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58
};
// Primary opcode 4 (paired-single) instructions keyed by the full 10-bit
// sub-opcode. Rows pointing at FallBackToInterpreter have no native emitter.
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm::FallBackToInterpreter}, // ps_cmpu0
{32, &JitArm::FallBackToInterpreter}, // ps_cmpo0
{40, &JitArm::ps_neg}, // ps_neg
{136, &JitArm::ps_nabs}, // ps_nabs
{264, &JitArm::ps_abs}, // ps_abs
{64, &JitArm::FallBackToInterpreter}, // ps_cmpu1
{72, &JitArm::ps_mr}, // ps_mr
{96, &JitArm::FallBackToInterpreter}, // ps_cmpo1
{528, &JitArm::ps_merge00}, // ps_merge00
{560, &JitArm::ps_merge01}, // ps_merge01
{592, &JitArm::ps_merge10}, // ps_merge10
{624, &JitArm::ps_merge11}, // ps_merge11
{1014, &JitArm::FallBackToInterpreter}, // dcbz_l
};
// Primary opcode 4 paired-single arithmetic instructions.
// NOTE(review): all keys are below 32, so these are presumably 5-bit
// sub-opcodes that the table-initialisation code replicates across the
// remaining SUBOP10 bits — confirm against that code.
static GekkoOPTemplate table4_2[] =
{
{10, &JitArm::ps_sum0}, // ps_sum0
{11, &JitArm::ps_sum1}, // ps_sum1
{12, &JitArm::ps_muls0}, // ps_muls0
{13, &JitArm::ps_muls1}, // ps_muls1
{14, &JitArm::ps_madds0}, // ps_madds0
{15, &JitArm::ps_madds1}, // ps_madds1
{18, &JitArm::ps_div}, // ps_div
{20, &JitArm::ps_sub}, // ps_sub
{21, &JitArm::ps_add}, // ps_add
{23, &JitArm::ps_sel}, // ps_sel
{24, &JitArm::ps_res}, // ps_res
{25, &JitArm::ps_mul}, // ps_mul
{26, &JitArm::ps_rsqrte}, // ps_rsqrte
{28, &JitArm::ps_msub}, // ps_msub
{29, &JitArm::ps_madd}, // ps_madd
{30, &JitArm::ps_nmsub}, // ps_nmsub
{31, &JitArm::ps_nmadd}, // ps_nmadd
};
// Primary opcode 4 indexed quantized load/store instructions. The update and
// non-update variants share one emitter each.
// NOTE(review): all keys are below 64, so these are presumably 6-bit
// sub-opcodes that the table-initialisation code replicates across the
// remaining SUBOP10 bits — confirm against that code.
static GekkoOPTemplate table4_3[] =
{
{6, &JitArm::psq_lx}, // psq_lx
{7, &JitArm::psq_stx}, // psq_stx
{38, &JitArm::psq_lx}, // psq_lux
{39, &JitArm::psq_stx}, // psq_stux
};
// Primary opcode 19 instructions (branches to CTR/LR, CR-bit logic, isync,
// mcrf, rfi) keyed by sub-opcode. The CR-bit logic operations all fall back
// to the interpreter.
static GekkoOPTemplate table19[] =
{
{528, &JitArm::bcctrx}, // bcctrx
{16, &JitArm::bclrx}, // bclrx
{257, &JitArm::FallBackToInterpreter}, // crand
{129, &JitArm::FallBackToInterpreter}, // crandc
{289, &JitArm::FallBackToInterpreter}, // creqv
{225, &JitArm::FallBackToInterpreter}, // crnand
{33, &JitArm::FallBackToInterpreter}, // crnor
{449, &JitArm::FallBackToInterpreter}, // cror
{417, &JitArm::FallBackToInterpreter}, // crorc
{193, &JitArm::FallBackToInterpreter}, // crxor
{150, &JitArm::DoNothing}, // isync
{0, &JitArm::mcrf}, // mcrf
{50, &JitArm::rfi}, // rfi
{18, &JitArm::Break}, // rfid
};
// Primary opcode 31 instructions keyed by sub-opcode: integer arithmetic and
// logic, compares, shifts, cache operations, indexed integer and
// floating-point loads/stores, and system-register moves.
// Rows pointing at FallBackToInterpreter have no native emitter; rows pointing
// at DoNothing are compiled to nothing.
static GekkoOPTemplate table31[] =
{
{266, &JitArm::arith}, // addx
{778, &JitArm::arith}, // addox
{10, &JitArm::arith}, // addcx
{522, &JitArm::arith}, // addcox
{138, &JitArm::addex}, // addex
{650, &JitArm::addex}, // addeox
{234, &JitArm::FallBackToInterpreter}, // addmex
{746, &JitArm::FallBackToInterpreter}, // addmeox
{202, &JitArm::FallBackToInterpreter}, // addzex
{714, &JitArm::FallBackToInterpreter}, // addzeox
{491, &JitArm::FallBackToInterpreter}, // divwx
{1003, &JitArm::FallBackToInterpreter}, // divwox
{459, &JitArm::FallBackToInterpreter}, // divwux
{971, &JitArm::FallBackToInterpreter}, // divwuox
{75, &JitArm::FallBackToInterpreter}, // mulhwx
{11, &JitArm::mulhwux}, // mulhwux
{235, &JitArm::arith}, // mullwx
{747, &JitArm::arith}, // mullwox
{104, &JitArm::negx}, // negx
{616, &JitArm::negx}, // negox
{40, &JitArm::arith}, // subfx
{552, &JitArm::arith}, // subfox
{8, &JitArm::FallBackToInterpreter}, // subfcx
{520, &JitArm::FallBackToInterpreter}, // subfcox
{136, &JitArm::FallBackToInterpreter}, // subfex
{648, &JitArm::FallBackToInterpreter}, // subfeox
{232, &JitArm::FallBackToInterpreter}, // subfmex
{744, &JitArm::FallBackToInterpreter}, // subfmeox
{200, &JitArm::FallBackToInterpreter}, // subfzex
{712, &JitArm::FallBackToInterpreter}, // subfzeox
{28, &JitArm::arith}, // andx
{60, &JitArm::arith}, // andcx
{444, &JitArm::arith}, // orx
{124, &JitArm::arith}, // norx
{316, &JitArm::arith}, // xorx
{412, &JitArm::arith}, // orcx
{476, &JitArm::arith}, // nandx
{284, &JitArm::arith}, // eqvx
{0, &JitArm::cmp}, // cmp
{32, &JitArm::cmpl}, // cmpl
{26, &JitArm::cntlzwx}, // cntlzwx
{922, &JitArm::extshx}, // extshx
{954, &JitArm::extsbx}, // extsbx
{536, &JitArm::arith}, // srwx
{792, &JitArm::arith}, // srawx
{824, &JitArm::srawix}, // srawix
{24, &JitArm::arith}, // slwx
{54, &JitArm::dcbst}, // dcbst
{86, &JitArm::FallBackToInterpreter}, // dcbf
{246, &JitArm::DoNothing}, // dcbtst
{278, &JitArm::DoNothing}, // dcbt
{470, &JitArm::FallBackToInterpreter}, // dcbi
{758, &JitArm::DoNothing}, // dcba
{1014, &JitArm::FallBackToInterpreter}, // dcbz
//load word
{23, &JitArm::lXX}, // lwzx
{55, &JitArm::FallBackToInterpreter}, // lwzux
//load halfword
{279, &JitArm::lXX}, // lhzx
{311, &JitArm::lXX}, // lhzux
//load halfword signextend
{343, &JitArm::lXX}, // lhax
{375, &JitArm::lXX}, // lhaux
//load byte
{87, &JitArm::lXX}, // lbzx
{119, &JitArm::lXX}, // lbzux
//load byte reverse
{534, &JitArm::lXX}, // lwbrx
{790, &JitArm::lXX}, // lhbrx
// Conditional load/store (Wii SMP)
{150, &JitArm::FallBackToInterpreter}, // stwcxd
{20, &JitArm::FallBackToInterpreter}, // lwarx
//load string (interpret these)
{533, &JitArm::FallBackToInterpreter}, // lswx
{597, &JitArm::FallBackToInterpreter}, // lswi
//store word
{151, &JitArm::stX}, // stwx
{183, &JitArm::stX}, // stwux
//store halfword
{407, &JitArm::stX}, // sthx
{439, &JitArm::stX}, // sthux
//store byte
{215, &JitArm::stX}, // stbx
{247, &JitArm::stX}, // stbux
//store bytereverse
{662, &JitArm::FallBackToInterpreter}, // stwbrx
{918, &JitArm::FallBackToInterpreter}, // sthbrx
{661, &JitArm::FallBackToInterpreter}, // stswx
{725, &JitArm::FallBackToInterpreter}, // stswi
// fp load/store
{535, &JitArm::lfXX}, // lfsx
{567, &JitArm::lfXX}, // lfsux
{599, &JitArm::lfXX}, // lfdx
{631, &JitArm::lfXX}, // lfdux
{663, &JitArm::stfXX}, // stfsx
{695, &JitArm::stfXX}, // stfsux
{727, &JitArm::stfXX}, // stfdx
{759, &JitArm::stfXX}, // stfdux
{983, &JitArm::FallBackToInterpreter}, // stfiwx
{19, &JitArm::FallBackToInterpreter}, // mfcr
{83, &JitArm::mfmsr}, // mfmsr
{144, &JitArm::FallBackToInterpreter}, // mtcrf
{146, &JitArm::mtmsr}, // mtmsr
{210, &JitArm::mtsr}, // mtsr
{242, &JitArm::FallBackToInterpreter}, // mtsrin
{339, &JitArm::mfspr}, // mfspr
{467, &JitArm::mtspr}, // mtspr
{371, &JitArm::mftb}, // mftb
{512, &JitArm::FallBackToInterpreter}, // mcrxr
{595, &JitArm::mfsr}, // mfsr
{659, &JitArm::FallBackToInterpreter}, // mfsrin
{4, &JitArm::twx}, // tw
{598, &JitArm::DoNothing}, // sync
{982, &JitArm::icbi}, // icbi
// Unused instructions on GC
{310, &JitArm::FallBackToInterpreter}, // eciwx
{438, &JitArm::FallBackToInterpreter}, // ecowx
{854, &JitArm::DoNothing}, // eieio
{306, &JitArm::FallBackToInterpreter}, // tlbie
{370, &JitArm::FallBackToInterpreter}, // tlbia
{566, &JitArm::DoNothing}, // tlbsync
};
// Sub-opcode -> handler pairs that InitTables() copies into dynaOpTable59.
// Slots not listed here keep the FallBackToInterpreter default.
static GekkoOPTemplate table59[] =
{
    {18, &JitArm::FallBackToInterpreter}, // fdivsx
    {20, &JitArm::fsubsx},                // fsubsx
    {21, &JitArm::faddsx},                // faddsx
//  {22, &JitArm::FallBackToInterpreter}, // fsqrtsx
    {24, &JitArm::fresx},                 // fresx
    {25, &JitArm::fmulsx},                // fmulsx
    {28, &JitArm::FallBackToInterpreter}, // fmsubsx
    {29, &JitArm::fmaddsx},               // fmaddsx
    {30, &JitArm::FallBackToInterpreter}, // fnmsubsx
    {31, &JitArm::fnmaddsx},              // fnmaddsx
};
// Sub-opcode -> handler pairs that InitTables() copies verbatim into
// dynaOpTable63. Entries are listed in ascending sub-opcode order; the table
// is only ever indexed by opcode, so the listing order carries no meaning.
static GekkoOPTemplate table63[] =
{
    {0,   &JitArm::FallBackToInterpreter}, // fcmpu
    {12,  &JitArm::FallBackToInterpreter}, // frspx
    {14,  &JitArm::fctiwx},                // fctiwx
    {15,  &JitArm::fctiwzx},               // fctiwzx
    {32,  &JitArm::FallBackToInterpreter}, // fcmpo
    {38,  &JitArm::FallBackToInterpreter}, // mtfsb1x
    {40,  &JitArm::fnegx},                 // fnegx
    {64,  &JitArm::FallBackToInterpreter}, // mcrfs
    {70,  &JitArm::FallBackToInterpreter}, // mtfsb0x
    {72,  &JitArm::fmrx},                  // fmrx
    {134, &JitArm::FallBackToInterpreter}, // mtfsfix
    {136, &JitArm::fnabsx},                // fnabsx
    {264, &JitArm::fabsx},                 // fabsx
    {583, &JitArm::FallBackToInterpreter}, // mffsx
    {711, &JitArm::FallBackToInterpreter}, // mtfsfx
};
// Handlers keyed by a 5-bit sub-opcode. InitTables() replicates each entry
// into dynaOpTable63 at every index of the form (i << 5) + opcode, i in [0, 32).
static GekkoOPTemplate table63_2[] =
{
    {18, &JitArm::FallBackToInterpreter}, // fdivx
    {20, &JitArm::fsubx},                 // fsubx
    {21, &JitArm::faddx},                 // faddx
    {22, &JitArm::FallBackToInterpreter}, // fsqrtx
    {23, &JitArm::fselx},                 // fselx
    {25, &JitArm::fmulx},                 // fmulx
    {26, &JitArm::frsqrtex},              // frsqrtex
    {28, &JitArm::FallBackToInterpreter}, // fmsubx
    {29, &JitArm::fmaddx},                // fmaddx
    {30, &JitArm::FallBackToInterpreter}, // fnmsubx
    {31, &JitArm::fnmaddx},               // fnmaddx
};
namespace JitArmTables
{

// Compiles one PowerPC instruction: dispatches op.inst through dynaOpTable
// using its primary opcode field, then, when the op carries an opinfo record,
// bumps its compile counter and records the PC being compiled.
void CompileInstruction(PPCAnalyst::CodeOp & op)
{
    JitArm *jitarm = (JitArm *)jit;
    (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst);

    GekkoOPInfo *info = op.opinfo;
    if (info)
    {
#ifdef OPLOG
        if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs"
        {
            // jit is a pointer (see the cast above and the lastUse update
            // below), so it has to be dereferenced with '->', not '.'.
            rsplocations.push_back(jit->js.compilerPC);
        }
#endif
        info->compileCount++;
        info->lastUse = jit->js.compilerPC;
    }
}

// Populates the dispatch tables from the static opcode templates.
// Only the first call does any work; later calls return immediately.
void InitTables()
{
    // once initialized, tables are read-only
    static bool initialized = false;
    if (initialized)
        return;

    //clear: every slot starts out as the interpreter fallback
    for (auto& tpl : dynaOpTable)
    {
        tpl = &JitArm::FallBackToInterpreter;
    }

    for (int i = 0; i < 32; i++)
    {
        dynaOpTable59[i] = &JitArm::FallBackToInterpreter;
    }

    for (int i = 0; i < 1024; i++)
    {
        dynaOpTable4 [i] = &JitArm::FallBackToInterpreter;
        dynaOpTable19[i] = &JitArm::FallBackToInterpreter;
        dynaOpTable31[i] = &JitArm::FallBackToInterpreter;
        dynaOpTable63[i] = &JitArm::FallBackToInterpreter;
    }

    for (const auto& tpl : primarytable)
    {
        dynaOpTable[tpl.opcode] = tpl.Inst;
    }

    // table4_2 entries are replicated across all 32 values of the bits above
    // their 5-bit opcode.
    for (int i = 0; i < 32; i++)
    {
        const int fill = i << 5;
        for (const auto& tpl : table4_2)
        {
            dynaOpTable4[fill + tpl.opcode] = tpl.Inst;
        }
    }

    // table4_3 entries are replicated across all 16 values of the bits above
    // their 6-bit opcode.
    for (int i = 0; i < 16; i++)
    {
        const int fill = i << 6;
        for (const auto& tpl : table4_3)
        {
            dynaOpTable4[fill + tpl.opcode] = tpl.Inst;
        }
    }

    // Exact-match entries are written after the replicated ones, so they win
    // wherever the two overlap.
    for (const auto& tpl : table4)
    {
        dynaOpTable4[tpl.opcode] = tpl.Inst;
    }

    for (const auto& tpl : table31)
    {
        dynaOpTable31[tpl.opcode] = tpl.Inst;
    }

    for (const auto& tpl : table19)
    {
        dynaOpTable19[tpl.opcode] = tpl.Inst;
    }

    for (const auto& tpl : table59)
    {
        dynaOpTable59[tpl.opcode] = tpl.Inst;
    }

    for (const auto& tpl : table63)
    {
        dynaOpTable63[tpl.opcode] = tpl.Inst;
    }

    // table63_2 is applied last and replicated across the upper 5 bits, so it
    // overrides any table63 entry that shares an index.
    for (int i = 0; i < 32; i++)
    {
        const int fill = i << 5;
        for (const auto& tpl : table63_2)
        {
            dynaOpTable63[fill + tpl.opcode] = tpl.Inst;
        }
    }

    initialized = true;
}

} // namespace

View file

@ -1,14 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCTables.h"
namespace JitArmTables
{
    // Compiles a single analysed PowerPC instruction through the JIT's
    // opcode dispatch tables.
    void CompileInstruction(PPCAnalyst::CodeOp& op);

    // Builds the opcode dispatch tables; must run before CompileInstruction.
    void InitTables();
}

View file

@ -1,659 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/MemoryUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
using namespace ArmGen;
//TODO - make an option
//#if _DEBUG
// bool enableDebug = false;
//#else
// bool enableDebug = false;
//#endif
// The single global instance of the ARM assembly routine manager defined in this file.
JitArmAsmRoutineManager asm_routines;
// Packs the low byte of val1 (upper half) and the low byte of val2 (lower
// half) into one 16-bit value and writes it to addr with a single
// PowerPC::Write_U16 call. Called from the generated paired-store routines.
static void WriteDual8(u32 val1, u32 val2, u32 addr)
{
    const u8 first = (u8)val1;
    const u8 second = (u8)val2;
    PowerPC::Write_U16(((u16)first << 8) | (u16)second, addr);
}
// Packs the low halfword of val1 (upper half) and the low halfword of val2
// (lower half) into one 32-bit value and writes it to addr with a single
// PowerPC::Write_U32 call. Called from the generated paired-store routines.
static void WriteDual16(u32 val1, u32 val2, u32 addr)
{
    const u16 first = (u16)val1;
    const u16 second = (u16)val2;
    PowerPC::Write_U32(((u32)first << 16) | (u32)second, addr);
}
// Combines val1 (upper 32 bits) and val2 (lower 32 bits) into one 64-bit
// value and writes it to addr with a single PowerPC::Write_U64 call.
// Called from the generated paired-float store routine.
static void WriteDual32(u32 val1, u32 val2, u32 addr)
{
    const u64 upper = (u64)val1 << 32;
    const u64 lower = (u64)val2;
    PowerPC::Write_U64(upper | lower, addr);
}
// Emits the JIT entry point, the dispatcher loop and the exit path, then
// appends the shared routines via GenerateCommon() and flushes the icache.
//
// Register setup done by the emitted prologue:
//   R9 = &PowerPC::ppcState.spr[0]   (base for every PPCSTATE_OFF access)
//   R8 = Memory::physical_base
// Labels recorded along the way: enterCode, dispatcher, dispatcherNoCheck, doTiming.
void JitArmAsmRoutineManager::Generate()
{
enterCode = GetCodePtr();
PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR);
// Take care to 8-byte align stack for function calls.
// We are misaligned here because of an odd number of args for PUSH.
// It's not like x86 where you need to account for an extra 4 bytes
// consumed by CALL.
SUB(_SP, _SP, 4);
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
MOVI2R(R8, (u32)Memory::physical_base);
// First entry skips the downcount check and goes straight to block lookup.
FixupBranch skipToRealDispatcher = B();
dispatcher = GetCodePtr();
// NOTE(review): this printf runs every time the routines are generated and
// looks like leftover debug output - confirm whether it should stay.
printf("Dispatcher is %p\n", dispatcher);
// Downcount Check
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = B_CC(CC_MI);
// dbg_exit is only assigned, and only resolved further down, when
// bEnableDebugging is set; both uses are guarded by the same condition.
FixupBranch dbg_exit;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
MOVI2R(R0, (u32)PowerPC::GetStatePtr());
LDR(R0, R0);
TST(R0, PowerPC::CPU_STEPPING);
FixupBranch not_stepping = B_CC(CC_EQ);
// XXX: Check for breakpoints
dbg_exit = B();
SetJumpTarget(not_stepping);
}
SetJumpTarget(skipToRealDispatcher);
dispatcherNoCheck = GetCodePtr();
// This block of code gets the address of the compiled block of code
// It runs though to the compiling portion if it isn't found
LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12
Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK
BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here.
MOVI2R(R14, (u32)jit->GetBlockCache()->iCache.data());
LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here
// R12 Confirmed this is the correct iCache Location loaded.
TST(R12, 0x80); // Test to see if it is a JIT block.
FixupBranch no_block = B_CC(CC_NEQ);
// Success, it is our Jitblock.
MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
// LDR R14 right here to get CodePointers()[0] pointer.
LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size
LDR(R14, R14, R12); // Load the block address in to R14
B(R14);
// No need to jump anywhere after here, the block will go back to dispatcher start
SetJumpTarget(no_block);
// If we get to this point, that means that we don't have the block cached to execute
// So call ArmJit to compile the block and then execute it.
MOVI2R(R14, (u32)&Jit);
BL(R14);
B(dispatcherNoCheck);
// Reached from the dispatcher when the downcount check above came out negative.
SetJumpTarget(bail);
doTiming = GetCodePtr();
// XXX: In JIT64, Advance() gets called /after/ the exception checking
// once it jumps back to the start of outerLoop
QuickCallFunction(R14, (void*)&CoreTiming::Advance);
// Does exception checking
// pc is copied into npc before the call and npc back into pc afterwards.
LDR(R0, R9, PPCSTATE_OFF(pc));
STR(R0, R9, PPCSTATE_OFF(npc));
QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
LDR(R0, R9, PPCSTATE_OFF(npc));
STR(R0, R9, PPCSTATE_OFF(pc));
// Check the state pointer to see if we are exiting
// Gets checked on every exception check
MOVI2R(R0, (u32)PowerPC::GetStatePtr());
MVN(R1, 0);
LDR(R0, R0);
// R1 is all ones, so this tests whether the loaded state value is non-zero.
TST(R0, R1);
FixupBranch Exit = B_CC(CC_NEQ);
B(dispatcher);
SetJumpTarget(Exit);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
SetJumpTarget(dbg_exit);
// Let the waiting thread know we are done leaving
MOVI2R(R0, (u32)&PowerPC::FinishStateMove);
BL(R0);
// Undo the 4-byte alignment adjustment from the prologue before restoring registers.
ADD(_SP, _SP, 4);
POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns
GenerateCommon();
FlushIcache();
}
// Emits the shared quantized load/store helper routines and builds the two
// 16-entry jump tables that point at them (pairedLoadQuantized and
// pairedStoreQuantized), followed by the profile-counter increment routine.
//
// In both tables entries 0-7 are the two-value ("paired") variants and
// entries 8-15 the single-value variants; within each group the slots are
// float, three illegal slots, U8, U16, S8, S16.
//
// Register conventions inside the emitted routines (see the comments below):
// R10 = address, R11 = scale table offset, R12 = scratch, R8 = memory base,
// S0/S1 (D0) = values.
void JitArmAsmRoutineManager::GenerateCommon()
{
// R14 is LR
// R12 is scratch
// R11 is scale
// R10 is the address
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
Operand2 arghmask(3, 3); // 0x0C000000
NEONXEmitter nemit(this);
// Illegal quantization types trap with a breakpoint.
const u8* loadPairedIllegal = GetCodePtr();
BKPT(0x10);
const u8* loadPairedFloatTwo = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);
MOV(_PC, _LR);
}
// NOTE(review): this emits exactly the same sequence as loadPairedFloatTwo
// (a full D0 load) - confirm that loading both lanes is intended here.
const u8* loadPairedFloatOne = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);
MOV(_PC, _LR);
}
// Integer loads: fetch the raw value(s), convert to float, then multiply by
// the factor read from m_dequantizeTableS + R11.
const u8* loadPairedU8Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRB(R12, R10);
VMOV(S0, R12);
LDRB(R12, R10, 1);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VCVT(S1, S1, TO_FLOAT);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedU8One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRB(R12, R10);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS8Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRSB(R12, R10);
VMOV(S0, R12);
LDRSB(R12, R10, 1);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VCVT(S1, S1, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS8One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRSB(R12, R10);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
const u8* loadPairedU16Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
VMOV(S0, R12);
LDRH(R12, R10, 2);
REV16(R12, R12);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VCVT(S1, S1, TO_FLOAT);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedU16One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS16Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S0, R12);
LDRH(R12, R10, 2);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VCVT(S1, S1, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS16One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S0, R12);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
// The jump table itself lives inside the code space, 16-byte aligned.
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
pairedLoadQuantized[0] = loadPairedFloatTwo;
pairedLoadQuantized[1] = loadPairedIllegal;
pairedLoadQuantized[2] = loadPairedIllegal;
pairedLoadQuantized[3] = loadPairedIllegal;
pairedLoadQuantized[4] = loadPairedU8Two;
pairedLoadQuantized[5] = loadPairedU16Two;
pairedLoadQuantized[6] = loadPairedS8Two;
pairedLoadQuantized[7] = loadPairedS16Two;
pairedLoadQuantized[8] = loadPairedFloatOne;
pairedLoadQuantized[9] = loadPairedIllegal;
pairedLoadQuantized[10] = loadPairedIllegal;
pairedLoadQuantized[11] = loadPairedIllegal;
pairedLoadQuantized[12] = loadPairedU8One;
pairedLoadQuantized[13] = loadPairedU16One;
pairedLoadQuantized[14] = loadPairedS8One;
pairedLoadQuantized[15] = loadPairedS16One;
// Stores
// NOTE(review): every call-out path below uses PUSH(5, ...), an odd number of
// registers. Generate() explicitly re-aligns the stack to 8 bytes after its
// own odd-count PUSH; confirm the stack is suitably aligned at these calls.
const u8* storePairedIllegal = GetCodePtr();
BKPT(0x21);
const u8* storePairedFloat = GetCodePtr();
{
// Addresses with any arghmask bit set take the slow path through WriteDual32.
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VREV32(I_8, D0, D0);
nemit.VST1(I_32, D0, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual32);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
// The paired integer stores always go through the WriteDual* helpers.
const u8* storePairedU8 = GetCodePtr();
{
// R10 is the addr
// R11 is the scale
// R12 is scratch
// S0, S1 is the values
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual8);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedS8 = GetCodePtr();
{
// R10 is the addr
// R11 is the scale
// R12 is scratch
// S0, S1 is the values
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual8);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedU16 = GetCodePtr();
{
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual16);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedS16 = GetCodePtr();
{
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual16);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleIllegal = GetCodePtr();
BKPT(0x27);
const u8* storeSingleFloat = GetCodePtr();
{
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VMOV(R12, S0);
REV(R12, R12);
STR(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U32);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
// NOTE(review): in the four quantized single stores below, the branch to the
// slow path is taken before the VCVT is executed, so the slow path hands the
// scaled float's raw bits (not the converted integer) to PowerPC::Write_U8 /
// Write_U16 - confirm whether that is intended.
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VMOV(R12, S0);
STRB(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U8);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleS8 = GetCodePtr();
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R12, S0);
STRB(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U8);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VMOV(R12, S0);
REV16(R12, R12);
STRH(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U16);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleS16 = GetCodePtr();
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R12, S0);
REV16(R12, R12);
STRH(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U16);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
pairedStoreQuantized[0] = storePairedFloat;
pairedStoreQuantized[1] = storePairedIllegal;
pairedStoreQuantized[2] = storePairedIllegal;
pairedStoreQuantized[3] = storePairedIllegal;
pairedStoreQuantized[4] = storePairedU8;
pairedStoreQuantized[5] = storePairedU16;
pairedStoreQuantized[6] = storePairedS8;
pairedStoreQuantized[7] = storePairedS16;
pairedStoreQuantized[8] = storeSingleFloat;
pairedStoreQuantized[9] = storeSingleIllegal;
pairedStoreQuantized[10] = storeSingleIllegal;
pairedStoreQuantized[11] = storeSingleIllegal;
pairedStoreQuantized[12] = storeSingleU8;
pairedStoreQuantized[13] = storeSingleU16;
pairedStoreQuantized[14] = storeSingleS8;
pairedStoreQuantized[15] = storeSingleS16;
// Profile counter helper: R0 points at three consecutive 64-bit values
// (start, end, counter); the routine adds (end - start) to counter.
m_increment_profile_counter = AlignCode16();
nemit.VLD1(I_64, D0, R0); // Start
ADD(R0, R0, 8);
nemit.VLD1(I_64, D1, R0); // End
ADD(R0, R0, 8);
nemit.VLD1(I_64, D2, R0); // Counter
nemit.VSUB(I_64, D1, D1, D0);
nemit.VADD(I_64, D2, D2, D1);
nemit.VST1(I_64, D2, R0);
MOV(_PC, _LR);
}

View file

@ -1,32 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
// Owns the code space that holds the ARM JIT's hand-emitted assembly
// routines (dispatcher and shared helpers).
class JitArmAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock
{
private:
    // Emits the dispatcher and then the shared routines.
    void Generate();
    // Emits the shared quantized load/store routines.
    void GenerateCommon();

public:
    // Entry point of the emitted profile-counter increment routine.
    const u8* m_increment_profile_counter;

    // Allocates the code space, emits all routines into it, then
    // write-protects the result.
    void Init()
    {
        const int code_space_bytes = 8192;
        AllocCodeSpace(code_space_bytes);
        Generate();
        WriteProtect();
    }

    // Releases the code space obtained in Init().
    void Shutdown()
    {
        FreeCodeSpace();
    }
};

extern JitArmAsmRoutineManager asm_routines;

View file

@ -1,252 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
using namespace ArmGen;
// Starts without an emitter; Init() or SetEmitter() supplies one later.
ArmFPRCache::ArmFPRCache()
    : emit(nullptr)
{
}
// Binds the cache to an emitter and resets both register pools: every
// guest-holding slot is marked unassigned (PPCReg == 33) and every host
// scratch register is marked free.
void ArmFPRCache::Init(ARMXEmitter *emitter)
{
    emit = emitter;

    // The two queries also fill in NUMPPCREG / NUMARMREG.
    ARMReg *guestPool = GetPPCAllocationOrder(NUMPPCREG);
    ARMReg *scratchPool = GetAllocationOrder(NUMARMREG);

    for (u8 i = 0; i < NUMPPCREG; ++i)
    {
        auto &slot = ArmCRegs[i];
        slot.Reg = guestPool[i];
        slot.PPCReg = 33;
        slot.LastLoad = 0;
        slot.PS1 = false;
    }

    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        auto &scratch = ArmRegs[i];
        scratch.Reg = scratchPool[i];
        scratch.free = true;
    }
}
// Drops any cached state left over from a previous block without emitting
// stores: each loaded guest FPR half is marked not-loaded and the slot it
// occupied is released.
// (The previous block may leave state behind because conditional branches do
// not clear the register cache.)
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
    for (u8 fpr = 0; fpr < 32; ++fpr)
    {
        // half 0 is handled before half 1.
        for (u8 half = 0; half < 2; ++half)
        {
            auto &entry = _regs[fpr][half];
            if (entry.GetType() == REG_NOTLOADED)
                continue;

            const u32 slot = entry.GetRegIndex();
            ArmCRegs[slot].PPCReg = 33;
            ArmCRegs[slot].LastLoad = 0;
            entry.Flush();
        }
    }
}
// Returns the host registers that may hold guest (PPC) FPR halves, in
// allocation order, and stores the number of entries in count.
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
{
    // This will return us the allocation order of the registers we can use on
    // the ppc side.
    static ARMReg allocationOrder[] =
    {
        D4, D5, D6, D7, D8, D9, D10, D11, D12, D13,
        D14, D15, D16, D17, D18, D19, D20, D21, D22,
        D23, D24, D25, D26, D27, D28, D29, D30, D31
    };
    // Divide by the real element size rather than sizeof(int), so the count
    // stays right even if ARMReg is not int-sized.
    count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
    return allocationOrder;
}
// Returns the host scratch registers handed out by GetReg(), in allocation
// order, and stores the number of entries in count.
ARMReg *ArmFPRCache::GetAllocationOrder(int &count)
{
    // This will return us the allocation order of the registers we can use on
    // the host side.
    static ARMReg allocationOrder[] =
    {
        D0, D1, D2, D3
    };
    // Divide by the real element size rather than sizeof(int), so the count
    // stays right even if ARMReg is not int-sized.
    count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
    return allocationOrder;
}
// Hands out the first free host scratch register. With AutoLock set the
// register is marked busy until Unlock() is called on it. If none is free
// the assertion fires and D31 is returned.
ARMReg ArmFPRCache::GetReg(bool AutoLock)
{
    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        auto &candidate = ArmRegs[i];
        if (!candidate.free)
            continue;

        if (AutoLock)
            candidate.free = false;
        return candidate.Reg;
    }

    // Every scratch register is currently locked.
    _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
    return D31;
}
// Marks the scratch register V0 as free again; asserts if it was not locked.
// Registers that are not part of the scratch pool are ignored.
void ArmFPRCache::Unlock(ARMReg V0)
{
    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        if (ArmRegs[i].Reg != V0)
            continue;

        _assert_msg_(_DYNA_REC, !ArmRegs[i].free, "This register is already unlocked");
        ArmRegs[i].free = true;
    }
}
// Returns the index of the slot with the largest LastLoad value (the first
// one on ties; 0 when every value is 0). When increment is set, every
// slot's LastLoad is bumped by one before it is compared.
u32 ArmFPRCache::GetLeastUsedRegister(bool increment)
{
    u32 oldestAge = 0;
    u8 oldestSlot = 0;

    for (u8 i = 0; i < NUMPPCREG; ++i)
    {
        auto &slot = ArmCRegs[i];
        if (increment)
            ++slot.LastLoad;

        if (slot.LastLoad > oldestAge)
        {
            oldestAge = slot.LastLoad;
            oldestSlot = i;
        }
    }

    return oldestSlot;
}
// Looks for the first slot that holds no guest register (PPCReg == 33).
// On success stores its index in regindex and returns true; otherwise
// returns false and leaves regindex untouched.
bool ArmFPRCache::FindFreeRegister(u32 &regindex)
{
    u8 i = 0;
    while (i < NUMPPCREG && ArmCRegs[i].PPCReg != 33)
        ++i;

    if (i >= NUMPPCREG)
        return false;

    regindex = i;
    return true;
}
// Returns the host register caching one half (PS0 or PS1) of guest FPR preg.
// If the half is not cached yet it is assigned a free slot, or - when no slot
// is free - the slot with the largest LastLoad is written back and reused.
// preLoad controls whether the current value is loaded from ppcState.
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
{
    // Ages every slot and picks the eviction candidate up front.
    const u32 victim = GetLeastUsedRegister(true);

    if (_regs[preg][PS1].GetType() != REG_NOTLOADED)
    {
        // Already cached: just mark it as freshly used.
        const u8 cached = _regs[preg][PS1].GetRegIndex();
        ArmCRegs[cached].LastLoad = 0;
        return ArmCRegs[cached].Reg;
    }

    const s16 wantedOffset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);

    u32 slot;
    if (!FindFreeRegister(slot))
    {
        // Alright, we couldn't get a free space, dump that least used register
        slot = victim;
        const s16 victimOffset = PPCSTATE_OFF(ps) + (ArmCRegs[slot].PPCReg * 16) + (ArmCRegs[slot].PS1 ? 8 : 0);
        emit->VSTR(ArmCRegs[slot].Reg, R9, victimOffset);
        _regs[ArmCRegs[slot].PPCReg][ArmCRegs[slot].PS1].Flush();
    }

    ArmCRegs[slot].PPCReg = preg;
    ArmCRegs[slot].LastLoad = 0;
    ArmCRegs[slot].PS1 = PS1;
    _regs[preg][PS1].LoadToReg(slot);

    if (preLoad)
        emit->VLDR(ArmCRegs[slot].Reg, R9, wantedOffset);

    return ArmCRegs[slot].Reg;
}
// Host register caching the first half (PS1 == false) of guest FPR preg.
ARMReg ArmFPRCache::R0(u32 preg, bool preLoad)
{
    const bool secondHalf = false;
    return GetPPCReg(preg, secondHalf, preLoad);
}
// Host register caching the second half (PS1 == true) of guest FPR preg.
ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
{
    const bool secondHalf = true;
    return GetPPCReg(preg, secondHalf, preLoad);
}
// Emits a store back to ppcState for every cached guest FPR half. With
// FLUSH_ALL the cache entry is also released; any other mode keeps the
// value cached.
void ArmFPRCache::Flush(FlushMode mode)
{
    for (u8 fpr = 0; fpr < 32; ++fpr)
    {
        // half 0 lives at byte offset 0 of the 16-byte ps entry, half 1 at 8.
        for (u8 half = 0; half < 2; ++half)
        {
            auto &entry = _regs[fpr][half];
            if (entry.GetType() == REG_NOTLOADED)
                continue;

            const s16 offset = PPCSTATE_OFF(ps) + (fpr * 16) + (half * 8);
            const u32 slot = entry.GetRegIndex();
            emit->VSTR(ArmCRegs[slot].Reg, R9, offset);

            if (mode != FLUSH_ALL)
                continue;

            ArmCRegs[slot].PPCReg = 33;
            ArmCRegs[slot].LastLoad = 0;
            entry.Flush();
        }
    }
}
// Writes both cached halves of guest FPR preg (if any) back to ppcState and
// releases the slots they occupied.
void ArmFPRCache::StoreFromRegister(u32 preg)
{
    for (u8 half = 0; half < 2; ++half)
    {
        auto &entry = _regs[preg][half];
        if (entry.GetType() == REG_NOTLOADED)
            continue;

        const s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (half * 8);
        const u32 slot = entry.GetRegIndex();
        emit->VSTR(ArmCRegs[slot].Reg, R9, offset);

        ArmCRegs[slot].PPCReg = 33;
        ArmCRegs[slot].LastLoad = 0;
        entry.Flush();
    }
}

View file

@ -1,50 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// Capacity of the slot arrays below.
#define ARMFPUREGS 32

// Register cache mapping guest FPR halves onto host registers for the ARM JIT.
class ArmFPRCache
{
private:
    OpArg _regs[32][2];            // One for each FPR reg; second index selects the half
    JRCPPC ArmCRegs[ARMFPUREGS];   // Slots that can hold guest values
    JRCReg ArmRegs[ARMFPUREGS];    // Host scratch registers handed out by GetReg()

    int NUMPPCREG;                 // Number of valid ArmCRegs entries (set in Init)
    int NUMARMREG;                 // Number of valid ArmRegs entries (set in Init)

    ArmGen::ARMReg *GetAllocationOrder(int &count);
    ArmGen::ARMReg *GetPPCAllocationOrder(int &count);

    ArmGen::ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad);

    u32 GetLeastUsedRegister(bool increment);
    bool FindFreeRegister(u32 &regindex);

protected:
    ArmGen::ARMXEmitter *emit;

public:
    ArmFPRCache();
    ~ArmFPRCache() {}

    void Init(ArmGen::ARMXEmitter *emitter);
    void Start(PPCAnalyst::BlockRegStats &stats);

    void SetEmitter(ArmGen::ARMXEmitter *emitter) {emit = emitter;}

    // Returns a host scratch register; locked until Unlock() when AutoLock is set.
    ArmGen::ARMReg GetReg(bool AutoLock = true);
    void Unlock(ArmGen::ARMReg V0);

    void Flush(FlushMode mode = FLUSH_ALL);

    // Return the host register caching the first / second half of guest FPR preg.
    ArmGen::ARMReg R0(u32 preg, bool preLoad = true);
    ArmGen::ARMReg R1(u32 preg, bool preLoad = true);

    void StoreFromRegister(u32 preg);
};

View file

@ -1,319 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Starts without an emitter; Init() supplies one later.
ArmRegCache::ArmRegCache()
    : emit(nullptr)
{
}
// Binds the cache to an emitter and resets both register pools: every
// guest-holding slot is marked unassigned (PPCReg == 33) and every host
// scratch register is marked free.
void ArmRegCache::Init(ARMXEmitter *emitter)
{
    emit = emitter;

    // The two queries also fill in NUMPPCREG / NUMARMREG.
    ARMReg *guestPool = GetPPCAllocationOrder(NUMPPCREG);
    ARMReg *scratchPool = GetAllocationOrder(NUMARMREG);

    for (u8 i = 0; i < NUMPPCREG; ++i)
    {
        auto &slot = ArmCRegs[i];
        slot.Reg = guestPool[i];
        slot.PPCReg = 33;
        slot.LastLoad = 0;
    }

    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        auto &scratch = ArmRegs[i];
        scratch.Reg = scratchPool[i];
        scratch.free = true;
    }
}
// Drops any cached state left over from a previous block without emitting
// stores: slots holding a guest GPR are released and every guest register
// entry is flushed.
// (The previous block may leave state behind because conditional branches do
// not clear the register cache.)
void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
    for (u8 gpr = 0; gpr < 32; ++gpr)
    {
        auto &entry = regs[gpr];
        if (entry.GetType() == REG_REG)
        {
            auto &slot = ArmCRegs[entry.GetRegIndex()];
            slot.PPCReg = 33;
            slot.LastLoad = 0;
        }
        entry.Flush();
    }
}
// Returns the host registers that may hold guest (PPC) GPRs, in allocation
// order, and stores the number of entries in count.
ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
{
    // This will return us the allocation order of the registers we can use on
    // the ppc side.
    static ARMReg allocationOrder[] =
    {
        R0, R1, R2, R3, R4, R5, R6, R7
    };
    // Divide by the real element size rather than sizeof(int), so the count
    // stays right even if ARMReg is not int-sized.
    count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
    return allocationOrder;
}
// Returns the host scratch registers handed out by GetReg(), in allocation
// order, and stores the number of entries in count.
ARMReg *ArmRegCache::GetAllocationOrder(int &count)
{
    // This will return us the allocation order of the registers we can use on
    // the host side.
    static ARMReg allocationOrder[] =
    {
        R14, R12, R11, R10
    };
    // Divide by the real element size rather than sizeof(int), so the count
    // stays right even if ARMReg is not int-sized.
    count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
    return allocationOrder;
}
// Hands out the first free host scratch register. With AutoLock set the
// register is marked busy until Unlock() is called on it. If none is free
// the assertion fires and R0 is returned.
ARMReg ArmRegCache::GetReg(bool AutoLock)
{
    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        auto &candidate = ArmRegs[i];
        if (!candidate.free)
            continue;

        if (AutoLock)
            candidate.free = false;
        return candidate.Reg;
    }

    // Every scratch register is currently locked.
    _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
    return R0;
}
// Releases up to four scratch registers at once. R0 is mandatory and is
// asserted to be locked; R1-R3 are released only when they are not
// INVALID_REG, and without the already-unlocked check.
void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3)
{
    for (u8 i = 0; i < NUMARMREG; ++i)
    {
        auto &scratch = ArmRegs[i];

        if (scratch.Reg == R0)
        {
            _assert_msg_(_DYNA_REC, !scratch.free, "This register is already unlocked");
            scratch.free = true;
        }

        const bool matchesOptional =
            (R1 != INVALID_REG && scratch.Reg == R1) ||
            (R2 != INVALID_REG && scratch.Reg == R2) ||
            (R3 != INVALID_REG && scratch.Reg == R3);
        if (matchesOptional)
            scratch.free = true;
    }
}
// Returns the index of the slot with the largest LastLoad value (the first
// one on ties; 0 when every value is 0). When increment is set, every
// slot's LastLoad is bumped by one before it is compared.
u32 ArmRegCache::GetLeastUsedRegister(bool increment)
{
    u32 oldestAge = 0;
    u8 oldestSlot = 0;

    for (u8 i = 0; i < NUMPPCREG; ++i)
    {
        auto &slot = ArmCRegs[i];
        if (increment)
            ++slot.LastLoad;

        if (slot.LastLoad > oldestAge)
        {
            oldestAge = slot.LastLoad;
            oldestSlot = i;
        }
    }

    return oldestSlot;
}
// Looks for the first slot that holds no guest register (PPCReg == 33).
// On success stores its index in regindex and returns true; otherwise
// returns false and leaves regindex untouched.
bool ArmRegCache::FindFreeRegister(u32 &regindex)
{
    u8 i = 0;
    while (i < NUMPPCREG && ArmCRegs[i].PPCReg != 33)
        ++i;

    if (i >= NUMPPCREG)
        return false;

    regindex = i;
    return true;
}
// Returns a host register holding the current value of guest GPR preg.
// Immediates are materialised through BindToRegister; values that are not
// cached are loaded from ppcState, evicting the slot with the largest
// LastLoad when no slot is free.
ARMReg ArmRegCache::R(u32 preg)
{
    if (regs[preg].GetType() == REG_IMM)
        return BindToRegister(preg, true, true);

    // Ages every slot and picks the eviction candidate up front.
    const u32 victim = GetLeastUsedRegister(true);

    // Check if already Loaded
    if (regs[preg].GetType() == REG_REG)
    {
        const u8 cached = regs[preg].GetRegIndex();
        ArmCRegs[cached].LastLoad = 0;
        return ArmCRegs[cached].Reg;
    }

    u32 slot;
    if (FindFreeRegister(slot))
    {
        // A free slot exists: just load the guest value into it.
        emit->LDR(ArmCRegs[slot].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
    }
    else
    {
        // Alright, we couldn't get a free space, dump that least used register
        slot = victim;
        emit->STR(ArmCRegs[slot].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[slot].PPCReg * 4);
        emit->LDR(ArmCRegs[slot].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
        regs[ArmCRegs[slot].PPCReg].Flush();
    }

    ArmCRegs[slot].PPCReg = preg;
    ArmCRegs[slot].LastLoad = 0;
    regs[preg].LoadToReg(slot);
    return ArmCRegs[slot].Reg;
}
// Convenience overload: binds preg without converting immediates
// (kill_imm == false) and discards the resulting register.
void ArmRegCache::BindToRegister(u32 preg, bool doLoad)
{
    const bool killImmediate = false;
    BindToRegister(preg, doLoad, killImmediate);
}
// Makes sure guest GPR preg lives in a host register and returns it.
//  - Already in a register: the slot is marked freshly used.
//  - Immediate: returns INVALID_REG unless kill_imm is set; otherwise the
//    immediate is moved into a register.
//  - Not loaded: the value is loaded from ppcState.
// doLoad controls whether the value is actually materialised in the register.
// When no slot is free, the slot with the largest LastLoad is written back
// and reused.
ARMReg ArmRegCache::BindToRegister(u32 preg, bool doLoad, bool kill_imm)
{
    const u32 victim = GetLeastUsedRegister(false);
    u32 freeSlot;
    const bool haveFree = FindFreeRegister(freeSlot);

    const bool isImm = regs[preg].GetType() == REG_IMM;
    if (!isImm && regs[preg].GetType() != REG_NOTLOADED)
    {
        // Already bound to a host register.
        const u8 cached = regs[preg].GetRegIndex();
        ArmCRegs[cached].LastLoad = 0;
        return ArmCRegs[cached].Reg;
    }

    if (isImm && !kill_imm)
        return INVALID_REG;

    u32 slot;
    if (haveFree)
    {
        slot = freeSlot;
    }
    else
    {
        // Evict: write the old guest value back before the register is reused.
        slot = victim;
        emit->STR(ArmCRegs[slot].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[slot].PPCReg * 4);
    }

    if (doLoad)
    {
        if (isImm)
            emit->MOVI2R(ArmCRegs[slot].Reg, regs[preg].GetImm());
        else
            emit->LDR(ArmCRegs[slot].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
    }

    if (!haveFree)
        regs[ArmCRegs[slot].PPCReg].Flush();

    ArmCRegs[slot].PPCReg = preg;
    ArmCRegs[slot].LastLoad = 0;
    regs[preg].LoadToReg(slot);
    return ArmCRegs[slot].Reg;
}
// Records that guest GPR preg now holds the constant imm. If the register
// was cached in a host register, that slot is released without a write-back.
void ArmRegCache::SetImmediate(u32 preg, u32 imm)
{
    auto &entry = regs[preg];
    if (entry.GetType() == REG_REG)
    {
        // Dump real reg at this point
        auto &slot = ArmCRegs[entry.GetRegIndex()];
        slot.PPCReg = 33;
        slot.LastLoad = 0;
    }
    entry.LoadToImm(imm);
}
// Writes every cached PPC register (indices 0..31) back to its slot at
// R9 + PPCSTATE_OFF(gpr) + index * 4.
//
//  FLUSH_ALL    - immediates are first bound to a host register, then every
//                 register-backed value is stored and its slot released.
//  other modes  - immediates are stored through a temporary obtained from
//                 GetReg(); register-backed values are stored but the cache
//                 bookkeeping is left untouched.
void ArmRegCache::Flush(FlushMode mode)
{
	const bool release = (mode == FLUSH_ALL);

	for (u8 a = 0; a < 32; ++a)
	{
		if (regs[a].GetType() == REG_IMM)
		{
			if (!release)
			{
				// Materialise the constant in a scratch register and store it,
				// leaving the cached immediate in place.
				ARMReg scratch = GetReg();
				emit->MOVI2R(scratch, regs[a].GetImm());
				emit->STR(scratch, R9, PPCSTATE_OFF(gpr) + a * 4);
				Unlock(scratch);
			}
			else
			{
				// Turns the entry into a REG_REG, which is handled just below.
				BindToRegister(a, true, true);
			}
		}

		if (regs[a].GetType() != REG_REG)
			continue;

		const u32 host = regs[a].GetRegIndex();
		emit->STR(ArmCRegs[host].Reg, R9, PPCSTATE_OFF(gpr) + a * 4);

		if (!release)
			continue;

		ArmCRegs[host].PPCReg = 33;
		ArmCRegs[host].LastLoad = 0;
		regs[a].Flush();
	}
}
// Writes PPC register `preg` back to R9 + PPCSTATE_OFF(gpr) + preg * 4 and
// releases the host register slot it occupied. A cached immediate is bound to
// a host register first so that it takes the same store path. Registers that
// are not register-backed after that step are left alone.
void ArmRegCache::StoreFromRegister(u32 preg)
{
	auto& guest = regs[preg];

	// An immediate becomes a REG_REG here and is then stored below.
	if (guest.GetType() == REG_IMM)
		BindToRegister(preg, true, true);

	if (guest.GetType() != REG_REG)
		return;

	auto& host = ArmCRegs[guest.GetRegIndex()];
	emit->STR(host.Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
	host.PPCReg = 33;
	host.LastLoad = 0;
	guest.Flush();
}

View file

@ -1,140 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCAnalyst.h"
// The ARM register cache preloads the most frequently used PPC registers
// before a block runs, because every load from memory costs two instructions.
// Host registers R0 to RMAX are used to hold PPC registers.
// The allocation order is described below.
#define ARMREGS 16
// - R0 to R9 are allocated to PPC registers first.
// - General-purpose registers for the host side are taken starting at R14 and
//   going downwards.
// - R13 is reserved for our stack pointer; never use it unless you save it.
// - That leaves R14, R12, R11 and R10 to work with as instruction temporaries.
// NOTE(review): the register cache implementation uses R9 as the base register
// for its PPCSTATE_OFF(gpr) loads and stores - confirm whether R9 really
// belongs in the "R0 to R9 for PPC" range above.
// How the register cache currently tracks a PPC register.
enum RegType
{
	REG_NOTLOADED = 0, // Not held by the cache
	REG_REG       = 1, // Held in a host register
	REG_IMM       = 2, // Known constant; no host register involved
	REG_AWAY      = 3, // Bound to a host register, but not preloaded
};
// Selects what a register cache flush does after writing values back.
enum FlushMode
{
	FLUSH_ALL            = 0, // Write back and release the cached registers
	FLUSH_MAINTAIN_STATE = 1, // Write back but keep the cache state as it is
};
class OpArg
{
private:
RegType m_type; // store type
u8 m_reg; // index to register
u32 m_value; // IMM value
public:
OpArg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
RegType GetType()
{
return m_type;
}
u8 GetRegIndex()
{
return m_reg;
}
u32 GetImm()
{
return m_value;
}
void LoadToAway(u8 reg)
{
m_type = REG_AWAY;
m_reg = reg;
}
void LoadToReg(u8 reg)
{
m_type = REG_REG;
m_reg = reg;
}
void LoadToImm(u32 imm)
{
m_type = REG_IMM;
m_value = imm;
}
void Flush()
{
m_type = REG_NOTLOADED;
}
};
// Bookkeeping for one host register slot that the cache uses to hold a PPC
// register value.
struct JRCPPC
{
u32 PPCReg; // Tied to which PPC Register; the cache writes 33 here when it releases the slot
bool PS1; // NOTE(review): purpose not evident from the cache code in view - confirm
ArmGen::ARMReg Reg; // Tied to which ARM Register
u32 LastLoad; // Reset to 0 whenever the slot is bound, reused or released; presumably feeds least-used selection - confirm
};
// Describes one host register tracked separately from the PPC-holding slots.
struct JRCReg
{
ArmGen::ARMReg Reg; // Which reg this is.
bool free; // presumably true while the register is not handed out - confirm against GetReg/Unlock
};
// Register cache for the ARM JIT: tracks, for each of the 32 PPC registers,
// whether its value is unloaded, held in a host register or known as an
// immediate, and emits the loads/stores needed to move between those states.
class ArmRegCache
{
private:
OpArg regs[32]; // Tracking state, indexed by PPC register number
JRCPPC ArmCRegs[ARMREGS]; // Host register slots that hold PPC register values
JRCReg ArmRegs[ARMREGS]; // Four registers remaining
int NUMPPCREG; // presumably the number of ArmCRegs entries in use - confirm
int NUMARMREG; // presumably the number of ArmRegs entries in use - confirm
// Return an array of host registers and write its length to count.
// NOTE(review): presumably the scratch and PPC-holding register orders respectively - confirm.
ArmGen::ARMReg *GetAllocationOrder(int &count);
ArmGen::ARMReg *GetPPCAllocationOrder(int &count);
// Returns the index of the ArmCRegs slot to reuse when no slot is free.
u32 GetLeastUsedRegister(bool increment);
// Returns true and writes a slot index to regindex when a free slot exists.
bool FindFreeRegister(u32 &regindex);
// Private function can kill immediates
// Returns the bound host register, or INVALID_REG when preg is an immediate
// and kill_imm is false.
ArmGen::ARMReg BindToRegister(u32 preg, bool doLoad, bool kill_imm);
protected:
ArmGen::ARMXEmitter *emit; // Emitter that receives the generated loads and stores
public:
ArmRegCache();
~ArmRegCache() {}
void Init(ArmGen::ARMXEmitter *emitter);
void Start(PPCAnalyst::BlockRegStats &stats);
ArmGen::ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
// Releases up to four registers previously obtained from GetReg.
void Unlock(ArmGen::ARMReg R0, ArmGen::ARMReg R1 = ArmGen::INVALID_REG, ArmGen::ARMReg R2 = ArmGen::INVALID_REG, ArmGen::ARMReg R3 = ArmGen::INVALID_REG);
// Writes cached values back; FLUSH_ALL additionally releases the host slots.
void Flush(FlushMode mode = FLUSH_ALL);
ArmGen::ARMReg R(u32 preg); // Returns a cached register
// True when preg is currently tracked as a known immediate.
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
// Returns the immediate stored for preg; only meaningful when IsImm(preg).
u32 GetImm(u32 preg) { return regs[preg].GetImm(); }
// Marks preg as holding the constant imm, releasing any host slot it had.
void SetImmediate(u32 preg, u32 imm);
// Public function doesn't kill immediates
// In reality when you call R(u32) it'll bind an immediate there
void BindToRegister(u32 preg, bool doLoad = true);
// Stores preg back to the PPC state and releases its host slot.
void StoreFromRegister(u32 preg);
};

View file

@ -27,11 +27,6 @@
#include "Core/PowerPC/Jit64IL/JitIL_Tables.h"
#endif
#if _M_ARM_32
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
#endif
#if _M_ARM_64
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_Tables.h"
@ -63,11 +58,6 @@ namespace JitInterface
ptr = new JitIL();
break;
#endif
#if _M_ARM_32
case PowerPC::CORE_JITARM:
ptr = new JitArm();
break;
#endif
#if _M_ARM_64
case PowerPC::CORE_JITARM64:
ptr = new JitArm64();
@ -94,11 +84,6 @@ namespace JitInterface
JitILTables::InitTables();
break;
#endif
#if _M_ARM_32
case PowerPC::CORE_JITARM:
JitArmTables::InitTables();
break;
#endif
#if _M_ARM_64
case PowerPC::CORE_JITARM64:
JitArm64Tables::InitTables();

View file

@ -26,8 +26,6 @@ GeneralConfigPane::GeneralConfigPane(wxWindow* parent, wxWindowID id)
#ifdef _M_X86_64
{ 1, _("JIT Recompiler (recommended)") },
{ 2, _("JITIL Recompiler (slower, experimental)") },
#elif defined(_M_ARM_32)
{ 3, _("Arm JIT (experimental)") },
#elif defined(_M_ARM_64)
{ 4, _("Arm64 JIT (experimental)") },
#endif

View file

@ -50,8 +50,6 @@ CJitWindow::CJitWindow(wxWindow* parent, wxWindowID id, const wxPoint& pos,
m_disassembler.reset(GetNewDisassembler("x86"));
#elif defined(_M_ARM_64)
m_disassembler.reset(GetNewDisassembler("aarch64"));
#elif defined(_M_ARM_32)
m_disassembler.reset(GetNewDisassembler("armv7"));
#else
m_disassembler.reset(GetNewDisassembler("UNK"));
#endif

View file

@ -12,7 +12,7 @@
// include order is important
#include <gtest/gtest.h> // NOLINT
#if _M_X86_64 || _M_ARM_32
#if _M_X86_64
enum
{
#ifdef _WIN32