[AArch64] Handle FPR island registers in a less dumb way.

This commit is contained in:
Ryan Houdek 2015-03-02 23:50:33 -06:00
parent 81242dd4a7
commit fbdee7b15f

View file

@ -2877,18 +2877,6 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp) void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
{ {
bool bundled_loadstore = false; bool bundled_loadstore = false;
int num_regs = registers.Count();
if (num_regs == 2)
{
int i = 0;
ARM64Reg regs[2];
for (auto it : registers)
regs[i++] = (ARM64Reg)(Q0 + it);
STP(128, INDEX_PRE, regs[0], regs[1], SP, -32);
return;
}
for (int i = 0; i < 32; ++i) for (int i = 0; i < 32; ++i)
{ {
@ -2906,8 +2894,10 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
if (bundled_loadstore && tmp != INVALID_REG) if (bundled_loadstore && tmp != INVALID_REG)
{ {
int num_regs = registers.Count();
m_emit->SUB(SP, SP, num_regs * 16); m_emit->SUB(SP, SP, num_regs * 16);
m_emit->ADD(tmp, SP, 0); m_emit->ADD(tmp, SP, 0);
std::vector<ARM64Reg> island_regs;
for (int i = 0; i < 32; ++i) for (int i = 0; i < 32; ++i)
{ {
if (!registers[i]) if (!registers[i])
@ -2922,15 +2912,42 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
// 4 < 4 && registers[i + 4] false! // 4 < 4 && registers[i + 4] false!
while (++count < 4 && (i + count) < 32 && registers[i + count]) {} while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp); if (count == 1)
island_regs.push_back((ARM64Reg)(Q0 + i));
else
ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
i += count - 1; i += count - 1;
} }
// Handle island registers
std::vector<ARM64Reg> pair_regs;
for (auto& it : island_regs)
{
pair_regs.push_back(it);
if (pair_regs.size() == 2)
{
STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
pair_regs.clear();
}
}
if (pair_regs.size())
STR(128, INDEX_POST, pair_regs[0], tmp, 16);
} }
else else
{ {
std::vector<ARM64Reg> pair_regs;
for (auto it : registers) for (auto it : registers)
STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16); {
pair_regs.push_back((ARM64Reg)(Q0 + it));
if (pair_regs.size() == 2)
{
STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
pair_regs.clear();
}
}
if (pair_regs.size())
STR(128, INDEX_PRE, pair_regs[0], SP, -16);
} }
} }
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp) void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
@ -2938,17 +2955,6 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
bool bundled_loadstore = false; bool bundled_loadstore = false;
int num_regs = registers.Count(); int num_regs = registers.Count();
if (num_regs == 2)
{
int i = 0;
ARM64Reg regs[2];
for (auto it : registers)
regs[i++] = (ARM64Reg)(Q0 + it);
LDP(128, INDEX_POST, regs[0], regs[1], SP, 32);
return;
}
for (int i = 0; i < 32; ++i) for (int i = 0; i < 32; ++i)
{ {
if (!registers[i]) if (!registers[i])
@ -2966,6 +2972,7 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
if (bundled_loadstore && tmp != INVALID_REG) if (bundled_loadstore && tmp != INVALID_REG)
{ {
// The temporary register is only used to indicate that we can use this code path // The temporary register is only used to indicate that we can use this code path
std::vector<ARM64Reg> island_regs;
for (int i = 0; i < 32; ++i) for (int i = 0; i < 32; ++i)
{ {
if (!registers[i]) if (!registers[i])
@ -2974,19 +2981,52 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
int count = 0; int count = 0;
while (++count < 4 && (i + count) < 32 && registers[i + count]) {} while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP); if (count == 1)
island_regs.push_back((ARM64Reg)(Q0 + i));
else
LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP);
i += count - 1; i += count - 1;
} }
// Handle island registers
std::vector<ARM64Reg> pair_regs;
for (auto& it : island_regs)
{
pair_regs.push_back(it);
if (pair_regs.size() == 2)
{
LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
pair_regs.clear();
}
}
if (pair_regs.size())
LDR(128, INDEX_POST, pair_regs[0], SP, 16);
} }
else else
{ {
bool odd = num_regs % 2;
std::vector<ARM64Reg> pair_regs;
for (int i = 31; i >= 0; --i) for (int i = 31; i >= 0; --i)
{ {
if (!registers[i]) if (!registers[i])
continue; continue;
LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16); if (odd)
{
// First load must be a regular LDR if odd
odd = false;
LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
}
else
{
pair_regs.push_back((ARM64Reg)(Q0 + i));
if (pair_regs.size() == 2)
{
LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
pair_regs.clear();
}
}
} }
} }
} }