dolphin/Source/Core/DSPCore/Src/DSPIntUtil.h
pierre fc1db5eaa0 Core/DSPCore: Reorganize register layout for accessing accumulators
(acc and ax) and product register with one read/write.

Gives a minuscule speedup of not more than 4%. In exchange, breaks all
your out-of-tree changes to dsp. Tests are not building again, yet.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6680 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-12-29 02:12:06 +00:00

367 lines
8.8 KiB
C

/*====================================================================
filename: gdsp_opcodes_helper.h
project: GameCube DSP Tool (gcdsp)
created: 2005.03.04
mail: duddie@walla.com
Copyright (c) 2005 Duddie
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
====================================================================*/
#ifndef _DSP_INT_UTIL_H
#define _DSP_INT_UTIL_H
#include "Common.h"
#include "DSPInterpreter.h"
#include "DSPCore.h"
#include "DSPMemoryMap.h"
#include "DSPStacks.h"
// ---------------------------------------------------------------------------------------
// --- SR
// ---------------------------------------------------------------------------------------
// Sets every bit of `flag` in the DSP status register (g_dsp._r.sr).
// Bits that are already set are left untouched.
inline void dsp_SR_set_flag(int flag)
{
	g_dsp._r.sr = g_dsp._r.sr | flag;
}
// Returns true when at least one bit of `flag` is currently set in the
// DSP status register (g_dsp._r.sr).
inline bool dsp_SR_is_flag_set(int flag)
{
	const int masked = g_dsp._r.sr & flag;
	return masked != 0;
}
// ---------------------------------------------------------------------------------------
// --- AR increments, decrements
// ---------------------------------------------------------------------------------------
// Copies the highest set bit of `a` into every lower bit position, i.e.
// returns NextPowerOf2()-1: the smallest all-ones mask that covers `a`.
// ToMask(0) is 0.
inline u16 ToMask(u16 a)
{
	u16 mask = a;
	// Smear by 8, 4, 2 and finally 1 bit so every lower position is filled.
	for (int shift = 8; shift > 0; shift >>= 1)
		mask = (u16)(mask | (mask >> shift));
	return mask;
}
// Computes the value address register `reg` would hold after adding the
// signed offset `ix`, taking the matching wrap register wr[reg] into account.
// Only reads g_dsp._r.ar[reg] and g_dsp._r.wr[reg]; the result is returned and
// nothing is written back here, so storing it is left to the caller.
// NOTE(review): this looks like circular-buffer addressing over a block of
// wr+1 entries - confirm against the DSP documentation.
inline u16 dsp_increase_addr_reg(u16 reg, s16 ix)
{
u16 ar = g_dsp._r.ar[reg];
u16 wr = g_dsp._r.wr[reg];
// Mask covering every bit up to wr's highest set bit; the `| 1` keeps it non-zero.
u16 m = ToMask(wr) | 1;
// Plain 16-bit sum; corrected below by one block length (wr+1) when needed.
u16 nar = ar+ix;
if (ix >= 0) {
// Non-negative offset: step back by wr+1 when the masked sum reaches m+1.
if((ar&m) + (int)(ix&m) -(int)m-1 >= 0)
nar -= wr+1;
} else {
// Negative offset: step forward by wr+1 when the masked sum minus (m+1)
// is below m-wr.
if((ar&m) + (int)(ix&m) -(int)m-1 < m-wr)
nar += wr+1;
}
return nar;
}
// Computes the value address register `reg` would hold after subtracting the
// signed offset `ix`, taking the matching wrap register wr[reg] into account.
// Only reads g_dsp._r.ar[reg] and g_dsp._r.wr[reg]; the result is returned and
// nothing is written back here, so storing it is left to the caller.
// NOTE(review): this looks like circular-buffer addressing over a block of
// wr+1 entries - confirm against the DSP documentation.
inline u16 dsp_decrease_addr_reg(u16 reg, s16 ix)
{
u16 ar = g_dsp._r.ar[reg];
u16 wr = g_dsp._r.wr[reg];
// Mask covering every bit up to wr's highest set bit; the `| 1` keeps it non-zero.
u16 m = ToMask(wr) | 1;
// Plain 16-bit difference; corrected below by one block length (wr+1) when needed.
u16 nar = ar-ix;
// Negative offsets other than -0x8000 take the first branch; everything else
// (zero, positive, and -0x8000) takes the second.
if ((u16)ix > 0x8000) { // equiv: ix < 0 && ix != -0x8000
// Step back by wr+1 when the masked difference is non-negative.
if((ar&m) - (int)(ix&m) >= 0)
nar -= wr+1;
} else {
// Step forward by wr+1 when the masked difference is below m-wr.
if((ar&m) - (int)(ix&m) < m-wr)
nar += wr+1;
}
return nar;
}
// Value address register `reg` would hold after stepping forward by one;
// shorthand for dsp_increase_addr_reg() with an offset of 1.
inline u16 dsp_increment_addr_reg(u16 reg)
{
	const s16 step = 1;
	return dsp_increase_addr_reg(reg, step);
}
// Value address register `reg` would hold after stepping back by one;
// shorthand for dsp_decrease_addr_reg() with an offset of 1.
inline u16 dsp_decrement_addr_reg(u16 reg)
{
	const s16 step = 1;
	return dsp_decrease_addr_reg(reg, step);
}
// ---------------------------------------------------------------------------------------
// --- reg
// ---------------------------------------------------------------------------------------
// Reads the 16-bit DSP register selected by the low five bits of `reg`.
//
// Stack registers (ST0..ST3) are read through dsp_reg_load_stack(), so
// whatever that helper does to the stack state applies here too. All other
// registers are plain loads from g_dsp._r.
//
// Returns the register value; the default branch asserts and returns 0.
inline u16 dsp_op_read_reg(int reg)
{
	// Mask once and use the masked number for both the dispatch and the
	// index arithmetic. Indexing with the raw `reg` would run outside the
	// register arrays whenever a caller passes a value with bits above 0x1f.
	const int r = reg & 0x1f;

	switch (r) {
	case DSP_REG_ST0:
	case DSP_REG_ST1:
	case DSP_REG_ST2:
	case DSP_REG_ST3:
		return dsp_reg_load_stack(r - DSP_REG_ST0);
	case DSP_REG_AR0:
	case DSP_REG_AR1:
	case DSP_REG_AR2:
	case DSP_REG_AR3:
		return g_dsp._r.ar[r - DSP_REG_AR0];
	case DSP_REG_IX0:
	case DSP_REG_IX1:
	case DSP_REG_IX2:
	case DSP_REG_IX3:
		return g_dsp._r.ix[r - DSP_REG_IX0];
	case DSP_REG_WR0:
	case DSP_REG_WR1:
	case DSP_REG_WR2:
	case DSP_REG_WR3:
		return g_dsp._r.wr[r - DSP_REG_WR0];
	case DSP_REG_ACH0:
	case DSP_REG_ACH1:
		return g_dsp._r.ac[r - DSP_REG_ACH0].h;
	case DSP_REG_CR:     return g_dsp._r.cr;
	case DSP_REG_SR:     return g_dsp._r.sr;
	case DSP_REG_PRODL:  return g_dsp._r.prod.l;
	case DSP_REG_PRODM:  return g_dsp._r.prod.m;
	case DSP_REG_PRODH:  return g_dsp._r.prod.h;
	case DSP_REG_PRODM2: return g_dsp._r.prod.m2;
	case DSP_REG_AXL0:
	case DSP_REG_AXL1:
		return g_dsp._r.ax[r - DSP_REG_AXL0].l;
	case DSP_REG_AXH0:
	case DSP_REG_AXH1:
		return g_dsp._r.ax[r - DSP_REG_AXH0].h;
	case DSP_REG_ACL0:
	case DSP_REG_ACL1:
		return g_dsp._r.ac[r - DSP_REG_ACL0].l;
	case DSP_REG_ACM0:
	case DSP_REG_ACM1:
		return g_dsp._r.ac[r - DSP_REG_ACM0].m;
	default:
		_assert_msg_(DSP_INT, 0, "cannot happen");
		return 0;
	}
}
// Writes `val` to the DSP register selected by the low five bits of `reg`.
//
// ACH0/ACH1 keep only the low 8 bits of `val`, sign-extended to 16 bits.
// Stack registers (ST0..ST3) are written through dsp_reg_store_stack().
// Every other register receives `val` unchanged.
inline void dsp_op_write_reg(int reg, u16 val)
{
	// Mask once and use the masked number for both the dispatch and the
	// index arithmetic. Indexing with the raw `reg` would run outside the
	// register arrays whenever a caller passes a value with bits above 0x1f.
	const int r = reg & 0x1f;

	switch (r) {
	// 8-bit sign extended registers. Should look at prod.h too...
	case DSP_REG_ACH0:
	case DSP_REG_ACH1:
		// sign extend from the bottom 8 bits.
		g_dsp._r.ac[r - DSP_REG_ACH0].h = (u16)(s16)(s8)(u8)val;
		break;
	// Stack registers.
	case DSP_REG_ST0:
	case DSP_REG_ST1:
	case DSP_REG_ST2:
	case DSP_REG_ST3:
		dsp_reg_store_stack(r - DSP_REG_ST0, val);
		break;
	case DSP_REG_AR0:
	case DSP_REG_AR1:
	case DSP_REG_AR2:
	case DSP_REG_AR3:
		g_dsp._r.ar[r - DSP_REG_AR0] = val;
		break;
	case DSP_REG_IX0:
	case DSP_REG_IX1:
	case DSP_REG_IX2:
	case DSP_REG_IX3:
		g_dsp._r.ix[r - DSP_REG_IX0] = val;
		break;
	case DSP_REG_WR0:
	case DSP_REG_WR1:
	case DSP_REG_WR2:
	case DSP_REG_WR3:
		g_dsp._r.wr[r - DSP_REG_WR0] = val;
		break;
	case DSP_REG_CR:     g_dsp._r.cr = val; break;
	case DSP_REG_SR:     g_dsp._r.sr = val; break;
	case DSP_REG_PRODL:  g_dsp._r.prod.l = val; break;
	case DSP_REG_PRODM:  g_dsp._r.prod.m = val; break;
	case DSP_REG_PRODH:  g_dsp._r.prod.h = val; break;
	case DSP_REG_PRODM2: g_dsp._r.prod.m2 = val; break;
	case DSP_REG_AXL0:
	case DSP_REG_AXL1:
		g_dsp._r.ax[r - DSP_REG_AXL0].l = val;
		break;
	case DSP_REG_AXH0:
	case DSP_REG_AXH1:
		g_dsp._r.ax[r - DSP_REG_AXH0].h = val;
		break;
	case DSP_REG_ACL0:
	case DSP_REG_ACL1:
		g_dsp._r.ac[r - DSP_REG_ACL0].l = val;
		break;
	case DSP_REG_ACM0:
	case DSP_REG_ACM1:
		g_dsp._r.ac[r - DSP_REG_ACM0].m = val;
		break;
	default:
		// Any register number not listed above is ignored.
		break;
	}
}
inline void dsp_conditional_extend_accum(int reg)
{
switch (reg)
{
case DSP_REG_ACM0:
case DSP_REG_ACM1:
if (g_dsp._r.sr & SR_40_MODE_BIT)
{
// Sign extend into whole accum.
u16 val = g_dsp._r.ac[reg-DSP_REG_ACM0].m;
g_dsp._r.ac[reg - DSP_REG_ACM0].h = (val & 0x8000) ? 0xFFFF : 0x0000;
g_dsp._r.ac[reg - DSP_REG_ACM0].l = 0;
}
}
}
// ---------------------------------------------------------------------------------------
// --- prod
// ---------------------------------------------------------------------------------------
// Assembles the product register into one signed 64-bit value:
// the low 8 bits of prod.h (sign-extended) form bits 32 and up, and
// (prod.m + prod.m2) << 16 combined with prod.l is added on top of that.
inline s64 dsp_get_long_prod()
{
#if PROFILE
	ProfilerAddDelta(g_dsp.err_pc, 1);
#endif
	const s64 high = (s64)(s8)(u8)g_dsp._r.prod.h << 32;
	// m and m2 are summed before shifting, so a carry lands in bit 32.
	const s64 mid_sum = (s64)g_dsp._r.prod.m + (s64)g_dsp._r.prod.m2;
	const s64 low = (mid_sum << 16) | g_dsp._r.prod.l;
	return high + low;
}
// Returns the product (see dsp_get_long_prod) with its low 16 bits rounded
// away. An exact half (low word == 0x8000) rounds towards the value whose
// bit 16 is clear; the low 16 bits of the result are always zero.
inline s64 dsp_get_long_prod_round_prodl()
{
	const s64 prod = dsp_get_long_prod();
	// Bit 16 decides whether an exact half rounds up (0x8000) or down (0x7fff).
	const s64 bias = (prod & 0x10000) ? 0x8000 : 0x7fff;
	return (prod + bias) & ~0xffff;
}
// Stores `val` into the product register: bits 0..15 go to prod.l, bits 16..31
// to prod.m, bits 32..39 to prod.h, and prod.m2 is cleared.
// This is not hardware-accurate - the real prod registers behave in completely
// bizarre ways - but emulating them exactly is not needed for game ucodes.
inline void dsp_set_long_prod(s64 val)
{
#if PROFILE
	ProfilerAddDelta(g_dsp.err_pc, 1);
#endif
	g_dsp._r.prod.l = (u16)val;
	g_dsp._r.prod.m = (u16)(val >> 16);
	// Stored zero-extended, not sign-extended. todo: check expansion
	g_dsp._r.prod.h = (u8)(val >> 32);
	g_dsp._r.prod.m2 = 0;
}
// ---------------------------------------------------------------------------------------
// --- ACC - main accumulators (40-bit)
// ---------------------------------------------------------------------------------------
// Assembles accumulator `reg` into one signed 64-bit value: the low 8 bits of
// ac[reg].h are sign-extended into bits 32 and up, ac[reg].m supplies bits
// 16..31 and ac[reg].l bits 0..15.
inline s64 dsp_get_long_acc(int reg)
{
#if PROFILE
	ProfilerAddDelta(g_dsp.err_pc, 1);
#endif
	const u32 low32 = ((u32)g_dsp._r.ac[reg].m << 16) | g_dsp._r.ac[reg].l;
	const s64 top = (s64)(s8)g_dsp._r.ac[reg].h;
	return (top << 32) | low32;
}
// Stores `val` into accumulator `_reg`: bits 0..15 go to l, bits 16..31 to m,
// and bits 32..39 are sign-extended to 16 bits and stored in h.
inline void dsp_set_long_acc(int _reg, s64 val)
{
#if PROFILE
	ProfilerAddDelta(g_dsp.err_pc, 1);
#endif
	g_dsp._r.ac[_reg].l = (u16)val;
	g_dsp._r.ac[_reg].m = (u16)(val >> 16);
	// Keep only 8 bits of the high part, then sign-extend them to 16.
	g_dsp._r.ac[_reg].h = (u16)(s16)(s8)(u8)(val >> 32);
}
// Narrows a 64-bit value to a signed 40-bit one (s64 -> s40): the low 40 bits
// are kept and bit 39 is sign-extended through the upper bits of the result.
inline s64 dsp_convert_long_acc(s64 val) // s64 -> s40
{
	const s64 low40 = val & 0xFFFFFFFFFFLL;
	const s64 sign_bit = 0x8000000000LL;
	// Flipping the sign bit and then subtracting it sign-extends from bit 39.
	return (low40 ^ sign_bit) - sign_bit;
}
// Rounds `val` so that its low 16 bits become zero. An exact half
// (low word == 0x8000) rounds towards the value whose bit 16 is clear.
inline s64 dsp_round_long_acc(s64 val)
{
	// Bit 16 decides whether an exact half rounds up (0x8000) or down (0x7fff).
	const s64 bias = (val & 0x10000) ? 0x8000 : 0x7fff;
	return (val + bias) & ~0xffff;
}
// Returns the low word (l) of accumulator `_reg` as a signed 16-bit value.
inline s16 dsp_get_acc_l(int _reg)
{
	const s16 low = (s16)g_dsp._r.ac[_reg].l;
	return low;
}
// Returns the middle word (m) of accumulator `_reg` as a signed 16-bit value.
inline s16 dsp_get_acc_m(int _reg)
{
	const s16 mid = (s16)g_dsp._r.ac[_reg].m;
	return mid;
}
// Returns the high word (h) of accumulator `_reg` as a signed 16-bit value.
inline s16 dsp_get_acc_h(int _reg)
{
	const s16 high = (s16)g_dsp._r.ac[_reg].h;
	return high;
}
// ---------------------------------------------------------------------------------------
// --- AX - extra accumulators (32-bit)
// ---------------------------------------------------------------------------------------
// Assembles extra accumulator `_reg` into one 32-bit value: ax[_reg].h
// supplies bits 16..31 and ax[_reg].l bits 0..15. Returned as signed.
inline s32 dsp_get_long_acx(int _reg)
{
#if PROFILE
	ProfilerAddDelta(g_dsp.err_pc, 1);
#endif
	const u32 hi = (u32)g_dsp._r.ax[_reg].h << 16;
	const u32 lo = g_dsp._r.ax[_reg].l;
	return hi | lo;
}
// Returns the low word (l) of extra accumulator `_reg` as a signed 16-bit value.
inline s16 dsp_get_ax_l(int _reg)
{
	const s16 low = (s16)g_dsp._r.ax[_reg].l;
	return low;
}
// Returns the high word (h) of extra accumulator `_reg` as a signed 16-bit value.
inline s16 dsp_get_ax_h(int _reg)
{
	const s16 high = (s16)g_dsp._r.ax[_reg].h;
	return high;
}
#endif