1
0
Fork 0
yuzu-mirror/src/audio_core/hle/dsp.h
MerryMage 004991d79e DSP: Implement Pipe 2
Pipe 2 is a DSP pipe that is used to initialize both the DSP hardware (the
application signals to the DSP to initialize) and the application (the DSP
provides the memory location of structures in the shared memory region).
2016-03-06 21:25:44 +00:00

539 lines
20 KiB
C++

// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <type_traits>
#include "audio_core/audio_core.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
namespace DSP {
namespace HLE {
// The application-accessible region of DSP memory consists of two parts.
// Both are marked as IO and have Read/Write permissions.
//
// First Region: 0x1FF50000 (Size: 0x8000)
// Second Region: 0x1FF70000 (Size: 0x8000)
//
// The DSP reads from each region alternately based on the frame counter for each region much like a
// double-buffer. The frame counter is located as the very last u16 of each region and is incremented
// each audio tick.
struct SharedMemory;
constexpr VAddr region0_base = 0x1FF50000;
extern SharedMemory g_region0;
constexpr VAddr region1_base = 0x1FF70000;
extern SharedMemory g_region1;
/**
* The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
* its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian
* layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be
* middle-endian.
*
* Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more
* sensible choice of keeping that little-endian. There are also some exceptions such as the
* IntermediateMixSamples structure, which is little-endian.
*
* This struct implements the conversion to and from this middle-endianness.
*/
struct u32_dsp {
u32_dsp() = default;
operator u32() const {
return Convert(storage);
}
void operator=(u32 new_value) {
storage = Convert(new_value);
}
private:
static constexpr u32 Convert(u32 value) {
return (value << 16) | (value >> 16);
}
u32_le storage;
};
#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable");
#endif
// There are 15 structures in each memory region. A table of them in the order they appear in memory
// is presented below:
//
// # First Region DSP Address Purpose Control
// 5 0x8400 DSP Status DSP
// 9 0x8410 DSP Debug Info DSP
// 6 0x8540 Final Mix Samples DSP
// 2 0x8680 Source Status [24] DSP
// 8 0x8710 Compressor Table Application
// 4 0x9430 DSP Configuration Application
// 7 0x9492 Intermediate Mix Samples DSP + App
// 1 0x9E92 Source Configuration [24] Application
// 3 0xA792 Source ADPCM Coefficients [24] Application
// 10 0xA912 Surround Sound Related
// 11 0xAA12 Surround Sound Related
// 12 0xAAD2 Surround Sound Related
// 13 0xAC52 Surround Sound Related
// 14 0xAC5C Surround Sound Related
// 0 0xBFFF Frame Counter Application
//
// #: This refers to the order in which they appear in the DspPipe::Audio DSP pipe.
// See also: DSP::HLE::PipeRead.
//
// Note that the above addresses do vary slightly between audio firmwares observed; the addresses are
// not fixed in stone. The addresses above are only an examplar; they're what this implementation
// does and provides to applications.
//
// Application requests the DSP service to convert DSP addresses into ARM11 virtual addresses using the
// ConvertProcessAddressFromDspDram service call. Applications seem to derive the addresses for the
// second region via:
// second_region_dsp_addr = first_region_dsp_addr | 0x10000
//
// Applications maintain most of its own audio state, the memory region is used mainly for
// communication and not storage of state.
//
// In the documentation below, filter and effect transfer functions are specified in the z domain.
// (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital
// frequency domain, just like how the s domain is the analog frequency domain.)
#define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words))
// GCC versions < 5.0 do not implement std::is_trivially_copyable.
// Excluding MSVC because it has weird behaviour for std::is_trivially_copyable.
#if (__GNUC__ >= 5) || defined(__clang__)
#define ASSERT_DSP_STRUCT(name, size) \
static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \
static_assert(std::is_trivially_copyable<name>::value, "DSP structure " #name " isn't trivially copyable"); \
static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name)
#else
#define ASSERT_DSP_STRUCT(name, size) \
static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \
static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name)
#endif
struct SourceConfiguration {
struct Configuration {
/// These dirty flags are set by the application when it updates the fields in this struct.
/// The DSP clears these each audio frame.
union {
u32_le dirty_raw;
BitField<2, 1, u32_le> adpcm_coefficients_dirty;
BitField<3, 1, u32_le> partial_embedded_buffer_dirty; ///< Tends to be set when a looped buffer is queued.
BitField<16, 1, u32_le> enable_dirty;
BitField<17, 1, u32_le> interpolation_dirty;
BitField<18, 1, u32_le> rate_multiplier_dirty;
BitField<19, 1, u32_le> buffer_queue_dirty;
BitField<20, 1, u32_le> loop_related_dirty;
BitField<21, 1, u32_le> play_position_dirty; ///< Tends to also be set when embedded buffer is updated.
BitField<22, 1, u32_le> filters_enabled_dirty;
BitField<23, 1, u32_le> simple_filter_dirty;
BitField<24, 1, u32_le> biquad_filter_dirty;
BitField<25, 1, u32_le> gain_0_dirty;
BitField<26, 1, u32_le> gain_1_dirty;
BitField<27, 1, u32_le> gain_2_dirty;
BitField<28, 1, u32_le> sync_dirty;
BitField<29, 1, u32_le> reset_flag;
BitField<31, 1, u32_le> embedded_buffer_dirty;
};
// Gain control
/**
* Gain is between 0.0-1.0. This determines how much will this source appear on
* each of the 12 channels that feed into the intermediate mixers.
* Each of the three intermediate mixers is fed two left and two right channels.
*/
float_le gain[3][4];
// Interpolation
/// Multiplier for sample rate. Resampling occurs with the selected interpolation method.
float_le rate_multiplier;
enum class InterpolationMode : u8 {
None = 0,
Linear = 1,
Polyphase = 2
};
InterpolationMode interpolation_mode;
INSERT_PADDING_BYTES(1); ///< Interpolation related
// Filters
/**
* This is the simplest normalized first-order digital recursive filter.
* The transfer function of this filter is:
* H(z) = b0 / (1 + a1 z^-1)
* Values are signed fixed point with 15 fractional bits.
*/
struct SimpleFilter {
s16_le b0;
s16_le a1;
};
/**
* This is a normalised biquad filter (second-order).
* The transfer function of this filter is:
* H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2)
* Nintendo chose to negate the feedbackward coefficients. This differs from standard notation
* as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html
* Values are signed fixed point with 14 fractional bits.
*/
struct BiquadFilter {
s16_le b0;
s16_le b1;
s16_le b2;
s16_le a1;
s16_le a2;
};
union {
u16_le filters_enabled;
BitField<0, 1, u16_le> simple_filter_enabled;
BitField<1, 1, u16_le> biquad_filter_enabled;
};
SimpleFilter simple_filter;
BiquadFilter biquad_filter;
// Buffer Queue
/// A buffer of audio data from the application, along with metadata about it.
struct Buffer {
/// Physical memory address of the start of the buffer
u32_dsp physical_address;
/// This is length in terms of samples.
/// Note that in different buffer formats a sample takes up different number of bytes.
u32_dsp length;
/// ADPCM Predictor (4 bits) and Scale (4 bits)
union {
u16_le adpcm_ps;
BitField<0, 4, u16_le> adpcm_scale;
BitField<4, 4, u16_le> adpcm_predictor;
};
/// ADPCM Historical Samples (y[n-1] and y[n-2])
u16_le adpcm_yn[2];
/// This is non-zero when the ADPCM values above are to be updated.
u8 adpcm_dirty;
/// Is a looping buffer.
u8 is_looping;
/// This value is shown in SourceStatus::previous_buffer_id when this buffer has finished.
/// This allows the emulated application to tell what buffer is currently playing
u16_le buffer_id;
INSERT_PADDING_DSPWORDS(1);
};
u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i])
Buffer buffers[4]; ///< Queued Buffers
// Playback controls
u32_dsp loop_related;
u8 enable;
INSERT_PADDING_BYTES(1);
u16_le sync; ///< Application-side sync (See also: SourceStatus::sync)
u32_dsp play_position; ///< Position. (Units: number of samples)
INSERT_PADDING_DSPWORDS(2);
// Embedded Buffer
// This buffer is often the first buffer to be used when initiating audio playback,
// after which the buffer queue is used.
u32_dsp physical_address;
/// This is length in terms of samples.
/// Note a sample takes up different number of bytes in different buffer formats.
u32_dsp length;
enum class MonoOrStereo : u16_le {
Mono = 1,
Stereo = 2
};
enum class Format : u16_le {
PCM8 = 0,
PCM16 = 1,
ADPCM = 2
};
union {
u16_le flags1_raw;
BitField<0, 2, MonoOrStereo> mono_or_stereo;
BitField<2, 2, Format> format;
BitField<5, 1, u16_le> fade_in;
};
/// ADPCM Predictor (4 bit) and Scale (4 bit)
union {
u16_le adpcm_ps;
BitField<0, 4, u16_le> adpcm_scale;
BitField<4, 4, u16_le> adpcm_predictor;
};
/// ADPCM Historical Samples (y[n-1] and y[n-2])
u16_le adpcm_yn[2];
union {
u16_le flags2_raw;
BitField<0, 1, u16_le> adpcm_dirty; ///< Has the ADPCM info above been changed?
BitField<1, 1, u16_le> is_looping; ///< Is this a looping buffer?
};
/// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this buffer).
u16_le buffer_id;
};
Configuration config[AudioCore::num_sources];
};
ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
struct SourceStatus {
struct Status {
u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes
u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync
u32_dsp buffer_position; ///< Number of samples into the current buffer
u16_le previous_buffer_id; ///< Updated when a buffer finishes playing
INSERT_PADDING_DSPWORDS(1);
};
Status status[AudioCore::num_sources];
};
ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
struct DspConfiguration {
/// These dirty flags are set by the application when it updates the fields in this struct.
/// The DSP clears these each audio frame.
union {
u32_le dirty_raw;
BitField<8, 1, u32_le> mixer1_enabled_dirty;
BitField<9, 1, u32_le> mixer2_enabled_dirty;
BitField<10, 1, u32_le> delay_effect_0_dirty;
BitField<11, 1, u32_le> delay_effect_1_dirty;
BitField<12, 1, u32_le> reverb_effect_0_dirty;
BitField<13, 1, u32_le> reverb_effect_1_dirty;
BitField<16, 1, u32_le> volume_0_dirty;
BitField<24, 1, u32_le> volume_1_dirty;
BitField<25, 1, u32_le> volume_2_dirty;
BitField<26, 1, u32_le> output_format_dirty;
BitField<27, 1, u32_le> limiter_enabled_dirty;
BitField<28, 1, u32_le> headphones_connected_dirty;
};
/// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for each at the final mixer
float_le volume[3];
INSERT_PADDING_DSPWORDS(3);
enum class OutputFormat : u16_le {
Mono = 0,
Stereo = 1,
Surround = 2
};
OutputFormat output_format;
u16_le limiter_enabled; ///< Not sure of the exact gain equation for the limiter.
u16_le headphones_connected; ///< Application updates the DSP on headphone status.
INSERT_PADDING_DSPWORDS(4); ///< TODO: Surround sound related
INSERT_PADDING_DSPWORDS(2); ///< TODO: Intermediate mixer 1/2 related
u16_le mixer1_enabled;
u16_le mixer2_enabled;
/**
* This is delay with feedback.
* Transfer function:
* H(z) = a z^-N / (1 - b z^-1 + a g z^-N)
* where
* N = frame_count * samples_per_frame
* g, a and b are fixed point with 7 fractional bits
*/
struct DelayEffect {
/// These dirty flags are set by the application when it updates the fields in this struct.
/// The DSP clears these each audio frame.
union {
u16_le dirty_raw;
BitField<0, 1, u16_le> enable_dirty;
BitField<1, 1, u16_le> work_buffer_address_dirty;
BitField<2, 1, u16_le> other_dirty; ///< Set when anything else has been changed
};
u16_le enable;
INSERT_PADDING_DSPWORDS(1);
u16_le outputs;
u32_dsp work_buffer_address; ///< The application allocates a block of memory for the DSP to use as a work buffer.
u16_le frame_count; ///< Frames to delay by
// Coefficients
s16_le g; ///< Fixed point with 7 fractional bits
s16_le a; ///< Fixed point with 7 fractional bits
s16_le b; ///< Fixed point with 7 fractional bits
};
DelayEffect delay_effect[2];
struct ReverbEffect {
INSERT_PADDING_DSPWORDS(26); ///< TODO
};
ReverbEffect reverb_effect[2];
INSERT_PADDING_DSPWORDS(4);
};
ASSERT_DSP_STRUCT(DspConfiguration, 196);
ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20);
ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
struct AdpcmCoefficients {
/// Coefficients are signed fixed point with 11 fractional bits.
/// Each source has 16 coefficients associated with it.
s16_le coeff[AudioCore::num_sources][16];
};
ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
struct DspStatus {
u16_le unknown;
u16_le dropped_frames;
INSERT_PADDING_DSPWORDS(0xE);
};
ASSERT_DSP_STRUCT(DspStatus, 32);
/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
/// When the application writes to this region it has no effect.
struct FinalMixSamples {
s16_le pcm16[2 * AudioCore::samples_per_frame];
};
ASSERT_DSP_STRUCT(FinalMixSamples, 640);
/// DSP writes output of intermediate mixers 1 and 2 here.
/// Writes to this region by the application edits the output of the intermediate mixers.
/// This seems to be intended to allow the application to do custom effects on the ARM11.
/// Values that exceed s16 range will be clipped by the DSP after further processing.
struct IntermediateMixSamples {
struct Samples {
s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
};
Samples mix1;
Samples mix2;
};
ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120);
/// Compressor table
struct Compressor {
INSERT_PADDING_DSPWORDS(0xD20); ///< TODO
};
/// There is no easy way to implement this in a HLE implementation.
struct DspDebug {
INSERT_PADDING_DSPWORDS(0x130);
};
ASSERT_DSP_STRUCT(DspDebug, 0x260);
struct SharedMemory {
/// Padding
INSERT_PADDING_DSPWORDS(0x400);
DspStatus dsp_status;
DspDebug dsp_debug;
FinalMixSamples final_samples;
SourceStatus source_statuses;
Compressor compressor;
DspConfiguration dsp_configuration;
IntermediateMixSamples intermediate_mix_samples;
SourceConfiguration source_configurations;
AdpcmCoefficients adpcm_coefficients;
struct {
INSERT_PADDING_DSPWORDS(0x100);
} unknown10;
struct {
INSERT_PADDING_DSPWORDS(0xC0);
} unknown11;
struct {
INSERT_PADDING_DSPWORDS(0x180);
} unknown12;
struct {
INSERT_PADDING_DSPWORDS(0xA);
} unknown13;
struct {
INSERT_PADDING_DSPWORDS(0x13A3);
} unknown14;
u16_le frame_counter;
};
ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
// Structures must have an offset that is a multiple of two.
static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, source_statuses) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, adpcm_coefficients) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, dsp_configuration) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, dsp_status) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, final_samples) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, intermediate_mix_samples) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, compressor) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, dsp_debug) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown10) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown11) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown12) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown13) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown14) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
#undef INSERT_PADDING_DSPWORDS
#undef ASSERT_DSP_STRUCT
/// Initialize DSP hardware
void Init();
/// Shutdown DSP hardware
void Shutdown();
/**
* Perform processing and updates state of current shared memory buffer.
* This function is called every audio tick before triggering the audio interrupt.
* @return Whether an audio interrupt should be triggered this frame.
*/
bool Tick();
/// Returns a mutable reference to the current region. Current region is selected based on the frame counter.
SharedMemory& CurrentRegion();
} // namespace HLE
} // namespace DSP