Merge pull request #1163 from FearlessTobi/add-audio-stretching

audio_core: Add audio stretching support
This commit is contained in:
bunnei
2018-09-12 18:23:54 -04:00
committed by GitHub
21 changed files with 463 additions and 49 deletions

View File

@ -17,6 +17,8 @@ add_library(audio_core STATIC
sink_stream.h
stream.cpp
stream.h
time_stretch.cpp
time_stretch.h
$<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
)
@ -24,6 +26,7 @@ add_library(audio_core STATIC
create_target_directory_groups(audio_core)
target_link_libraries(audio_core PUBLIC common core)
target_link_libraries(audio_core PRIVATE SoundTouch)
if(ENABLE_CUBEB)
target_link_libraries(audio_core PRIVATE cubeb)

View File

@ -3,27 +3,23 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <atomic>
#include <cstring>
#include <mutex>
#include "audio_core/cubeb_sink.h"
#include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/logging/log.h"
#include "common/ring_buffer.h"
#include "core/settings.h"
namespace AudioCore {
class SinkStreamImpl final : public SinkStream {
class CubebSinkStream final : public SinkStream {
public:
SinkStreamImpl(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name)
: ctx{ctx}, num_channels{num_channels_} {
if (num_channels == 6) {
// 6-channel audio does not seem to work with cubeb + SDL, so we downsample this to 2
// channel for now
is_6_channel = true;
num_channels = 2;
}
CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name)
: ctx{ctx}, num_channels{std::min(num_channels_, 2u)}, time_stretch{sample_rate,
num_channels} {
cubeb_stream_params params{};
params.rate = sample_rate;
@ -38,7 +34,7 @@ public:
if (cubeb_stream_init(ctx, &stream_backend, name.c_str(), nullptr, nullptr, output_device,
&params, std::max(512u, minimum_latency),
&SinkStreamImpl::DataCallback, &SinkStreamImpl::StateCallback,
&CubebSinkStream::DataCallback, &CubebSinkStream::StateCallback,
this) != CUBEB_OK) {
LOG_CRITICAL(Audio_Sink, "Error initializing cubeb stream");
return;
@ -50,7 +46,7 @@ public:
}
}
~SinkStreamImpl() {
~CubebSinkStream() {
if (!ctx) {
return;
}
@ -62,27 +58,32 @@ public:
cubeb_stream_destroy(stream_backend);
}
void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
if (!ctx) {
void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
if (source_num_channels > num_channels) {
// Downsample 6 channels to 2
std::vector<s16> buf;
buf.reserve(samples.size() * num_channels / source_num_channels);
for (size_t i = 0; i < samples.size(); i += source_num_channels) {
for (size_t ch = 0; ch < num_channels; ch++) {
buf.push_back(samples[i + ch]);
}
}
queue.Push(buf);
return;
}
std::lock_guard lock{queue_mutex};
queue.Push(samples);
}
queue.reserve(queue.size() + samples.size() * GetNumChannels());
size_t SamplesInQueue(u32 num_channels) const override {
if (!ctx)
return 0;
if (is_6_channel) {
// Downsample 6 channels to 2
const size_t sample_count_copy_size = samples.size() * 2;
queue.reserve(sample_count_copy_size);
for (size_t i = 0; i < samples.size(); i += num_channels) {
queue.push_back(samples[i]);
queue.push_back(samples[i + 1]);
}
} else {
// Copy as-is
std::copy(samples.begin(), samples.end(), std::back_inserter(queue));
}
return queue.Size() / num_channels;
}
void Flush() override {
should_flush = true;
}
u32 GetNumChannels() const {
@ -95,10 +96,11 @@ private:
cubeb* ctx{};
cubeb_stream* stream_backend{};
u32 num_channels{};
bool is_6_channel{};
std::mutex queue_mutex;
std::vector<s16> queue;
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame;
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames);
@ -144,38 +146,52 @@ CubebSink::~CubebSink() {
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
const std::string& name) {
sink_streams.push_back(
std::make_unique<SinkStreamImpl>(ctx, sample_rate, num_channels, output_device, name));
std::make_unique<CubebSinkStream>(ctx, sample_rate, num_channels, output_device, name));
return *sink_streams.back();
}
long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames) {
SinkStreamImpl* impl = static_cast<SinkStreamImpl*>(user_data);
long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames) {
CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
u8* buffer = reinterpret_cast<u8*>(output_buffer);
if (!impl) {
return {};
}
std::lock_guard lock{impl->queue_mutex};
const size_t num_channels = impl->GetNumChannels();
const size_t samples_to_write = num_channels * num_frames;
size_t samples_written;
const size_t frames_to_write{
std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
if (Settings::values.enable_audio_stretching) {
const std::vector<s16> in{impl->queue.Pop()};
const size_t num_in{in.size() / num_channels};
s16* const out{reinterpret_cast<s16*>(buffer)};
const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
samples_written = out_frames * num_channels;
memcpy(buffer, impl->queue.data(), frames_to_write * sizeof(s16) * impl->GetNumChannels());
impl->queue.erase(impl->queue.begin(),
impl->queue.begin() + frames_to_write * impl->GetNumChannels());
if (impl->should_flush) {
impl->time_stretch.Flush();
impl->should_flush = false;
}
} else {
samples_written = impl->queue.Pop(buffer, samples_to_write);
}
if (frames_to_write < num_frames) {
// Fill the rest of the frames with silence
memset(buffer + frames_to_write * sizeof(s16) * impl->GetNumChannels(), 0,
(num_frames - frames_to_write) * sizeof(s16) * impl->GetNumChannels());
if (samples_written >= num_channels) {
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
num_channels * sizeof(s16));
}
// Fill the rest of the frames with last_frame
for (size_t i = samples_written; i < samples_to_write; i += num_channels) {
std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
}
return num_frames;
}
void SinkStreamImpl::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
void CubebSinkStream::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
std::vector<std::string> ListCubebSinkDevices() {
std::vector<std::string> device_list;

View File

@ -21,6 +21,12 @@ public:
private:
struct NullSinkStreamImpl final : SinkStream {
void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
size_t SamplesInQueue(u32 /*num_channels*/) const override {
return 0;
}
void Flush() override {}
} null_sink_stream;
};

View File

@ -25,6 +25,10 @@ public:
* @param samples Samples in interleaved stereo PCM16 format.
*/
virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
virtual void Flush() = 0;
};
using SinkStreamPtr = std::unique_ptr<SinkStream>;

View File

@ -73,6 +73,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
void Stream::PlayNextBuffer() {
if (!IsPlaying()) {
// Ensure we are in playing state before playing the next buffer
sink_stream.Flush();
return;
}
@ -83,6 +84,7 @@ void Stream::PlayNextBuffer() {
if (queued_buffers.empty()) {
// No queued buffers - we are effectively paused
sink_stream.Flush();
return;
}
@ -90,6 +92,7 @@ void Stream::PlayNextBuffer() {
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->Samples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});

View File

@ -0,0 +1,68 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include "audio_core/time_stretch.h"
#include "common/logging/log.h"
namespace AudioCore {
TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count)
: m_sample_rate(sample_rate), m_channel_count(channel_count) {
m_sound_touch.setChannels(channel_count);
m_sound_touch.setSampleRate(sample_rate);
m_sound_touch.setPitch(1.0);
m_sound_touch.setTempo(1.0);
}
void TimeStretcher::Clear() {
m_sound_touch.clear();
}
void TimeStretcher::Flush() {
m_sound_touch.flush();
}
size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
// We were given actual_samples number of samples, and num_samples were requested from us.
double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
const double max_latency = 1.0; // seconds
const double max_backlog = m_sample_rate * max_latency;
const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
if (backlog_fullness > 5.0) {
// Too many samples in backlog: Don't push anymore on
num_in = 0;
}
// We ideally want the backlog to be about 50% full.
// This gives some headroom both ways to prevent underflow and overflow.
// We tweak current_ratio to encourage this.
constexpr double tweak_time_scale = 0.05; // seconds
const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
// This low-pass filter smoothes out variance in the calculated stretch ratio.
// The time-scale determines how responsive this filter is.
constexpr double lpf_time_scale = 2.0; // seconds
const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
// Place a lower limit of 5% speed. When a game boots up, there will be
// many silence samples. These do not need to be timestretched.
m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
m_sound_touch.setTempo(m_stretch_ratio);
LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
backlog_fullness);
m_sound_touch.putSamples(in, num_in);
return m_sound_touch.receiveSamples(out, num_out);
}
} // namespace AudioCore

View File

@ -0,0 +1,36 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <SoundTouch.h>
#include "common/common_types.h"
namespace AudioCore {
class TimeStretcher {
public:
TimeStretcher(u32 sample_rate, u32 channel_count);
/// @param in Input sample buffer
/// @param num_in Number of input frames in `in`
/// @param out Output sample buffer
/// @param num_out Desired number of output frames in `out`
/// @returns Actual number of frames written to `out`
size_t Process(const s16* in, size_t num_in, s16* out, size_t num_out);
void Clear();
void Flush();
private:
u32 m_sample_rate;
u32 m_channel_count;
soundtouch::SoundTouch m_sound_touch;
double m_stretch_ratio = 1.0;
};
} // namespace AudioCore