Merge pull request #1163 from FearlessTobi/add-audio-stretching

audio_core: Add audio stretching support
2025-06-19 21:17:53 -05:00 · 2018-09-12 18:23:54 -04:00
parent 49c4fe1f2f 957ddab679
commit 926dd41587
21 changed files with 463 additions and 49 deletions
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@ -17,6 +17,8 @@ add_library(audio_core STATIC
    sink_stream.h
    stream.cpp
    stream.h
+    time_stretch.cpp
+    time_stretch.h

    $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
 )
@ -24,6 +26,7 @@ add_library(audio_core STATIC
 create_target_directory_groups(audio_core)

 target_link_libraries(audio_core PUBLIC common core)
+target_link_libraries(audio_core PRIVATE SoundTouch)

 if(ENABLE_CUBEB)
    target_link_libraries(audio_core PRIVATE cubeb)
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@ -3,27 +3,23 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <atomic>
 #include <cstring>
-#include <mutex>
-
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
+#include "audio_core/time_stretch.h"
 #include "common/logging/log.h"
+#include "common/ring_buffer.h"
+#include "core/settings.h"

 namespace AudioCore {

-class SinkStreamImpl final : public SinkStream {
+class CubebSinkStream final : public SinkStream {
 public:
-    SinkStreamImpl(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
-                   const std::string& name)
-        : ctx{ctx}, num_channels{num_channels_} {
-
-        if (num_channels == 6) {
-            // 6-channel audio does not seem to work with cubeb + SDL, so we downsample this to 2
-            // channel for now
-            is_6_channel = true;
-            num_channels = 2;
-        }
+    CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
+                    const std::string& name)
+        : ctx{ctx}, num_channels{std::min(num_channels_, 2u)}, time_stretch{sample_rate,
+                                                                            num_channels} {

        cubeb_stream_params params{};
        params.rate = sample_rate;
@ -38,7 +34,7 @@ public:

        if (cubeb_stream_init(ctx, &stream_backend, name.c_str(), nullptr, nullptr, output_device,
                              &params, std::max(512u, minimum_latency),
-                              &SinkStreamImpl::DataCallback, &SinkStreamImpl::StateCallback,
+                              &CubebSinkStream::DataCallback, &CubebSinkStream::StateCallback,
                              this) != CUBEB_OK) {
            LOG_CRITICAL(Audio_Sink, "Error initializing cubeb stream");
            return;
@ -50,7 +46,7 @@ public:
        }
    }

-    ~SinkStreamImpl() {
+    ~CubebSinkStream() {
        if (!ctx) {
            return;
        }
@ -62,27 +58,32 @@ public:
        cubeb_stream_destroy(stream_backend);
    }

-    void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
-        if (!ctx) {
+    /// Pushes interleaved PCM16 samples onto the ring buffer drained by DataCallback.
+    ///
+    /// @param source_num_channels Number of interleaved channels per frame in `samples`.
+    /// @param samples             Interleaved PCM16 samples.
+    ///
+    /// When the source has more channels than this stream outputs, only the first
+    /// `num_channels` channels of every source frame are kept; otherwise the samples are
+    /// pushed unchanged.
+    void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
+        if (source_num_channels > num_channels) {
+            // Downmix (e.g. 6 channels to 2) by keeping the leading channels of each frame
+            std::vector<s16> buf;
+            buf.reserve(samples.size() * num_channels / source_num_channels);
+            // Walk complete source frames only: a trailing partial frame would otherwise be
+            // indexed past the end of `samples`.
+            for (size_t i = 0; i + source_num_channels <= samples.size();
+                 i += source_num_channels) {
+                for (size_t ch = 0; ch < num_channels; ch++) {
+                    buf.push_back(samples[i + ch]);
+                }
+            }
+            queue.Push(buf);
            return;
        }

-        std::lock_guard lock{queue_mutex};
+        queue.Push(samples);
+    }

-        queue.reserve(queue.size() + samples.size() * GetNumChannels());
+    /// Returns the number of queued samples divided by the given channel count, or 0 when the
+    /// stream has no cubeb context.
+    ///
+    /// Note that the parameter shadows the `num_channels` member: the division uses the
+    /// caller-supplied value, not the channel count this stream outputs.
+    size_t SamplesInQueue(u32 num_channels) const override {
+        if (!ctx)
+            return 0;

-        if (is_6_channel) {
-            // Downsample 6 channels to 2
-            const size_t sample_count_copy_size = samples.size() * 2;
-            queue.reserve(sample_count_copy_size);
-            for (size_t i = 0; i < samples.size(); i += num_channels) {
-                queue.push_back(samples[i]);
-                queue.push_back(samples[i + 1]);
-            }
-        } else {
-            // Copy as-is
-            std::copy(samples.begin(), samples.end(), std::back_inserter(queue));
-        }
+        return queue.Size() / num_channels;
+    }
+
+    /// Raises the flush flag; DataCallback reads it on the audio-stretching path and then
+    /// flushes the time stretcher.
+    void Flush() override {
+        should_flush = true;
    }

    u32 GetNumChannels() const {
@ -95,10 +96,11 @@ private:
    cubeb* ctx{};
    cubeb_stream* stream_backend{};
    u32 num_channels{};
-    bool is_6_channel{};

-    std::mutex queue_mutex;
-    std::vector<s16> queue;
+    /// Interleaved PCM16 samples pushed by EnqueueSamples and popped by DataCallback.
+    Common::RingBuffer<s16, 0x10000> queue;
+    /// Last frame DataCallback wrote to the output buffer; it is repeated to pad an underrun.
+    /// Value-initialised so that an underrun occurring before any full frame has been written
+    /// pads with zeros instead of indeterminate memory.
+    std::array<s16, 2> last_frame{};
+    /// Set by Flush(); read and cleared by DataCallback.
+    std::atomic<bool> should_flush{};
+    /// Used by DataCallback when Settings::values.enable_audio_stretching is set.
+    TimeStretcher time_stretch;

    static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                             void* output_buffer, long num_frames);
@ -144,38 +146,52 @@ CubebSink::~CubebSink() {
+/// Creates a new CubebSinkStream from the given parameters, appends it to `sink_streams`
+/// (which holds the owning pointer) and returns a reference to it.
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
                                         const std::string& name) {
    sink_streams.push_back(
-        std::make_unique<SinkStreamImpl>(ctx, sample_rate, num_channels, output_device, name));
+        std::make_unique<CubebSinkStream>(ctx, sample_rate, num_channels, output_device, name));
    return *sink_streams.back();
 }

-long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
-                                  void* output_buffer, long num_frames) {
-    SinkStreamImpl* impl = static_cast<SinkStreamImpl*>(user_data);
+/// cubeb data callback: fills `output_buffer` with `num_frames` frames of interleaved s16.
+///
+/// With Settings::values.enable_audio_stretching set, everything currently queued is popped and
+/// run through the time stretcher; otherwise samples are popped straight from the queue into
+/// the output buffer. Whatever part of the request could not be satisfied is padded by
+/// repeating `last_frame`.
+///
+/// @param user_data     The CubebSinkStream registered with cubeb_stream_init.
+/// @param output_buffer Destination buffer, written as interleaved s16 samples.
+/// @param num_frames    Number of frames requested.
+/// @returns `num_frames`, or 0 when `user_data` is null.
+long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
+                                   void* output_buffer, long num_frames) {
+    CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
    u8* buffer = reinterpret_cast<u8*>(output_buffer);

    if (!impl) {
        return {};
    }

-    std::lock_guard lock{impl->queue_mutex};
+    const size_t num_channels = impl->GetNumChannels();
+    const size_t samples_to_write = num_channels * num_frames;
+    size_t samples_written;

-    const size_t frames_to_write{
-        std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
+    if (Settings::values.enable_audio_stretching) {
+        const std::vector<s16> in{impl->queue.Pop()};
+        const size_t num_in{in.size() / num_channels};
+        s16* const out{reinterpret_cast<s16*>(buffer)};
+        const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
+        samples_written = out_frames * num_channels;

-    memcpy(buffer, impl->queue.data(), frames_to_write * sizeof(s16) * impl->GetNumChannels());
-    impl->queue.erase(impl->queue.begin(),
-                      impl->queue.begin() + frames_to_write * impl->GetNumChannels());
+        // Consume the flush request in a single atomic step. A separate load followed by a
+        // store of false could overwrite a request raised by Flush() in between.
+        if (impl->should_flush.exchange(false)) {
+            impl->time_stretch.Flush();
+        }
+    } else {
+        samples_written = impl->queue.Pop(buffer, samples_to_write);
+    }

-    if (frames_to_write < num_frames) {
-        // Fill the rest of the frames with silence
-        memset(buffer + frames_to_write * sizeof(s16) * impl->GetNumChannels(), 0,
-               (num_frames - frames_to_write) * sizeof(s16) * impl->GetNumChannels());
+    // Remember the final frame produced so that an underrun can repeat it
+    if (samples_written >= num_channels) {
+        std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
+                    num_channels * sizeof(s16));
+    }
+
+    // Fill the rest of the frames with last_frame
+    for (size_t i = samples_written; i < samples_to_write; i += num_channels) {
+        std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
    }

    return num_frames;
 }

-void SinkStreamImpl::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
+/// cubeb state callback registered in the constructor; currently ignores every state change.
+void CubebSinkStream::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}

 std::vector<std::string> ListCubebSinkDevices() {
    std::vector<std::string> device_list;
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@ -21,6 +21,12 @@ public:
 private:
    struct NullSinkStreamImpl final : SinkStream {
        void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
+
+        /// EnqueueSamples discards its input, so the queue is always reported as empty.
+        size_t SamplesInQueue(u32 /*num_channels*/) const override {
+            return 0;
+        }
+
+        /// Does nothing.
+        void Flush() override {}
    } null_sink_stream;
 };

--- a/src/audio_core/sink_stream.h
+++ b/src/audio_core/sink_stream.h
@ -25,6 +25,10 @@ public:
     * @param samples Samples in interleaved stereo PCM16 format.
     */
    virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
+
+    /**
+     * Reports how much audio is still waiting in the stream's queue.
+     * @param num_channels Channel count used by the implementation to scale its queued
+     *                     sample count (the cubeb sink divides by it).
+     *                     NOTE(review): confirm whether callers are expected to pass the
+     *                     source or the output channel count.
+     * @returns The queued amount; 0 for sinks that queue nothing.
+     */
+    virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
+
+    /**
+     * Called by Stream::PlayNextBuffer when the stream is not playing or has no queued
+     * buffers. The cubeb sink records the request and flushes its time stretcher from the
+     * audio callback; the null sink ignores it.
+     */
+    virtual void Flush() = 0;
 };

 using SinkStreamPtr = std::unique_ptr<SinkStream>;
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@ -73,6 +73,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
 void Stream::PlayNextBuffer() {
    if (!IsPlaying()) {
        // Ensure we are in playing state before playing the next buffer
+        sink_stream.Flush();
        return;
    }

@ -83,6 +84,7 @@ void Stream::PlayNextBuffer() {

    if (queued_buffers.empty()) {
        // No queued buffers - we are effectively paused
+        sink_stream.Flush();
        return;
    }

@ -90,6 +92,7 @@ void Stream::PlayNextBuffer() {
    queued_buffers.pop();

    VolumeAdjustSamples(active_buffer->Samples());
+
    sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());

    CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@ -0,0 +1,68 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include "audio_core/time_stretch.h"
+#include "common/logging/log.h"
+
+namespace AudioCore {
+
+/// Stores the stream format and configures the SoundTouch instance for it, with pitch and
+/// tempo both starting at 1.0.
+TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count)
+    : m_sample_rate{sample_rate}, m_channel_count{channel_count} {
+    m_sound_touch.setChannels(m_channel_count);
+    m_sound_touch.setSampleRate(m_sample_rate);
+    m_sound_touch.setPitch(1.0);
+    m_sound_touch.setTempo(1.0);
+}
+
+/// Forwards to soundtouch::SoundTouch::clear() on the wrapped instance.
+/// NOTE(review): presumably this discards all buffered samples - confirm against the
+/// SoundTouch documentation.
+void TimeStretcher::Clear() {
+    m_sound_touch.clear();
+}
+
+/// Forwards to soundtouch::SoundTouch::flush() on the wrapped instance.
+/// NOTE(review): presumably this pushes samples still held inside the processing pipeline
+/// to the output so they can be received - confirm against the SoundTouch documentation.
+void TimeStretcher::Flush() {
+    m_sound_touch.flush();
+}
+
+/// Feeds input frames to SoundTouch, retunes its tempo, and pulls stretched output frames.
+///
+/// The tempo is derived from the ratio of supplied to requested frames, nudged so that the
+/// SoundTouch backlog tends towards half of `max_backlog`, and smoothed by a low-pass filter.
+/// When the backlog exceeds five times `max_backlog`, the input of this call is dropped.
+///
+/// @param in       Input sample buffer
+/// @param num_in   Number of input frames in `in`
+/// @param out      Output sample buffer
+/// @param num_out  Desired number of output frames in `out`
+/// @returns Actual number of frames written to `out`
+size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
+    const double max_latency = 1.0; // seconds
+    const double max_backlog = m_sample_rate * max_latency;
+    const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
+
+    // Retune only when output was actually requested. With num_out == 0 the ratio below is
+    // num_in / 0 (inf or NaN) while the filter gain is 0, and 0 * inf = NaN would be stored in
+    // m_stretch_ratio; std::max does not remove a NaN, so every later call would inherit it.
+    if (num_out > 0) {
+        const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
+
+        // We were given num_in frames, and num_out frames were requested from us. This uses
+        // the caller's num_in, before the backlog check below may drop the input.
+        double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
+
+        // We ideally want the backlog to be about 50% full.
+        // This gives some headroom both ways to prevent underflow and overflow.
+        // We tweak current_ratio to encourage this.
+        constexpr double tweak_time_scale = 0.05; // seconds
+        const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
+        current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
+
+        // This low-pass filter smoothes out variance in the calculated stretch ratio.
+        // The time-scale determines how responsive this filter is.
+        constexpr double lpf_time_scale = 2.0; // seconds
+        const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
+        m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
+
+        // Place a lower limit of 5% speed.  When a game boots up, there will be
+        // many silence samples.  These do not need to be timestretched.
+        m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
+        m_sound_touch.setTempo(m_stretch_ratio);
+    }
+
+    if (backlog_fullness > 5.0) {
+        // Too many samples in backlog: Don't push anymore on
+        num_in = 0;
+    }
+
+    LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
+              backlog_fullness);
+
+    m_sound_touch.putSamples(in, num_in);
+    return m_sound_touch.receiveSamples(out, num_out);
+}
+
+} // namespace AudioCore
--- a/src/audio_core/time_stretch.h
+++ b/src/audio_core/time_stretch.h
@ -0,0 +1,36 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <SoundTouch.h>
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+/// Wraps a soundtouch::SoundTouch instance and continuously adapts its tempo so that the
+/// number of frames supplied to Process() is stretched to the number of frames requested.
+class TimeStretcher {
+public:
+    /// @param sample_rate    Sample rate of the stream, in frames per second
+    /// @param channel_count  Channel count handed to SoundTouch
+    TimeStretcher(u32 sample_rate, u32 channel_count);
+
+    /// Pushes input frames into SoundTouch and pulls stretched frames back out.
+    /// @param in       Input sample buffer
+    /// @param num_in   Number of input frames in `in`
+    /// @param out      Output sample buffer
+    /// @param num_out  Desired number of output frames in `out`
+    /// @returns Actual number of frames written to `out`
+    size_t Process(const s16* in, size_t num_in, s16* out, size_t num_out);
+
+    /// Calls clear() on the wrapped SoundTouch instance.
+    void Clear();
+
+    /// Calls flush() on the wrapped SoundTouch instance.
+    void Flush();
+
+private:
+    u32 m_sample_rate;
+    u32 m_channel_count;
+    soundtouch::SoundTouch m_sound_touch;
+    /// Smoothed tempo last passed to SoundTouch::setTempo; starts at normal speed.
+    double m_stretch_ratio{1.0};
+};
+
+} // namespace AudioCore