✨ Added fourier_transform()

CoolLibs · Nov 5, 2023 · d96cb22 · d96cb22
1 parent 34db1ac
commit d96cb22
Show file tree

Hide file tree

Showing 6 changed files with 198 additions and 43 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,7 +37,7 @@ add_subdirectory(lib/libnyquist)
 target_link_libraries(Audio PRIVATE libnyquist::libnyquist)
 
 # ---Add dj_fft---
-target_include_directories(Audio SYSTEM PUBLIC lib/dj_fft)
+target_include_directories(Audio SYSTEM PRIVATE lib/dj_fft)
 
 # ---Add RtAudioWrapper---
 add_subdirectory(lib/RtAudioWrapper)

diff --git a/include/Audio/Audio.hpp b/include/Audio/Audio.hpp
@@ -3,5 +3,5 @@
 #include "../../src/InputStream.hpp"
 #include "../../src/Player.hpp"
 #include "../../src/compute_volume.hpp"
-#include "../../src/load_audio_file.hpp"
-#include "dj_fft.h"
+#include "../../src/fourier_transform.hpp"
+#include "../../src/load_audio_file.hpp"
diff --git a/src/Player.hpp b/src/Player.hpp
@@ -1,5 +1,4 @@
 #pragma once
-
 #include <rtaudio/RtAudio.h>
 #include <cstdint>
 #include <vector>

diff --git a/src/fourier_transform.cpp b/src/fourier_transform.cpp
@@ -0,0 +1,92 @@
+#include "fourier_transform.hpp"
+#include <algorithm>
+#include <complex>
+#include <vector>
+#include "dj_fft.h"
+
+namespace Audio {
+
+template<std::integral T>
+static auto next_power_of_two(T n) -> T
+{
+    if (n != 0 && !(n & (n - 1)))
+        return n; // n is already a power of 2
+
+    T power = 1;
+    while (power < n)
+    {
+        power <<= 1;
+    }
+
+    return power;
+}
+
+/// Since the FFT requires a size that is a power of two, we add 0s at the end of the data.
+/// https://mechanicalvibration.com/Zero_Padding_FFTs.html
+static void zero_pad(std::vector<std::complex<float>>& data)
+{
+    data.resize(next_power_of_two(data.size()));
+}
+
+auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum
+{
+    // Create a vector of complex numbers containing the audio data
+    auto fft_input = std::vector<std::complex<float>>{};
+    fft_input.reserve(next_power_of_two(samples_count));
+    for_each_sample([&](float const sample) {
+        fft_input.emplace_back(sample);
+    });
+
+    // Make sure the size of fft_input is a power of 2.
+    zero_pad(fft_input);
+
+    // Compute the fft
+    auto        fft_output = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD);
+    float const delta_between_frequencies{audio_data_sample_rate / static_cast<float>(fft_output.size())}; // The values in the `fft_output` correspond to frequencies between 0 and sample_rate, evenly spaced.
+
+    // TODO(Audio-Philippe) Instead of computing the fft on a signal with many samples, and then resizing it to fit the requested `max_output_frequency_in_hz`, we could reduce it's sample rate before computing the fft, to minimize the number of frequencies that are computed for nothing (since they will be discarded afterwards anyways).
+
+    // Ignore the redondant information and the frequencies that are above max_frequency_in_hz.
+    {
+        size_t final_size = fft_output.size() / 2; // The second half is a mirror of the first half so we don't need it.
+        if (max_frequency_in_hz != -1.f)
+        {
+            final_size = std::min(
+                final_size,
+                static_cast<size_t>(max_frequency_in_hz / delta_between_frequencies)
+            );
+        }
+        fft_output.resize(final_size);
+    }
+
+    // Compute the amplitude corresponding to each frequency
+    auto spectrum = std::vector<float>{};
+    spectrum.reserve(fft_output.size());
+    std::transform(fft_output.begin(), fft_output.end(), std::back_inserter(spectrum), [](std::complex<float> const z) {
+        return std::abs(z);
+    });
+
+    return {spectrum, delta_between_frequencies};
+}
+
+auto fourier_transform(std::span<float const> audio_data, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum
+{
+    return fourier_transform(
+        audio_data.size(), [&](std::function<void(float)> const& callback) {
+            for (float const sample : audio_data)
+                callback(sample);
+        },
+        audio_data_sample_rate, max_frequency_in_hz
+    );
+}
+
+auto Spectrum::at_frequency(float frequency_in_hertz) const -> float
+{
+    assert(frequency_in_hertz >= 0.f);
+    auto const idx = static_cast<size_t>(frequency_in_hertz / frequency_delta_between_values_in_data);
+    if (idx >= data.size())
+        return 0.f;
+    return data[idx];
+}
+
+} // namespace Audio
diff --git a/src/fourier_transform.hpp b/src/fourier_transform.hpp
@@ -0,0 +1,33 @@
+#pragma once
+#include <functional>
+#include <span>
+
+namespace Audio {
+
+/// Function that takes a function and calls it on each audio sample constituting the data you wish to analyze.
+using ForEachSample = std::function<void(std::function<void(float)> const&)>;
+
+struct Spectrum {
+    /// Amplitudes of each frequency, where the first frequency is 0 Hz and they are evenly spaced and there is a delta of `frequency_delta_between_values_in_data` between each frequency.
+    std::vector<float> data;
+    /// In hz
+    float frequency_delta_between_values_in_data;
+
+    /// Evaluates the spectrum at the given frequency (in Hertz) and returns its amplitude.
+    [[nodiscard]] auto at_frequency(float frequency_in_hertz) const -> float;
+};
+
+/// Computes the fourier transform of the given signal.
+/// `for_each_sample` is a function that takes a callback and calls it for each of the samples of your audio data (see our tests for an example).
+/// Note that you should apply a window function to your audio data, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/
+/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value.
+/// NB: If the `samples_count` is not a power of two, we will zero-pad the audio data to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html
+auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum;
+
+/// Computes the fourier transform of the given `audio_data` signal.
+/// Note that you should apply a window function to your `audio_data`, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/
+/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value.
+/// NB: If the `samples_count` is not a power of two, we will zero-pad the `audio_data` to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html
+auto fourier_transform(std::span<float const> audio_data, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum;
+
+} // namespace Audio
diff --git a/tests/tests.cpp b/tests/tests.cpp
@@ -11,6 +11,12 @@
 
 // Learn how to use Dear ImGui: https://coollibs.github.io/contribute/Programming/dear-imgui
 
+static auto window(int64_t idx, int64_t size) -> float
+{
+    float const t = static_cast<float>(idx) / static_cast<float>(size - 1);
+    return 1.f - std::abs(2.f * t - 1.f); // Triangular window
+}
+
 auto main(int argc, char* argv[]) -> int
 {
     const int  exit_code              = doctest::Context{}.run();                   // Run all unit tests
@@ -25,36 +31,46 @@ auto main(int argc, char* argv[]) -> int
             [](RtAudioErrorType /* type */, std::string const& error_message) {
                 std::cerr << error_message << '\n';
             }};
+        static constexpr size_t nb_samples_in_input_stream{512};
+        input_stream.set_nb_of_retained_samples(nb_samples_in_input_stream);
         // Load the audio file
         Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/Monteverdi - L'Orfeo, Toccata.mp3");
         Audio::player().play();
 
-        static constexpr int64_t         N = 1024;  // input size NB: Must be a power of 2 for dj::fft1d
-        std::vector<std::complex<float>> myData(N); // input data
+        static constexpr int64_t fft_size{8000};
+        float                    max_spectrum_frequency_in_hz{15000.f};
 
         quick_imgui::loop("Audio tests", [&]() { // Open a window and run all the ImGui-related code
-            for (int64_t i = 0; i < N; i++)
-                myData[static_cast<size_t>(i)] = Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index(), 0);
-            auto const fftData = dj::fft1d(myData, dj::fft_dir::DIR_FWD);
-            auto       data    = std::vector<float>{};
-            std::transform(fftData.begin(), fftData.end(), std::back_inserter(data), [](auto const x) {
-                return std::abs(x);
-            });
+            auto const spectrum = Audio::fourier_transform(
+                fft_size,
+                [&](std::function<void(float)> const& callback) {
+                    for (int64_t i = 0; i < fft_size; i++)
+                        callback(
+                            window(i, fft_size)
+                            * Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index())
+                        );
+                },
+                static_cast<float>(Audio::player().audio_data().sample_rate),
+                max_spectrum_frequency_in_hz
+            );
 
             ImGui::Begin("Audio tests");
             // Player
             ImGui::SeparatorText("Player");
             if (ImGui::Button(Audio::player().properties().is_muted ? "Unmute" : "Mute"))
                 Audio::player().properties().is_muted = !Audio::player().properties().is_muted;
-            ImGui::PlotHistogram(
+            ImGui::PlotLines(
                 "Spectrum",
-                data.data(),
-                static_cast<int>(data.size()) / 2, // The second half is a mirror of the first half, so ne need to display it.
+                spectrum.data.data(),
+                static_cast<int>(spectrum.data.size()),
                 0, nullptr,
                 0.f, 1.f,
                 {0.f, 100.f}
             );
+            ImGui::SliderFloat("Max spectrum frequency", &max_spectrum_frequency_in_hz, 0.f, 22000.f, "%.0f Hertz");
+
             // Input stream
+            ImGui::NewLine();
             ImGui::SeparatorText("Input stream");
             auto const input_device_ids = input_stream.device_ids();
             if (ImGui::BeginCombo("Input device", input_stream.current_device_name().c_str()))
@@ -72,11 +88,11 @@ auto main(int argc, char* argv[]) -> int
                 ImGui::EndCombo();
             }
             auto data_from_input_stream = std::vector<float>{};
-            input_stream.for_each_sample(512, [&](float const sample) {
+            input_stream.for_each_sample(nb_samples_in_input_stream, [&](float const sample) {
                 data_from_input_stream.push_back(sample);
             });
-            ImGui::PlotHistogram(
-                "Input Data",
+            ImGui::PlotLines(
+                "Waveform",
                 data_from_input_stream.data(),
                 static_cast<int>(data_from_input_stream.size()),
                 0, nullptr,
@@ -115,32 +131,47 @@ TEST_CASE("Loading a .mp3 file")
     CHECK(Audio::player().audio_data().samples.size() == 9819648);
 }
 
-TEST_CASE("FFT")
+static auto is_big(float x) -> bool
 {
-    // Load the audio file
-    Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/10-1000-10000-20000.wav");
+    return x > 5.f;
+}
+static auto is_small(float x) -> bool
+{
+    return x < 0.1f;
+}
 
+static constexpr float TAU = 6.2831853071f;
+
+TEST_CASE("Fourier transform")
+{
     auto fft_input = std::vector<std::complex<float>>{};
 
-    static constexpr int64_t N = 65536; // FFT size. NB: Must be a power of 2
-    for (int64_t i = 0; i < N; i++)
-        fft_input.emplace_back(Audio::player().sample_unaltered_volume(i, 0)); // Only use 1 channel. This is simple, but ideally you should average the values across all the channels.
-
-    auto const spectrum = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD);
-
-    CHECK(spectrum.size() == N);
-    CHECK(std::abs(spectrum[16]) == doctest::Approx(38.669884));
-    CHECK(std::abs(spectrum[1598]) == doctest::Approx(27.571739));
-    CHECK(std::abs(spectrum[1599]) == doctest::Approx(21.486385));
-    CHECK(std::abs(spectrum[15984]) == doctest::Approx(29.728823));
-    CHECK(std::abs(spectrum[15985]) == doctest::Approx(18.963114));
-    CHECK(std::abs(spectrum[31968]) == doctest::Approx(10.106586));
-    CHECK(std::abs(spectrum[31969]) == doctest::Approx(35.716843));
-    CHECK(std::abs(spectrum[33567]) == doctest::Approx(35.765961));
-    CHECK(std::abs(spectrum[33568]) == doctest::Approx(10.012813));
-    CHECK(std::abs(spectrum[49551]) == doctest::Approx(19.058596));
-    CHECK(std::abs(spectrum[49552]) == doctest::Approx(29.651283));
-    CHECK(std::abs(spectrum[63937]) == doctest::Approx(21.579424));
-    CHECK(std::abs(spectrum[63938]) == doctest::Approx(27.487740));
-    CHECK(std::abs(spectrum[65520]) == doctest::Approx(38.676113));
+    static constexpr int64_t sample_rate = 44000; // Allows us to detect frequencies up to sample_rate / 2 = 22000Hz
+    static constexpr int64_t fft_size    = 8000;  // Will give us a good enough resolution (fft_size / 2 = 4000 values, spread between 0Hz and 22000Hz)
+
+    auto const spectrum = Audio::fourier_transform(
+        fft_size, [&](std::function<void(float)> const& callback) {
+            for (int64_t i = 0; i < fft_size; i++)
+            {
+                float time = static_cast<float>(i) / static_cast<float>(sample_rate);
+                callback(
+                    window(i, fft_size)
+                    * (std::sin(10.f * time * TAU)      // 10Hz frequency
+                       + std::sin(1000.f * time * TAU)  // 1000Hz frequency
+                       + std::sin(10000.f * time * TAU) // 10000Hz frequency
+                       + std::sin(20000.f * time * TAU) // 20000Hz frequency
+                    )
+                );
+            }
+        },
+        static_cast<float>(sample_rate)
+    );
+
+    CHECK(is_big(spectrum.at_frequency(10.f)));
+    CHECK(is_big(spectrum.at_frequency(1000.f)));
+    CHECK(is_big(spectrum.at_frequency(10000.f)));
+    CHECK(is_big(spectrum.at_frequency(20000.f)));
+    CHECK(is_small(spectrum.at_frequency(500.f)));
+    CHECK(is_small(spectrum.at_frequency(5000.f)));
+    CHECK(is_small(spectrum.at_frequency(15000.f)));
 }