From d96cb22710fbb9d554760ec261cd69f43e6c671c Mon Sep 17 00:00:00 2001 From: Jules Fouchy Date: Sun, 5 Nov 2023 15:15:11 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Added=20fourier=5Ftransform()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- include/Audio/Audio.hpp | 4 +- src/Player.hpp | 1 - src/fourier_transform.cpp | 92 ++++++++++++++++++++++++++++++++ src/fourier_transform.hpp | 33 ++++++++++++ tests/tests.cpp | 109 ++++++++++++++++++++++++-------------- 6 files changed, 198 insertions(+), 43 deletions(-) create mode 100644 src/fourier_transform.cpp create mode 100644 src/fourier_transform.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e2e938..1e77105 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ add_subdirectory(lib/libnyquist) target_link_libraries(Audio PRIVATE libnyquist::libnyquist) # ---Add dj_fft--- -target_include_directories(Audio SYSTEM PUBLIC lib/dj_fft) +target_include_directories(Audio SYSTEM PRIVATE lib/dj_fft) # ---Add RtAudioWrapper--- add_subdirectory(lib/RtAudioWrapper) diff --git a/include/Audio/Audio.hpp b/include/Audio/Audio.hpp index 4f711d6..0fc590b 100644 --- a/include/Audio/Audio.hpp +++ b/include/Audio/Audio.hpp @@ -3,5 +3,5 @@ #include "../../src/InputStream.hpp" #include "../../src/Player.hpp" #include "../../src/compute_volume.hpp" -#include "../../src/load_audio_file.hpp" -#include "dj_fft.h" \ No newline at end of file +#include "../../src/fourier_transform.hpp" +#include "../../src/load_audio_file.hpp" \ No newline at end of file diff --git a/src/Player.hpp b/src/Player.hpp index 9dcfcda..8685716 100644 --- a/src/Player.hpp +++ b/src/Player.hpp @@ -1,5 +1,4 @@ #pragma once - #include #include #include diff --git a/src/fourier_transform.cpp b/src/fourier_transform.cpp new file mode 100644 index 0000000..87bc615 --- /dev/null +++ b/src/fourier_transform.cpp @@ -0,0 +1,92 @@ +#include "fourier_transform.hpp" +#include +#include +#include +#include "dj_fft.h" + +namespace Audio { + +template +static auto next_power_of_two(T n) -> T +{ + if (n != 0 && !(n & (n - 1))) + return n; // n is already a power of 2 + + T power = 1; + while (power < n) + { + power <<= 1; + } + + return power; +} + +/// Since the FFT requires a size that is a power of two, we add 0s at the end of the data. +/// https://mechanicalvibration.com/Zero_Padding_FFTs.html +static void zero_pad(std::vector>& data) +{ + data.resize(next_power_of_two(data.size())); +} + +auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum +{ + // Create a vector of complex numbers containing the audio data + auto fft_input = std::vector>{}; + fft_input.reserve(next_power_of_two(samples_count)); + for_each_sample([&](float const sample) { + fft_input.emplace_back(sample); + }); + + // Make sure the size of fft_input is a power of 2. + zero_pad(fft_input); + + // Compute the fft + auto fft_output = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD); + float const delta_between_frequencies{audio_data_sample_rate / static_cast(fft_output.size())}; // The values in the `fft_output` correspond to frequencies between 0 and sample_rate, evenly spaced. + + // TODO(Audio-Philippe) Instead of computing the fft on a signal with many samples, and then resizing it to fit the requested `max_output_frequency_in_hz`, we could reduce it's sample rate before computing the fft, to minimize the number of frequencies that are computed for nothing (since they will be discarded afterwards anyways). + + // Ignore the redondant information and the frequencies that are above max_frequency_in_hz. + { + size_t final_size = fft_output.size() / 2; // The second half is a mirror of the first half so we don't need it. + if (max_frequency_in_hz != -1.f) + { + final_size = std::min( + final_size, + static_cast(max_frequency_in_hz / delta_between_frequencies) + ); + } + fft_output.resize(final_size); + } + + // Compute the amplitude corresponding to each frequency + auto spectrum = std::vector{}; + spectrum.reserve(fft_output.size()); + std::transform(fft_output.begin(), fft_output.end(), std::back_inserter(spectrum), [](std::complex const z) { + return std::abs(z); + }); + + return {spectrum, delta_between_frequencies}; +} + +auto fourier_transform(std::span audio_data, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum +{ + return fourier_transform( + audio_data.size(), [&](std::function const& callback) { + for (float const sample : audio_data) + callback(sample); + }, + audio_data_sample_rate, max_frequency_in_hz + ); +} + +auto Spectrum::at_frequency(float frequency_in_hertz) const -> float +{ + assert(frequency_in_hertz >= 0.f); + auto const idx = static_cast(frequency_in_hertz / frequency_delta_between_values_in_data); + if (idx >= data.size()) + return 0.f; + return data[idx]; +} + +} // namespace Audio \ No newline at end of file diff --git a/src/fourier_transform.hpp b/src/fourier_transform.hpp new file mode 100644 index 0000000..32a736c --- /dev/null +++ b/src/fourier_transform.hpp @@ -0,0 +1,33 @@ +#pragma once +#include +#include + +namespace Audio { + +/// Function that takes a function and calls it on each audio sample constituting the data you wish to analyze. +using ForEachSample = std::function const&)>; + +struct Spectrum { + /// Amplitudes of each frequency, where the first frequency is 0 Hz and they are evenly spaced and there is a delta of `frequency_delta_between_values_in_data` between each frequency. + std::vector data; + /// In hz + float frequency_delta_between_values_in_data; + + /// Evaluates the spectrum at the given frequency (in Hertz) and returns its amplitude. + [[nodiscard]] auto at_frequency(float frequency_in_hertz) const -> float; +}; + +/// Computes the fourier transform of the given signal. +/// `for_each_sample` is a function that takes a callback and calls it for each of the samples of your audio data (see our tests for an example). +/// Note that you should apply a window function to your audio data, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/ +/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value. +/// NB: If the `samples_count` is not a power of two, we will zero-pad the audio data to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html +auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum; + +/// Computes the fourier transform of the given `audio_data` signal. +/// Note that you should apply a window function to your `audio_data`, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/ +/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value. +/// NB: If the `samples_count` is not a power of two, we will zero-pad the `audio_data` to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html +auto fourier_transform(std::span audio_data, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum; + +} // namespace Audio \ No newline at end of file diff --git a/tests/tests.cpp b/tests/tests.cpp index 446afcf..501c7b2 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -11,6 +11,12 @@ // Learn how to use Dear ImGui: https://coollibs.github.io/contribute/Programming/dear-imgui +static auto window(int64_t idx, int64_t size) -> float +{ + float const t = static_cast(idx) / static_cast(size - 1); + return 1.f - std::abs(2.f * t - 1.f); // Triangular window +} + auto main(int argc, char* argv[]) -> int { const int exit_code = doctest::Context{}.run(); // Run all unit tests @@ -25,36 +31,46 @@ auto main(int argc, char* argv[]) -> int [](RtAudioErrorType /* type */, std::string const& error_message) { std::cerr << error_message << '\n'; }}; + static constexpr size_t nb_samples_in_input_stream{512}; + input_stream.set_nb_of_retained_samples(nb_samples_in_input_stream); // Load the audio file Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/Monteverdi - L'Orfeo, Toccata.mp3"); Audio::player().play(); - static constexpr int64_t N = 1024; // input size NB: Must be a power of 2 for dj::fft1d - std::vector> myData(N); // input data + static constexpr int64_t fft_size{8000}; + float max_spectrum_frequency_in_hz{15000.f}; quick_imgui::loop("Audio tests", [&]() { // Open a window and run all the ImGui-related code - for (int64_t i = 0; i < N; i++) - myData[static_cast(i)] = Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index(), 0); - auto const fftData = dj::fft1d(myData, dj::fft_dir::DIR_FWD); - auto data = std::vector{}; - std::transform(fftData.begin(), fftData.end(), std::back_inserter(data), [](auto const x) { - return std::abs(x); - }); + auto const spectrum = Audio::fourier_transform( + fft_size, + [&](std::function const& callback) { + for (int64_t i = 0; i < fft_size; i++) + callback( + window(i, fft_size) + * Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index()) + ); + }, + static_cast(Audio::player().audio_data().sample_rate), + max_spectrum_frequency_in_hz + ); ImGui::Begin("Audio tests"); // Player ImGui::SeparatorText("Player"); if (ImGui::Button(Audio::player().properties().is_muted ? "Unmute" : "Mute")) Audio::player().properties().is_muted = !Audio::player().properties().is_muted; - ImGui::PlotHistogram( + ImGui::PlotLines( "Spectrum", - data.data(), - static_cast(data.size()) / 2, // The second half is a mirror of the first half, so ne need to display it. + spectrum.data.data(), + static_cast(spectrum.data.size()), 0, nullptr, 0.f, 1.f, {0.f, 100.f} ); + ImGui::SliderFloat("Max spectrum frequency", &max_spectrum_frequency_in_hz, 0.f, 22000.f, "%.0f Hertz"); + // Input stream + ImGui::NewLine(); ImGui::SeparatorText("Input stream"); auto const input_device_ids = input_stream.device_ids(); if (ImGui::BeginCombo("Input device", input_stream.current_device_name().c_str())) @@ -72,11 +88,11 @@ auto main(int argc, char* argv[]) -> int ImGui::EndCombo(); } auto data_from_input_stream = std::vector{}; - input_stream.for_each_sample(512, [&](float const sample) { + input_stream.for_each_sample(nb_samples_in_input_stream, [&](float const sample) { data_from_input_stream.push_back(sample); }); - ImGui::PlotHistogram( - "Input Data", + ImGui::PlotLines( + "Waveform", data_from_input_stream.data(), static_cast(data_from_input_stream.size()), 0, nullptr, @@ -115,32 +131,47 @@ TEST_CASE("Loading a .mp3 file") CHECK(Audio::player().audio_data().samples.size() == 9819648); } -TEST_CASE("FFT") +static auto is_big(float x) -> bool { - // Load the audio file - Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/10-1000-10000-20000.wav"); + return x > 5.f; +} +static auto is_small(float x) -> bool +{ + return x < 0.1f; +} +static constexpr float TAU = 6.2831853071f; + +TEST_CASE("Fourier transform") +{ auto fft_input = std::vector>{}; - static constexpr int64_t N = 65536; // FFT size. NB: Must be a power of 2 - for (int64_t i = 0; i < N; i++) - fft_input.emplace_back(Audio::player().sample_unaltered_volume(i, 0)); // Only use 1 channel. This is simple, but ideally you should average the values across all the channels. - - auto const spectrum = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD); - - CHECK(spectrum.size() == N); - CHECK(std::abs(spectrum[16]) == doctest::Approx(38.669884)); - CHECK(std::abs(spectrum[1598]) == doctest::Approx(27.571739)); - CHECK(std::abs(spectrum[1599]) == doctest::Approx(21.486385)); - CHECK(std::abs(spectrum[15984]) == doctest::Approx(29.728823)); - CHECK(std::abs(spectrum[15985]) == doctest::Approx(18.963114)); - CHECK(std::abs(spectrum[31968]) == doctest::Approx(10.106586)); - CHECK(std::abs(spectrum[31969]) == doctest::Approx(35.716843)); - CHECK(std::abs(spectrum[33567]) == doctest::Approx(35.765961)); - CHECK(std::abs(spectrum[33568]) == doctest::Approx(10.012813)); - CHECK(std::abs(spectrum[49551]) == doctest::Approx(19.058596)); - CHECK(std::abs(spectrum[49552]) == doctest::Approx(29.651283)); - CHECK(std::abs(spectrum[63937]) == doctest::Approx(21.579424)); - CHECK(std::abs(spectrum[63938]) == doctest::Approx(27.487740)); - CHECK(std::abs(spectrum[65520]) == doctest::Approx(38.676113)); + static constexpr int64_t sample_rate = 44000; // Allows us to detect frequencies up to sample_rate / 2 = 22000Hz + static constexpr int64_t fft_size = 8000; // Will give us a good enough resolution (fft_size / 2 = 4000 values, spread between 0Hz and 22000Hz) + + auto const spectrum = Audio::fourier_transform( + fft_size, [&](std::function const& callback) { + for (int64_t i = 0; i < fft_size; i++) + { + float time = static_cast(i) / static_cast(sample_rate); + callback( + window(i, fft_size) + * (std::sin(10.f * time * TAU) // 10Hz frequency + + std::sin(1000.f * time * TAU) // 1000Hz frequency + + std::sin(10000.f * time * TAU) // 10000Hz frequency + + std::sin(20000.f * time * TAU) // 20000Hz frequency + ) + ); + } + }, + static_cast(sample_rate) + ); + + CHECK(is_big(spectrum.at_frequency(10.f))); + CHECK(is_big(spectrum.at_frequency(1000.f))); + CHECK(is_big(spectrum.at_frequency(10000.f))); + CHECK(is_big(spectrum.at_frequency(20000.f))); + CHECK(is_small(spectrum.at_frequency(500.f))); + CHECK(is_small(spectrum.at_frequency(5000.f))); + CHECK(is_small(spectrum.at_frequency(15000.f))); } \ No newline at end of file