Skip to content

Commit

Permalink
✨ Added fourier_transform()
Browse files Browse the repository at this point in the history
  • Loading branch information
JulesFouchy committed Nov 5, 2023
1 parent 34db1ac commit d96cb22
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 43 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ add_subdirectory(lib/libnyquist)
target_link_libraries(Audio PRIVATE libnyquist::libnyquist)

# ---Add dj_fft---
target_include_directories(Audio SYSTEM PUBLIC lib/dj_fft)
target_include_directories(Audio SYSTEM PRIVATE lib/dj_fft)

# ---Add RtAudioWrapper---
add_subdirectory(lib/RtAudioWrapper)
Expand Down
4 changes: 2 additions & 2 deletions include/Audio/Audio.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
#include "../../src/InputStream.hpp"
#include "../../src/Player.hpp"
#include "../../src/compute_volume.hpp"
#include "../../src/load_audio_file.hpp"
#include "dj_fft.h"
#include "../../src/fourier_transform.hpp"
#include "../../src/load_audio_file.hpp"
1 change: 0 additions & 1 deletion src/Player.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#pragma once

#include <rtaudio/RtAudio.h>
#include <cstdint>
#include <vector>
Expand Down
92 changes: 92 additions & 0 deletions src/fourier_transform.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#include "fourier_transform.hpp"
#include <algorithm>
#include <complex>
#include <vector>
#include "dj_fft.h"

namespace Audio {

template<std::integral T>
static auto next_power_of_two(T n) -> T
{
if (n != 0 && !(n & (n - 1)))
return n; // n is already a power of 2

T power = 1;
while (power < n)
{
power <<= 1;
}

return power;
}

/// Since the FFT requires a size that is a power of two, we add 0s at the end of the data.
/// https://mechanicalvibration.com/Zero_Padding_FFTs.html
static void zero_pad(std::vector<std::complex<float>>& data)
{
data.resize(next_power_of_two(data.size()));
}

auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum
{
// Create a vector of complex numbers containing the audio data
auto fft_input = std::vector<std::complex<float>>{};
fft_input.reserve(next_power_of_two(samples_count));
for_each_sample([&](float const sample) {
fft_input.emplace_back(sample);
});

// Make sure the size of fft_input is a power of 2.
zero_pad(fft_input);

// Compute the fft
auto fft_output = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD);
float const delta_between_frequencies{audio_data_sample_rate / static_cast<float>(fft_output.size())}; // The values in the `fft_output` correspond to frequencies between 0 and sample_rate, evenly spaced.

// TODO(Audio-Philippe) Instead of computing the fft on a signal with many samples, and then resizing it to fit the requested `max_output_frequency_in_hz`, we could reduce it's sample rate before computing the fft, to minimize the number of frequencies that are computed for nothing (since they will be discarded afterwards anyways).

// Ignore the redondant information and the frequencies that are above max_frequency_in_hz.
{
size_t final_size = fft_output.size() / 2; // The second half is a mirror of the first half so we don't need it.
if (max_frequency_in_hz != -1.f)
{
final_size = std::min(
final_size,
static_cast<size_t>(max_frequency_in_hz / delta_between_frequencies)
);
}
fft_output.resize(final_size);
}

// Compute the amplitude corresponding to each frequency
auto spectrum = std::vector<float>{};
spectrum.reserve(fft_output.size());
std::transform(fft_output.begin(), fft_output.end(), std::back_inserter(spectrum), [](std::complex<float> const z) {
return std::abs(z);
});

return {spectrum, delta_between_frequencies};
}

auto fourier_transform(std::span<float const> audio_data, float audio_data_sample_rate, float max_frequency_in_hz) -> Spectrum
{
return fourier_transform(
audio_data.size(), [&](std::function<void(float)> const& callback) {
for (float const sample : audio_data)
callback(sample);
},
audio_data_sample_rate, max_frequency_in_hz
);
}

auto Spectrum::at_frequency(float frequency_in_hertz) const -> float
{
assert(frequency_in_hertz >= 0.f);
auto const idx = static_cast<size_t>(frequency_in_hertz / frequency_delta_between_values_in_data);
if (idx >= data.size())
return 0.f;
return data[idx];
}

} // namespace Audio
33 changes: 33 additions & 0 deletions src/fourier_transform.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once
#include <functional>
#include <span>

namespace Audio {

/// Function that takes a function and calls it on each audio sample constituting the data you wish to analyze.
using ForEachSample = std::function<void(std::function<void(float)> const&)>;

struct Spectrum {
/// Amplitudes of each frequency, where the first frequency is 0 Hz and they are evenly spaced and there is a delta of `frequency_delta_between_values_in_data` between each frequency.
std::vector<float> data;
/// In hz
float frequency_delta_between_values_in_data;

/// Evaluates the spectrum at the given frequency (in Hertz) and returns its amplitude.
[[nodiscard]] auto at_frequency(float frequency_in_hertz) const -> float;
};

/// Computes the fourier transform of the given signal.
/// `for_each_sample` is a function that takes a callback and calls it for each of the samples of your audio data (see our tests for an example).
/// Note that you should apply a window function to your audio data, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/
/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value.
/// NB: If the `samples_count` is not a power of two, we will zero-pad the audio data to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html
auto fourier_transform(size_t samples_count, ForEachSample const& for_each_sample, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum;

/// Computes the fourier transform of the given `audio_data` signal.
/// Note that you should apply a window function to your `audio_data`, to make sure it is 0 at the beginning and the end: https://digitalsoundandmusic.com/2-3-11-windowing-functions-to-eliminate-spectral-leakage/
/// You can optionally set `max_frequency_in_hz` to tell us the highest frequency that you are interested in, and we will not compute more than that, and return only frequencies up to that value.
/// NB: If the `samples_count` is not a power of two, we will zero-pad the `audio_data` to reach the next power of two: https://mechanicalvibration.com/Zero_Padding_FFTs.html
auto fourier_transform(std::span<float const> audio_data, float audio_data_sample_rate, float max_frequency_in_hz = -1.f) -> Spectrum;

} // namespace Audio
109 changes: 70 additions & 39 deletions tests/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@

// Learn how to use Dear ImGui: https://coollibs.github.io/contribute/Programming/dear-imgui

static auto window(int64_t idx, int64_t size) -> float
{
float const t = static_cast<float>(idx) / static_cast<float>(size - 1);
return 1.f - std::abs(2.f * t - 1.f); // Triangular window
}

auto main(int argc, char* argv[]) -> int
{
const int exit_code = doctest::Context{}.run(); // Run all unit tests
Expand All @@ -25,36 +31,46 @@ auto main(int argc, char* argv[]) -> int
[](RtAudioErrorType /* type */, std::string const& error_message) {
std::cerr << error_message << '\n';
}};
static constexpr size_t nb_samples_in_input_stream{512};
input_stream.set_nb_of_retained_samples(nb_samples_in_input_stream);
// Load the audio file
Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/Monteverdi - L'Orfeo, Toccata.mp3");
Audio::player().play();

static constexpr int64_t N = 1024; // input size NB: Must be a power of 2 for dj::fft1d
std::vector<std::complex<float>> myData(N); // input data
static constexpr int64_t fft_size{8000};
float max_spectrum_frequency_in_hz{15000.f};

quick_imgui::loop("Audio tests", [&]() { // Open a window and run all the ImGui-related code
for (int64_t i = 0; i < N; i++)
myData[static_cast<size_t>(i)] = Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index(), 0);
auto const fftData = dj::fft1d(myData, dj::fft_dir::DIR_FWD);
auto data = std::vector<float>{};
std::transform(fftData.begin(), fftData.end(), std::back_inserter(data), [](auto const x) {
return std::abs(x);
});
auto const spectrum = Audio::fourier_transform(
fft_size,
[&](std::function<void(float)> const& callback) {
for (int64_t i = 0; i < fft_size; i++)
callback(
window(i, fft_size)
* Audio::player().sample_unaltered_volume(i + Audio::player().current_frame_index())
);
},
static_cast<float>(Audio::player().audio_data().sample_rate),
max_spectrum_frequency_in_hz
);

ImGui::Begin("Audio tests");
// Player
ImGui::SeparatorText("Player");
if (ImGui::Button(Audio::player().properties().is_muted ? "Unmute" : "Mute"))
Audio::player().properties().is_muted = !Audio::player().properties().is_muted;
ImGui::PlotHistogram(
ImGui::PlotLines(
"Spectrum",
data.data(),
static_cast<int>(data.size()) / 2, // The second half is a mirror of the first half, so ne need to display it.
spectrum.data.data(),
static_cast<int>(spectrum.data.size()),
0, nullptr,
0.f, 1.f,
{0.f, 100.f}
);
ImGui::SliderFloat("Max spectrum frequency", &max_spectrum_frequency_in_hz, 0.f, 22000.f, "%.0f Hertz");

// Input stream
ImGui::NewLine();
ImGui::SeparatorText("Input stream");
auto const input_device_ids = input_stream.device_ids();
if (ImGui::BeginCombo("Input device", input_stream.current_device_name().c_str()))
Expand All @@ -72,11 +88,11 @@ auto main(int argc, char* argv[]) -> int
ImGui::EndCombo();
}
auto data_from_input_stream = std::vector<float>{};
input_stream.for_each_sample(512, [&](float const sample) {
input_stream.for_each_sample(nb_samples_in_input_stream, [&](float const sample) {
data_from_input_stream.push_back(sample);
});
ImGui::PlotHistogram(
"Input Data",
ImGui::PlotLines(
"Waveform",
data_from_input_stream.data(),
static_cast<int>(data_from_input_stream.size()),
0, nullptr,
Expand Down Expand Up @@ -115,32 +131,47 @@ TEST_CASE("Loading a .mp3 file")
CHECK(Audio::player().audio_data().samples.size() == 9819648);
}

TEST_CASE("FFT")
static auto is_big(float x) -> bool
{
// Load the audio file
Audio::load_audio_file(Audio::player(), exe_path::dir() / "../../tests/res/10-1000-10000-20000.wav");
return x > 5.f;
}
static auto is_small(float x) -> bool
{
return x < 0.1f;
}

static constexpr float TAU = 6.2831853071f;

TEST_CASE("Fourier transform")
{
auto fft_input = std::vector<std::complex<float>>{};

static constexpr int64_t N = 65536; // FFT size. NB: Must be a power of 2
for (int64_t i = 0; i < N; i++)
fft_input.emplace_back(Audio::player().sample_unaltered_volume(i, 0)); // Only use 1 channel. This is simple, but ideally you should average the values across all the channels.

auto const spectrum = dj::fft1d(fft_input, dj::fft_dir::DIR_FWD);

CHECK(spectrum.size() == N);
CHECK(std::abs(spectrum[16]) == doctest::Approx(38.669884));
CHECK(std::abs(spectrum[1598]) == doctest::Approx(27.571739));
CHECK(std::abs(spectrum[1599]) == doctest::Approx(21.486385));
CHECK(std::abs(spectrum[15984]) == doctest::Approx(29.728823));
CHECK(std::abs(spectrum[15985]) == doctest::Approx(18.963114));
CHECK(std::abs(spectrum[31968]) == doctest::Approx(10.106586));
CHECK(std::abs(spectrum[31969]) == doctest::Approx(35.716843));
CHECK(std::abs(spectrum[33567]) == doctest::Approx(35.765961));
CHECK(std::abs(spectrum[33568]) == doctest::Approx(10.012813));
CHECK(std::abs(spectrum[49551]) == doctest::Approx(19.058596));
CHECK(std::abs(spectrum[49552]) == doctest::Approx(29.651283));
CHECK(std::abs(spectrum[63937]) == doctest::Approx(21.579424));
CHECK(std::abs(spectrum[63938]) == doctest::Approx(27.487740));
CHECK(std::abs(spectrum[65520]) == doctest::Approx(38.676113));
static constexpr int64_t sample_rate = 44000; // Allows us to detect frequencies up to sample_rate / 2 = 22000Hz
static constexpr int64_t fft_size = 8000; // Will give us a good enough resolution (fft_size / 2 = 4000 values, spread between 0Hz and 22000Hz)

auto const spectrum = Audio::fourier_transform(
fft_size, [&](std::function<void(float)> const& callback) {
for (int64_t i = 0; i < fft_size; i++)
{
float time = static_cast<float>(i) / static_cast<float>(sample_rate);
callback(
window(i, fft_size)
* (std::sin(10.f * time * TAU) // 10Hz frequency
+ std::sin(1000.f * time * TAU) // 1000Hz frequency
+ std::sin(10000.f * time * TAU) // 10000Hz frequency
+ std::sin(20000.f * time * TAU) // 20000Hz frequency
)
);
}
},
static_cast<float>(sample_rate)
);

CHECK(is_big(spectrum.at_frequency(10.f)));
CHECK(is_big(spectrum.at_frequency(1000.f)));
CHECK(is_big(spectrum.at_frequency(10000.f)));
CHECK(is_big(spectrum.at_frequency(20000.f)));
CHECK(is_small(spectrum.at_frequency(500.f)));
CHECK(is_small(spectrum.at_frequency(5000.f)));
CHECK(is_small(spectrum.at_frequency(15000.f)));
}

0 comments on commit d96cb22

Please sign in to comment.