Skip to content

Commit

Permalink
fix dumb bug in input randomiser, add basic double->string benchmarks
Browse files Browse the repository at this point in the history
* input randomiser was feeding a constant value previously, now actually randomising
* start to_string with the current method (sstream), a faster, more correct version (sstream_cached), snprintf, and std::to_chars
** NOTE: only std::to_chars and sstream_cached are correct in the face of locales

Run on (4 X 3500 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 6291K (x1)
-------------------------------------------------------------------------------------------------------------
Benchmark                                                                   Time             CPU   Iterations
-------------------------------------------------------------------------------------------------------------
RandFloatStrs/double_from_string_sstream                                 1012 ns         1001 ns       640000
RandFloatStrs/double_from_string_strtod                                   276 ns          276 ns      2488889
RandFloatStrs/double_from_string_strtod_fixed                             312 ns          308 ns      2133333
RandFloatStrs/double_from_string_strtod_fixed_const_ref                   307 ns          300 ns      2240000
RandFloatStrs/double_from_string_std_from_chars                           194 ns          188 ns      3733333
RandFloatCommaStrs/double_from_string_strtod_fixed_comma_ref              315 ns          314 ns      2240000
RandFloatCommaStrs/double_from_string_strtod_fixed_comma_const_ref        306 ns          305 ns      2357895
RandFloats/string_from_double_sstream                                    1372 ns         1381 ns       497778
RandFloats/string_from_double_sstream_cached                             1136 ns         1123 ns       640000
RandFloats/string_from_double_snprintf                                    536 ns          516 ns      1000000
RandFloats/string_from_double_std_to_chars                                116 ns          115 ns      6400000
  • Loading branch information
Crzyrndm committed Feb 29, 2020
1 parent 0adb8a6 commit 7ba36b5
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 39 deletions.
149 changes: 149 additions & 0 deletions benchmarks/microbenchmarks/double_to_string.cpp
Original file line number Diff line number Diff line change
@@ -1,2 +1,151 @@
// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them
// this has a few requirements
// - produces strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf)

#include "benchmark/benchmark.h"
#include <clocale>
#include <cstddef>
#include <cstdio>
#include <locale>
#include <random>
#include <sstream>
#include <string>
#include <vector>

namespace {

// Benchmark fixture that owns a large pool of random doubles to stringify.
//
// Template parameter:
//   Decimal_Locale - true  : run the benchmark under the "C" locale ('.' decimal separator)
//                    false : run under "de-DE" (',' decimal separator); if that locale name is
//                            not known to the platform, setlocale fails and the locale is left unchanged
//
// SetUp() records the process locale, switches it, and fills the pool.
// TearDown() restores the recorded locale and releases the pool.
template <bool Decimal_Locale = true>
class RandomFloats : public benchmark::Fixture
{
    static constexpr size_t Number_of_Elements = 1 << 20;
    static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
    // get_rand() wraps its index with a bit mask, which only works for power-of-two sizes
    static_assert((Number_of_Elements & (Number_of_Elements - 1)) == 0, "Number_of_Elements must be a power of two");

    std::vector<double> inputs;

    size_t index = 0;
    // Name of the locale that was active before SetUp() changed it.
    // Stored as an owning copy: the pointer returned by setlocale may be invalidated
    // by any later setlocale call.
    std::string previous_locale;

public:
    void SetUp(const ::benchmark::State &state)
    {
        // setlocale(category, "name") returns the *new* locale, so the old one has to be
        // queried up front (a null locale argument only queries) to be restorable later
        const char *active = std::setlocale(LC_ALL, nullptr);
        previous_locale = (active != nullptr) ? active : "C";

        std::setlocale(LC_ALL, Decimal_Locale ? "C" : "de-DE");

        std::random_device rd; // obtain a seed for the random number engine
        std::mt19937 gen(rd());
        // doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers
        // uniform is probably not the best distribution to use here, but it will do for now
        std::uniform_real_distribution<double> dis(-1'000, 1'000);
        // generate a large quantity of doubles to serialise
        inputs.reserve(Number_of_Elements);
        for (size_t i = 0; i < Number_of_Elements; ++i)
        {
            inputs.push_back(dis(gen));
        }
    }

    void TearDown(const ::benchmark::State &state)
    {
        // restore the locale that was active before SetUp()
        std::setlocale(LC_ALL, previous_locale.c_str());
        // gbench is keeping the fixtures alive somewhere, need to clear the data after use
        inputs = std::vector<double>{};
    }

    // Returns the next value from the pool, wrapping around at the end.
    // Only valid between SetUp() and TearDown().
    double &get_rand()
    {
        return inputs[++index & (Number_of_Elements - 1)];
    }
};

/// Formats a double with a freshly constructed std::stringstream, using the
/// stream's default float formatting limited to 15 significant figures.
/// The stream is not imbued with a specific locale here.
/// @param num value to format
/// @return the formatted text
std::string serialize_number_to_string(double num)
{
    // excel only serialises up to 15 significant figures; more digits and excel won't match
    constexpr int Excel_Digit_Precision = 15; //sf

    std::stringstream formatter;
    formatter.precision(Excel_Digit_Precision);
    formatter << num;

    return formatter.str();
}

/// Formats doubles through a single std::ostringstream that is configured once
/// (15 significant figures, "C" locale) and reused for every call.
class number_serialiser
{
    static constexpr int Excel_Digit_Precision = 15; //sf
    std::ostringstream buffer;

public:
    /// Configures the reusable stream: classic ("C") locale and 15 significant figures.
    explicit number_serialiser()
    {
        buffer.imbue(std::locale::classic());
        buffer.precision(Excel_Digit_Precision);
    }

    /// Formats a single value.
    /// @param d value to format
    /// @return the formatted text (a copy of the stream's contents)
    std::string serialise(double d)
    {
        // start from a clean stream: drop any error flags and the previous output
        buffer.clear();
        buffer.str(std::string{});

        buffer << d;
        return buffer.str();
    }
};

using RandFloats = RandomFloats<true>;
} // namespace

// Benchmark: one serialize_number_to_string call (fresh std::stringstream) per iteration.
BENCHMARK_F(RandFloats, string_from_double_sstream)
(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        const double value = get_rand();
        // keep the produced string observable so the call is not optimised away
        benchmark::DoNotOptimize(serialize_number_to_string(value));
    }
}

// Benchmark: one number_serialiser::serialise call per iteration.
// The serialiser is constructed once, outside the timed loop, and reused.
BENCHMARK_F(RandFloats, string_from_double_sstream_cached)
(benchmark::State &state)
{
    number_serialiser serialiser;
    while (state.KeepRunning())
    {
        const double value = get_rand();
        // keep the produced string observable so the call is not optimised away
        benchmark::DoNotOptimize(serialiser.serialise(value));
    }
}

// Benchmark: format a double with snprintf and copy the result into a std::string.
// NOTE: snprintf formats according to the current C locale (LC_NUMERIC), so this approach
// only yields a '.' decimal separator while the "C" locale is active.
BENCHMARK_F(RandFloats, string_from_double_snprintf)
(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // "%.15g" emits up to 15 significant figures, matching the stream based variants.
        // Worst case is "-1.23456789012345e-308": 22 characters plus the terminator.
        char buf[32];
        const int len = std::snprintf(buf, sizeof(buf), "%.15g", get_rand());

        // snprintf returns the length the full output *would* have had (or a negative
        // value on error), so clamp it to what was actually written into buf
        size_t written = 0;
        if (len > 0)
        {
            written = static_cast<size_t>(len) < sizeof(buf) ? static_cast<size_t>(len) : sizeof(buf) - 1;
        }

        benchmark::DoNotOptimize(
            std::string(buf, written));
    }
}

// locale names are different between OS's, and std::to_chars is only complete in MSVC
#ifdef _MSC_VER

#include <charconv>
// Benchmark: format a double with std::to_chars and copy the result into a std::string.
// std::to_chars is locale independent and always uses '.' as the decimal separator.
BENCHMARK_F(RandFloats, string_from_double_std_to_chars)
(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // General format limited to 15 significant figures, matching the other variants.
        // Worst case is "-1.23456789012345e-308" (22 characters), so 32 bytes always fit;
        // with a buffer that is too small to_chars fails with errc::value_too_large and
        // leaves the buffer contents unspecified.
        char buf[32];
        const std::to_chars_result result =
            std::to_chars(buf, buf + sizeof(buf), get_rand(), std::chars_format::general, 15);

        // result.ptr is one past the last character written
        benchmark::DoNotOptimize(
            std::string(buf, result.ptr));
    }
}

#endif
82 changes: 43 additions & 39 deletions benchmarks/microbenchmarks/string_to_double.cpp
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double
// this has a few requirements
// - expect numbers in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - expect strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - handles at least 15 significant figures (excel only serialises numbers up to 15sf)

#include <benchmark/benchmark.h>
#include <locale>
#include <random>
#include <sstream>

namespace {

// setup a large quantity of random doubles as strings
template <bool Decimal_Locale = true>
class RandomFloats : public benchmark::Fixture
class RandomFloatStrs : public benchmark::Fixture
{
static constexpr size_t Number_of_Elements = 1 << 20;
static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");

std::vector<std::string> inputs;

size_t index = 0;
const char *locale_str;
const char *locale_str = nullptr;

public:
void SetUp(const ::benchmark::State &state)
Expand Down Expand Up @@ -50,19 +53,18 @@ class RandomFloats : public benchmark::Fixture
{
// restore locale
setlocale(LC_ALL, locale_str);
// gbench is keeping the fixtures alive somewhere, need to clear the data...
// gbench is keeping the fixtures alive somewhere, need to clear the data after use
inputs = std::vector<std::string>{};
}

std::string &get_rand()
{
return inputs[++index & Number_of_Elements];
return inputs[++index & (Number_of_Elements - 1)];
}
};

// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf
// std::istringstream with the locale set to "C"
#include <sstream>
struct number_converter
{
number_converter()
Expand All @@ -82,32 +84,6 @@ struct number_converter
double result;
};

using RandFloats = RandomFloats<true>;

BENCHMARK_F(RandFloats, double_from_string_sstream)
(benchmark::State &state)
{
number_converter converter;
while (state.KeepRunning())
{
benchmark::DoNotOptimize(
converter.stold(get_rand()));
}
}

// using strtod
// https://en.cppreference.com/w/cpp/string/byte/strtof
// this naive usage is broken in the face of locales (fails condition 1)
#include <cstdlib>
BENCHMARK_F(RandFloats, double_from_string_strtod)
(benchmark::State &state)
{
while (state.KeepRunning())
{
benchmark::DoNotOptimize(
strtod(get_rand().c_str(), nullptr));
}
}

// to resolve the locale issue with strtod, a little preprocessing of the input is required
struct number_converter_mk2
Expand Down Expand Up @@ -151,7 +127,37 @@ struct number_converter_mk2
bool should_convert_to_comma = false;
};

BENCHMARK_F(RandFloats, double_from_string_strtod_fixed)
using RandFloatStrs = RandomFloatStrs<true>;
// german locale uses ',' as the separator
using RandFloatCommaStrs = RandomFloatStrs<false>;
} // namespace

BENCHMARK_F(RandFloatStrs, double_from_string_sstream)
(benchmark::State &state)
{
number_converter converter;
while (state.KeepRunning())
{
benchmark::DoNotOptimize(
converter.stold(get_rand()));
}
}

// using strtod
// https://en.cppreference.com/w/cpp/string/byte/strtof
// this naive usage is broken in the face of locales (fails condition 1)
#include <cstdlib>
BENCHMARK_F(RandFloatStrs, double_from_string_strtod)
(benchmark::State &state)
{
while (state.KeepRunning())
{
benchmark::DoNotOptimize(
strtod(get_rand().c_str(), nullptr));
}
}

BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed)
(benchmark::State &state)
{
number_converter_mk2 converter;
Expand All @@ -162,7 +168,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed)
}
}

BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref)
BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref)
(benchmark::State &state)
{
number_converter_mk2 converter;
Expand All @@ -178,7 +184,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref)
#ifdef _MSC_VER

#include <charconv>
BENCHMARK_F(RandFloats, double_from_string_std_from_chars)
BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars)
(benchmark::State &state)
{
while (state.KeepRunning())
Expand All @@ -191,9 +197,7 @@ BENCHMARK_F(RandFloats, double_from_string_std_from_chars)
}

// not using the standard "C" locale with '.' separator
// german locale uses ',' as the separator
using RandFloatsComma = RandomFloats<false>;
BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref)
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref)
(benchmark::State &state)
{
number_converter_mk2 converter;
Expand All @@ -204,7 +208,7 @@ BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref)
}
}

BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref)
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref)
(benchmark::State &state)
{
number_converter_mk2 converter;
Expand Down

0 comments on commit 7ba36b5

Please sign in to comment.