diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4ab939351..3c00ccda1 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -41,3 +41,8 @@ foreach(BENCHMARK_SOURCE IN ITEMS ${BENCHMARK_SOURCES}) $) endif() endforeach() + +option(XLNT_MICROBENCH_ENABLED "Enable small benchmarks typically used for development" OFF) +if (XLNT_MICROBENCH_ENABLED) + add_subdirectory(microbenchmarks) +endif() \ No newline at end of file diff --git a/benchmarks/microbenchmarks/CMakeLists.txt b/benchmarks/microbenchmarks/CMakeLists.txt new file mode 100644 index 000000000..745df1b82 --- /dev/null +++ b/benchmarks/microbenchmarks/CMakeLists.txt @@ -0,0 +1,36 @@ +# FetchContent added in cmake v3.11 +# https://cmake.org/cmake/help/v3.11/module/FetchContent.html +# this file is behind a feature flag (XLNT_MICROBENCH_ENABLED) so the primary build is not affected +cmake_minimum_required(VERSION 3.11) +project(xlnt_ubench) + +# acquire google benchmark dependency +# disable generation of the various test projects +set(BENCHMARK_ENABLE_TESTING OFF) +# gtest not required +set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + +include(FetchContent) +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark + GIT_TAG v1.5.0 +) +# download if not already present +FetchContent_GetProperties(googlebenchmark) +if(NOT googlebenchmark_POPULATED) + FetchContent_Populate(googlebenchmark) + add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR}) +endif() +# equivalent of add_subdirectory, now available for use +FetchContent_MakeAvailable(googlebenchmark) + + +add_executable(xlnt_ubench) +target_sources(xlnt_ubench + PRIVATE + string_to_double.cpp + double_to_string.cpp +) +target_link_libraries(xlnt_ubench benchmark_main xlnt) +target_compile_features(xlnt_ubench PRIVATE cxx_std_17) \ No newline at end of file diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp new file mode 100644 index 000000000..0b82803f6 --- /dev/null +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -0,0 +1,207 @@ +// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them +// this has a few requirements +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf) + +#include "benchmark/benchmark.h" +#include +#include +#include + +namespace { + +// setup a large quantity of random doubles as strings +template +class RandomFloats : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str = nullptr; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + inputs.push_back(d); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data after use + inputs = std::vector{}; + } + + double &get_rand() + { + return inputs[++index & (Number_of_Elements - 1)]; + } +}; + +/// Takes in a double and outputs a string form of that number which will +/// serialise and deserialise without loss of precision +std::string serialize_number_to_string(double num) +{ + // more digits and excel won't match + constexpr int Excel_Digit_Precision = 15; //sf + std::stringstream ss; + ss.precision(Excel_Digit_Precision); + ss << num; + return ss.str(); +} + +class number_serialiser +{ + static constexpr int Excel_Digit_Precision = 15; //sf + std::ostringstream ss; + +public: + explicit number_serialiser() + { + ss.precision(Excel_Digit_Precision); + ss.imbue(std::locale("C")); + } + + std::string serialise(double d) + { + ss.str(""); // reset string buffer + ss.clear(); // reset any error flags + ss << d; + return ss.str(); + } +}; + +class number_serialiser_mk2 +{ + static constexpr int Excel_Digit_Precision = 15; //sf + bool should_convert_comma; + + void convert_comma(char *buf, int len) + { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, ','); + if (decimal != buf_end) + { + *decimal = '.'; + } + } + +public: + explicit number_serialiser_mk2() + : should_convert_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + std::string serialise(double d) + { + char buf[Excel_Digit_Precision + 1]; // need space for trailing '\0' + int len = snprintf(buf, sizeof(buf), "%.15g", d); + if (should_convert_comma) + { + convert_comma(buf, len); + } + return std::string(buf, len); + } +}; + +using RandFloats = RandomFloats; +using RandFloatsComma = RandomFloats; +} // namespace + +BENCHMARK_F(RandFloats, string_from_double_sstream) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + serialize_number_to_string(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_sstream_cached) +(benchmark::State &state) +{ + number_serialiser ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_snprintf) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + int len = snprintf(buf, sizeof(buf), "%.15g", get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, len)); + } +} + +BENCHMARK_F(RandFloats, string_from_double_snprintf_fixed) +(benchmark::State &state) +{ + number_serialiser_mk2 ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloats, string_from_double_std_to_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + std::to_chars_result result = std::to_chars(buf, buf + std::size(buf), get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, result.ptr)); + } +} + +BENCHMARK_F(RandFloatsComma, string_from_double_snprintf_fixed_comma) +(benchmark::State &state) +{ + number_serialiser_mk2 ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + +#endif \ No newline at end of file diff --git a/benchmarks/microbenchmarks/string_to_double.cpp b/benchmarks/microbenchmarks/string_to_double.cpp new file mode 100644 index 000000000..5392f0a25 --- /dev/null +++ b/benchmarks/microbenchmarks/string_to_double.cpp @@ -0,0 +1,223 @@ +// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double +// this has a few requirements +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - handles atleast 15 significant figures (excel only serialises numbers up to 15sf) + +#include +#include +#include +#include + +namespace { + +// setup a large quantity of random doubles as strings +template +class RandomFloatStrs : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str = nullptr; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + char buf[16]; + snprintf(buf, 16, "%.15f", d); + inputs.push_back(std::string(buf)); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data after use + inputs = std::vector{}; + } + + std::string &get_rand() + { + return inputs[++index & (Number_of_Elements - 1)]; + } +}; + +// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf +// std::istringstream with the locale set to "C" +struct number_converter +{ + number_converter() + { + stream.imbue(std::locale("C")); + } + + double stold(const std::string &s) + { + stream.str(s); + stream.clear(); + stream >> result; + return result; + } + + std::istringstream stream; + double result; +}; + + +// to resolve the locale issue with strtod, a little preprocessing of the input is required +struct number_converter_mk2 +{ + explicit number_converter_mk2() + : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + double stold(std::string &s) const noexcept + { + assert(!s.empty()); + if (should_convert_to_comma) + { + auto decimal_pt = std::find(s.begin(), s.end(), '.'); + if (decimal_pt != s.end()) + { + *decimal_pt = ','; + } + } + return strtod(s.c_str(), nullptr); + } + + double stold(const std::string &s) const + { + assert(!s.empty()); + if (!should_convert_to_comma) + { + return strtod(s.c_str(), nullptr); + } + std::string copy(s); + auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); + if (decimal_pt != copy.end()) + { + *decimal_pt = ','; + } + return strtod(copy.c_str(), nullptr); + } + +private: + bool should_convert_to_comma = false; +}; + +using RandFloatStrs = RandomFloatStrs; +// german locale uses ',' as the seperator +using RandFloatCommaStrs = RandomFloatStrs; +} // namespace + +BENCHMARK_F(RandFloatStrs, double_from_string_sstream) +(benchmark::State &state) +{ + number_converter converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +// using strotod +// https://en.cppreference.com/w/cpp/string/byte/strtof +// this naive usage is broken in the face of locales (fails condition 1) +#include +BENCHMARK_F(RandFloatStrs, double_from_string_strtod) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + strtod(get_rand().c_str(), nullptr)); + } +} + +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + const std::string &input = get_rand(); + double output; + benchmark::DoNotOptimize( + std::from_chars(input.data(), input.data() + input.size(), output)); + } +} + +// not using the standard "C" locale with '.' seperator +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +#endif \ No newline at end of file diff --git a/benchmarks/spreadsheet-load.cpp b/benchmarks/spreadsheet-load.cpp index 0caadb025..64318c791 100644 --- a/benchmarks/spreadsheet-load.cpp +++ b/benchmarks/spreadsheet-load.cpp @@ -5,7 +5,7 @@ namespace { using milliseconds_d = std::chrono::duration; -void run_test(const xlnt::path &file, int runs = 10) +void run_load_test(const xlnt::path &file, int runs = 10) { std::cout << file.string() << "\n\n"; @@ -24,10 +24,35 @@ void run_test(const xlnt::path &file, int runs = 10) std::cout << milliseconds_d(test_timings.back()).count() << " ms\n"; } } + +void run_save_test(const xlnt::path &file, int runs = 10) +{ + std::cout << file.string() << "\n\n"; + + xlnt::workbook wb; + wb.load(file); + const xlnt::path save_path(file.filename()); + + std::vector test_timings; + + for (int i = 0; i < runs; ++i) + { + auto start = std::chrono::steady_clock::now(); + + wb.save(save_path); + + auto end = std::chrono::steady_clock::now(); + test_timings.push_back(end - start); + std::cout << milliseconds_d(test_timings.back()).count() << " ms\n"; + } +} } // namespace int main() { - run_test(path_helper::benchmark_file("large.xlsx")); - run_test(path_helper::benchmark_file("very_large.xlsx")); + run_load_test(path_helper::benchmark_file("large.xlsx")); + run_load_test(path_helper::benchmark_file("very_large.xlsx")); + + run_save_test(path_helper::benchmark_file("large.xlsx")); + run_save_test(path_helper::benchmark_file("very_large.xlsx")); } \ No newline at end of file diff --git a/include/xlnt/cell/index_types.hpp b/include/xlnt/cell/index_types.hpp index 2c14a0e00..1834d92e4 100644 --- a/include/xlnt/cell/index_types.hpp +++ b/include/xlnt/cell/index_types.hpp @@ -92,26 +92,11 @@ class XLNT_API column_t /// column_t(const char *column_string); - /// - /// Copy constructor. Constructs a column by copying it from other. - /// - column_t(const column_t &other); - - /// - /// Move constructor. Constructs a column by moving it from other. - /// - column_t(column_t &&other); - /// /// Returns a string representation of this column index. /// std::string column_string() const; - /// - /// Sets this column to be the same as rhs's and return reference to self. - /// - column_t &operator=(column_t rhs); - /// /// Sets this column to be equal to rhs and return reference to self. /// diff --git a/include/xlnt/utils/numeric.hpp b/include/xlnt/utils/numeric.hpp index 28f37a909..cbf030949 100644 --- a/include/xlnt/utils/numeric.hpp +++ b/include/xlnt/utils/numeric.hpp @@ -34,21 +34,6 @@ namespace xlnt { namespace detail { -/// -/// Takes in any number and outputs a string form of that number which will -/// serialise and deserialise without loss of precision -/// -template -std::string serialize_number_to_string(Number num) -{ - // more digits and excel won't match - constexpr int Excel_Digit_Precision = 15; //sf - std::stringstream ss; - ss.precision(Excel_Digit_Precision); - ss << num; - return ss.str(); -} - /// /// constexpr abs /// @@ -117,45 +102,72 @@ bool float_equals(const LNumber &lhs, const RNumber &rhs, return ((lhs + scaled_fuzz) >= rhs) && ((rhs + scaled_fuzz) >= lhs); } -struct number_converter +class number_serialiser { - explicit number_converter() - : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + static constexpr int Excel_Digit_Precision = 15; //sf + bool should_convert_comma; + + static void convert_comma_to_pt(char *buf, int len) { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, ','); + if (decimal != buf_end) + { + *decimal = '.'; + } } - double stold(std::string &s) const noexcept + static void convert_pt_to_comma(char *buf, size_t len) + { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, '.'); + if (decimal != buf_end) + { + *decimal = ','; + } + } + +public: + explicit number_serialiser() + : should_convert_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + std::string serialise(double d) const + { + char buf[30]; + int len = snprintf(buf, sizeof(buf), "%.15g", d); + if (should_convert_comma) + { + convert_comma_to_pt(buf, len); + } + return std::string(buf, static_cast(len)); + } + + double deserialise(std::string &s) const noexcept { assert(!s.empty()); - if (should_convert_to_comma) + if (should_convert_comma) { - auto decimal_pt = std::find(s.begin(), s.end(), '.'); - if (decimal_pt != s.end()) - { - *decimal_pt = ','; - } + // s.data() doesn't have a non-const overload until c++17, hence this little dance + convert_pt_to_comma(&s[0], s.size()); } return strtod(s.c_str(), nullptr); } - double stold(const std::string &s) const + double deserialise(const std::string &s) const { assert(!s.empty()); - if (!should_convert_to_comma) + if (!should_convert_comma) { return strtod(s.c_str(), nullptr); } - std::string copy(s); - auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); - if (decimal_pt != copy.end()) - { - *decimal_pt = ','; - } - return strtod(copy.c_str(), nullptr); + char buf[30]; + assert(s.size() < sizeof(buf)); + auto copy_end = std::copy(s.begin(), s.end(), buf); + convert_pt_to_comma(buf, static_cast(copy_end - buf)); + return strtod(buf, nullptr); } - -private: - bool should_convert_to_comma = false; }; } // namespace detail diff --git a/include/xlnt/worksheet/range_reference.hpp b/include/xlnt/worksheet/range_reference.hpp index 76ba94cf3..907854a8d 100644 --- a/include/xlnt/worksheet/range_reference.hpp +++ b/include/xlnt/worksheet/range_reference.hpp @@ -69,8 +69,6 @@ class XLNT_API range_reference range_reference(column_t column_index_start, row_t row_index_start, column_t column_index_end, row_t row_index_end); - range_reference(const range_reference &ref); - /// /// Returns true if the range has a width and height of 1 cell. /// @@ -151,11 +149,6 @@ class XLNT_API range_reference /// bool operator!=(const char *reference_string) const; - /// - /// Assigns the extents of the provided range to this range. - /// - range_reference &operator=(const range_reference &ref); - private: /// /// The top left cell in the range diff --git a/source/cell/cell.cpp b/source/cell/cell.cpp index ef6e7b241..e47f2a939 100644 --- a/source/cell/cell.cpp +++ b/source/cell/cell.cpp @@ -199,7 +199,7 @@ cell::cell(detail::cell_impl *d) bool cell::garbage_collectible() const { - return !(has_value() || is_merged() || phonetics_visible() || has_formula() || has_format() || has_hyperlink()); + return d_->is_garbage_collectible(); } void cell::value(std::nullptr_t) diff --git a/source/cell/index_types.cpp b/source/cell/index_types.cpp index 3c306a004..cecc6b0e3 100644 --- a/source/cell/index_types.cpp +++ b/source/cell/index_types.cpp @@ -109,27 +109,11 @@ column_t::column_t(const char *column_string) { } -column_t::column_t(const column_t &other) - : column_t(other.index) -{ -} - -column_t::column_t(column_t &&other) -{ - swap(*this, other); -} - std::string column_t::column_string() const { return column_string_from_index(index); } -column_t &column_t::operator=(column_t rhs) -{ - swap(*this, rhs); - return *this; -} - column_t &column_t::operator=(const std::string &rhs) { return *this = column_t(rhs); diff --git a/source/detail/header_footer/header_footer_code.cpp b/source/detail/header_footer/header_footer_code.cpp index f7560eb82..014234de0 100644 --- a/source/detail/header_footer/header_footer_code.cpp +++ b/source/detail/header_footer/header_footer_code.cpp @@ -27,7 +27,7 @@ namespace xlnt { namespace detail { -std::array, 3> decode_header_footer(const std::string &hf_string) +std::array, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser) { std::array, 3> result; @@ -216,7 +216,7 @@ std::array, 3> decode_header_footer(const std::s tokens.push_back(token); } - const auto parse_section = [&tokens, &result](hf_code code) { + const auto parse_section = [&tokens, &result, &serialiser](hf_code code) { std::vector end_codes{hf_code::left_section, hf_code::center_section, hf_code::right_section}; end_codes.erase(std::find(end_codes.begin(), end_codes.end(), code)); @@ -297,7 +297,7 @@ std::array, 3> decode_header_footer(const std::s current_run.second = xlnt::font(); } - current_run.second.get().size(std::stod(current_token.value)); + current_run.second.get().size(serialiser.deserialise(current_token.value)); break; } @@ -460,7 +460,7 @@ std::array, 3> decode_header_footer(const std::s return result; } -std::string encode_header_footer(const rich_text &t, header_footer::location where) +std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser &serialiser) { const auto location_code_map = std::unordered_map #include #include +#include namespace xlnt { namespace detail { -std::array, 3> decode_header_footer(const std::string &hf_string); -std::string encode_header_footer(const rich_text &t, header_footer::location where); +std::array, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser); +std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser& serialiser); } // namespace detail } // namespace xlnt diff --git a/source/detail/implementations/cell_impl.hpp b/source/detail/implementations/cell_impl.hpp index 52e594443..110974147 100644 --- a/source/detail/implementations/cell_impl.hpp +++ b/source/detail/implementations/cell_impl.hpp @@ -47,7 +47,7 @@ struct cell_impl cell_impl(cell_impl &&other) = default; cell_impl &operator=(const cell_impl &other) = default; cell_impl &operator=(cell_impl &&other) = default; - + cell_type type_; worksheet_impl *parent_; @@ -65,6 +65,11 @@ struct cell_impl optional hyperlink_; optional format_; optional comment_; + + bool is_garbage_collectible() const + { + return !(type_ != cell_type::empty || is_merged_ || phonetics_visible_ || formula_.is_set() || format_.is_set() || hyperlink_.is_set()); + } }; inline bool operator==(const cell_impl &lhs, const cell_impl &rhs) diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index ea64b1318..bde76349f 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -307,14 +307,14 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) } // inside element -std::pair parse_row(xml::parser *parser, xlnt::detail::number_converter &converter, std::vector &parsed_cells) +std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells) { std::pair props; for (auto &attr : parser->attribute_map()) { if (string_equal(attr.first.name(), "dyDescent")) { - props.first.dy_descent = converter.stold(attr.second.value); + props.first.dy_descent = converter.deserialise(attr.second.value); } else if (string_equal(attr.first.name(), "spans")) { @@ -322,7 +322,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail } else if (string_equal(attr.first.name(), "ht")) { - props.first.height = converter.stold(attr.second.value); + props.first.height = converter.deserialise(attr.second.value); } else if (string_equal(attr.first.name(), "s")) { @@ -382,7 +382,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail } // inside element -Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_converter &converter) +Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter) { Sheet_Data sheet_data; int level = 1; // nesting level @@ -480,7 +480,7 @@ cell xlsx_consumer::read_cell() if (parser().attribute_present("ht")) { - row_properties.height = converter_.stold(parser().attribute("ht")); + row_properties.height = converter_.deserialise(parser().attribute("ht")); } if (parser().attribute_present("customHeight")) @@ -495,7 +495,7 @@ cell xlsx_consumer::read_cell() if (parser().attribute_present(qn("x14ac", "dyDescent"))) { - row_properties.dy_descent = converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); + row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent"))); } if (parser().attribute_present("spans")) @@ -602,7 +602,7 @@ cell xlsx_consumer::read_cell() } else if (type == "s") { - cell.d_->value_numeric_ = converter_.stold(value_string); + cell.d_->value_numeric_ = converter_.deserialise(value_string); cell.data_type(cell::type::shared_string); } else if (type == "b") // boolean @@ -611,7 +611,7 @@ cell xlsx_consumer::read_cell() } else if (type == "n") // numeric { - cell.value(converter_.stold(value_string)); + cell.value(converter_.deserialise(value_string)); } else if (!value_string.empty() && value_string[0] == '#') { @@ -863,23 +863,23 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id) if (parser().attribute_present("baseColWidth")) { ws.d_->format_properties_.base_col_width = - converter_.stold(parser().attribute("baseColWidth")); + converter_.deserialise(parser().attribute("baseColWidth")); } if (parser().attribute_present("defaultColWidth")) { ws.d_->format_properties_.default_column_width = - converter_.stold(parser().attribute("defaultColWidth")); + converter_.deserialise(parser().attribute("defaultColWidth")); } if (parser().attribute_present("defaultRowHeight")) { ws.d_->format_properties_.default_row_height = - converter_.stold(parser().attribute("defaultRowHeight")); + converter_.deserialise(parser().attribute("defaultRowHeight")); } if (parser().attribute_present(qn("x14ac", "dyDescent"))) { ws.d_->format_properties_.dy_descent = - converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); + converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent"))); } skip_attributes(); @@ -899,7 +899,7 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id) optional width = [this](xml::parser &p) -> xlnt::optional { if (p.attribute_present("width")) { - return (converter_.stold(p.attribute("width")) * 7 - 5) / 7; + return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7; } return xlnt::optional(); }(parser()); @@ -1000,7 +1000,7 @@ void xlsx_consumer::read_worksheet_sheetdata() case cell::type::empty: case cell::type::number: case cell::type::date: { - ws_cell_impl->value_numeric_ = converter_.stold(cell.value); + ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value); break; } case cell::type::shared_string: { @@ -1196,12 +1196,12 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) { page_margins margins; - margins.top(converter_.stold(parser().attribute("top"))); - margins.bottom(converter_.stold(parser().attribute("bottom"))); - margins.left(converter_.stold(parser().attribute("left"))); - margins.right(converter_.stold(parser().attribute("right"))); - margins.header(converter_.stold(parser().attribute("header"))); - margins.footer(converter_.stold(parser().attribute("footer"))); + margins.top(converter_.deserialise(parser().attribute("top"))); + margins.bottom(converter_.deserialise(parser().attribute("bottom"))); + margins.left(converter_.deserialise(parser().attribute("left"))); + margins.right(converter_.deserialise(parser().attribute("right"))); + margins.header(converter_.deserialise(parser().attribute("header"))); + margins.footer(converter_.deserialise(parser().attribute("footer"))); ws.page_margins(margins); } @@ -1251,27 +1251,27 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) if (current_hf_element == qn("spreadsheetml", "oddHeader")) { - odd_header = decode_header_footer(read_text()); + odd_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "oddFooter")) { - odd_footer = decode_header_footer(read_text()); + odd_footer = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "evenHeader")) { - even_header = decode_header_footer(read_text()); + even_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "evenFooter")) { - even_footer = decode_header_footer(read_text()); + even_footer = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "firstHeader")) { - first_header = decode_header_footer(read_text()); + first_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "firstFooter")) { - first_footer = decode_header_footer(read_text()); + first_footer = decode_header_footer(read_text(), converter_); } else { @@ -2308,7 +2308,7 @@ void xlsx_consumer::read_stylesheet() while (in_element(qn("spreadsheetml", "gradientFill"))) { expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex); - auto position = converter_.stold(parser().attribute("position")); + auto position = converter_.deserialise(parser().attribute("position")); expect_start_element(qn("spreadsheetml", "color"), xml::content::complex); auto color = read_color(); expect_end_element(qn("spreadsheetml", "color")); @@ -2356,7 +2356,7 @@ void xlsx_consumer::read_stylesheet() if (font_property_element == qn("spreadsheetml", "sz")) { - new_font.size(converter_.stold(parser().attribute("val"))); + new_font.size(converter_.deserialise(parser().attribute("val"))); } else if (font_property_element == qn("spreadsheetml", "name")) { @@ -3169,7 +3169,7 @@ rich_text xlsx_consumer::read_rich_text(const xml::qname &parent) if (current_run_property_element == xml::qname(xmlns, "sz")) { - run.second.get().size(converter_.stold(parser().attribute("val"))); + run.second.get().size(converter_.deserialise(parser().attribute("val"))); } else if (current_run_property_element == xml::qname(xmlns, "rFont")) { @@ -3307,7 +3307,7 @@ xlnt::color xlsx_consumer::read_color() if (parser().attribute_present("tint")) { - result.tint(converter_.stold(parser().attribute("tint"))); + result.tint(converter_.deserialise(parser().attribute("tint"))); } return result; diff --git a/source/detail/serialization/xlsx_consumer.hpp b/source/detail/serialization/xlsx_consumer.hpp index 963e1361e..c9a987ee2 100644 --- a/source/detail/serialization/xlsx_consumer.hpp +++ b/source/detail/serialization/xlsx_consumer.hpp @@ -416,7 +416,7 @@ class xlsx_consumer detail::cell_impl *current_cell_; detail::worksheet_impl *current_worksheet_; - number_converter converter_; + number_serialiser converter_; }; } // namespace detail diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp index e0a52870e..1646b70bc 100644 --- a/source/detail/serialization/xlsx_producer.cpp +++ b/source/detail/serialization/xlsx_producer.cpp @@ -2267,16 +2267,18 @@ void xlsx_producer::write_worksheet(const relationship &rel) { write_attribute("enableFormatConditionsCalculation", props.enable_format_condition_calculation.get()); } - + // outlinePr is optional in the spec but is being written every time? write_start_element(xmlns, "outlinePr"); write_attribute("summaryBelow", "1"); write_attribute("summaryRight", "1"); write_end_element(xmlns, "outlinePr"); - write_start_element(xmlns, "pageSetUpPr"); - write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page())); - write_end_element(xmlns, "pageSetUpPr"); - + if (ws.has_page_setup()) + { + write_start_element(xmlns, "pageSetUpPr"); + write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page())); + write_end_element(xmlns, "pageSetUpPr"); + } write_end_element(xmlns, "sheetPr"); } @@ -2418,7 +2420,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) if (props.width.is_set()) { double width = (props.width.get() * 7 + 5) / 7; - write_attribute("width", serialize_number_to_string(width)); + write_attribute("width", converter_.serialise(width)); } if (props.best_fit) @@ -2481,12 +2483,19 @@ void xlsx_producer::write_worksheet(const relationship &rel) { for (auto column = dimension.top_left().column(); column <= dimension.bottom_right().column(); ++column) { - if (!ws.has_cell(cell_reference(column, check_row))) continue; - auto cell = ws.cell(cell_reference(column, check_row)); - if (cell.garbage_collectible()) continue; + auto ref = cell_reference(column, check_row); + auto cell = ws.d_->cell_map_.find(ref); + if (cell == ws.d_->cell_map_.end()) + { + continue; + } + if (cell->second.is_garbage_collectible()) + { + continue; + } - first_block_column = std::min(first_block_column, cell.column()); - last_block_column = std::max(last_block_column, cell.column()); + first_block_column = std::min(first_block_column, cell->second.column_); + last_block_column = std::max(last_block_column, cell->second.column_); if (row == check_row) { @@ -2520,7 +2529,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) if (props.height.is_set()) { auto height = props.height.get(); - write_attribute("ht", serialize_number_to_string(height)); + write_attribute("ht", converter_.serialise(height)); } if (props.hidden) @@ -2647,7 +2656,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) case cell::type::number: write_start_element(xmlns, "v"); - write_characters(serialize_number_to_string(cell.value())); + write_characters(converter_.serialise(cell.value())); write_end_element(xmlns, "v"); break; @@ -2884,26 +2893,26 @@ void xlsx_producer::write_worksheet(const relationship &rel) { if (hf.has_odd_even_header(location)) { - odd_header.append(encode_header_footer(hf.odd_header(location), location)); - even_header.append(encode_header_footer(hf.even_header(location), location)); + odd_header.append(encode_header_footer(hf.odd_header(location), location, converter_)); + even_header.append(encode_header_footer(hf.even_header(location), location, converter_)); } if (hf.has_odd_even_footer(location)) { - odd_footer.append(encode_header_footer(hf.odd_footer(location), location)); - even_footer.append(encode_header_footer(hf.even_footer(location), location)); + odd_footer.append(encode_header_footer(hf.odd_footer(location), location, converter_)); + even_footer.append(encode_header_footer(hf.even_footer(location), location, converter_)); } } else { if (hf.has_header(location)) { - odd_header.append(encode_header_footer(hf.header(location), location)); + odd_header.append(encode_header_footer(hf.header(location), location, converter_)); } if (hf.has_footer(location)) { - odd_footer.append(encode_header_footer(hf.footer(location), location)); + odd_footer.append(encode_header_footer(hf.footer(location), location, converter_)); } } @@ -2911,12 +2920,12 @@ void xlsx_producer::write_worksheet(const relationship &rel) { if (hf.has_first_page_header(location)) { - first_header.append(encode_header_footer(hf.first_page_header(location), location)); + first_header.append(encode_header_footer(hf.first_page_header(location), location, converter_)); } if (hf.has_first_page_footer(location)) { - first_footer.append(encode_header_footer(hf.first_page_footer(location), location)); + first_footer.append(encode_header_footer(hf.first_page_footer(location), location, converter_)); } } } @@ -3383,7 +3392,7 @@ void xlsx_producer::write_color(const xlnt::color &color) } if (color.has_tint()) { - write_attribute("tint", serialize_number_to_string(color.tint())); + write_attribute("tint", converter_.serialise(color.tint())); } } diff --git a/source/detail/serialization/xlsx_producer.hpp b/source/detail/serialization/xlsx_producer.hpp index 296025b9c..4660f3c2d 100644 --- a/source/detail/serialization/xlsx_producer.hpp +++ b/source/detail/serialization/xlsx_producer.hpp @@ -30,6 +30,7 @@ #include #include +#include namespace xml { class serializer; @@ -208,6 +209,7 @@ class xlsx_producer detail::cell_impl *current_cell_; detail::worksheet_impl *current_worksheet_; + detail::number_serialiser converter_; }; } // namespace detail diff --git a/source/worksheet/range_reference.cpp b/source/worksheet/range_reference.cpp index c53a33efb..490cac55c 100644 --- a/source/worksheet/range_reference.cpp +++ b/source/worksheet/range_reference.cpp @@ -46,12 +46,6 @@ range_reference::range_reference(const char *range_string) { } -range_reference::range_reference(const range_reference &ref) -{ - top_left_ = ref.top_left_; - bottom_right_ = ref.bottom_right_; -} - range_reference::range_reference(const std::string &range_string) : top_left_("A1"), bottom_right_("A1") { @@ -183,11 +177,4 @@ XLNT_API bool operator!=(const char *reference_string, const range_reference &re return ref != reference_string; } -range_reference &range_reference::operator=(const range_reference &ref) -{ - top_left_ = ref.top_left_; - bottom_right_ = ref.bottom_right_; - return *this; -} - } // namespace xlnt diff --git a/source/worksheet/worksheet.cpp b/source/worksheet/worksheet.cpp index 866f02076..bb7141e2a 100644 --- a/source/worksheet/worksheet.cpp +++ b/source/worksheet/worksheet.cpp @@ -579,8 +579,40 @@ column_t worksheet::highest_column_or_props() const range_reference worksheet::calculate_dimension() const { - return range_reference(lowest_column(), lowest_row_or_props(), - highest_column(), highest_row_or_props()); + // partially optimised version of: + // return range_reference(lowest_column(), lowest_row_or_props(), + // highest_column(), highest_row_or_props()); + // + if (d_->cell_map_.empty() && d_->row_properties_.empty()) + { + return range_reference(constants::min_column(), constants::min_row(), + constants::min_column(), constants::min_row()); + } + row_t min_row_prop = constants::max_row(); + row_t max_row_prop = constants::min_row(); + for (const auto &row_prop : d_->row_properties_) + { + min_row_prop = std::min(min_row_prop, row_prop.first); + max_row_prop = std::max(max_row_prop, row_prop.first); + } + if (d_->cell_map_.empty()) + { + return range_reference(constants::min_column(), min_row_prop, + constants::min_column(), max_row_prop); + } + // find min and max row/column in cell map + column_t min_col = constants::max_column(); + column_t max_col = constants::min_column(); + row_t min_row = min_row_prop; + row_t max_row = max_row_prop; + for (auto &c : d_->cell_map_) + { + min_col = std::min(min_col, c.second.column_); + max_col = std::max(max_col, c.second.column_); + min_row = std::min(min_row, c.second.row_); + max_row = std::max(max_row, c.second.row_); + } + return range_reference(min_col, min_row, max_col, max_row); } range worksheet::range(const std::string &reference_string) diff --git a/tests/detail/numeric_util_test_suite.cpp b/tests/detail/numeric_util_test_suite.cpp index 1d8026bab..c2536f840 100644 --- a/tests/detail/numeric_util_test_suite.cpp +++ b/tests/detail/numeric_util_test_suite.cpp @@ -40,17 +40,18 @@ class numeric_test_suite : public test_suite void test_serialise_number() { + xlnt::detail::number_serialiser serialiser; // excel serialises numbers as floating point values with <= 15 digits of precision - xlnt_assert(xlnt::detail::serialize_number_to_string(1) == "1"); + xlnt_assert(serialiser.serialise(1) == "1"); // trailing zeroes are ignored - xlnt_assert(xlnt::detail::serialize_number_to_string(1.0) == "1"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.0f) == "1"); + xlnt_assert(serialiser.serialise(1.0) == "1"); + xlnt_assert(serialiser.serialise(1.0f) == "1"); // one to 1 relation - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456) == "1.23456"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345) == "1.23456789012345"); - xlnt_assert(xlnt::detail::serialize_number_to_string(123456.789012345) == "123456.789012345"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e+67) == "1.23456789012345e+67"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e-67) == "1.23456789012345e-67"); + xlnt_assert(serialiser.serialise(1.23456) == "1.23456"); + xlnt_assert(serialiser.serialise(1.23456789012345) == "1.23456789012345"); + xlnt_assert(serialiser.serialise(123456.789012345) == "123456.789012345"); + xlnt_assert(serialiser.serialise(1.23456789012345e+67) == "1.23456789012345e+67"); + xlnt_assert(serialiser.serialise(1.23456789012345e-67) == "1.23456789012345e-67"); } void test_float_equals_zero()