Skip to content

Commit

Permalink
tools: Add filter_iterator utility
Browse files Browse the repository at this point in the history
This adds the filter_iterator input iterator adapter for filtering input
values. Together with the additional is_not_space predicate, this restores
the option to filter out whitespace before parsing hex.
This is useful e.g. when loading hex from a file.
  • Loading branch information
chfast committed Jun 2, 2022
1 parent 9a604de commit ebd8622
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 6 deletions.
1 change: 1 addition & 0 deletions test/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_executable(
loader_mock.h
loader_test.cpp
mocked_host_test.cpp
filter_iterator_test.cpp
tooling_test.cpp
hex_test.cpp
)
Expand Down
107 changes: 107 additions & 0 deletions test/unittests/filter_iterator_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// EVMC: Ethereum Client-VM Connector API.
// Copyright 2022 The EVMC Authors.
// Licensed under the Apache License, Version 2.0.

#include <tools/evmc/filter_iterator.hpp>
#include <gtest/gtest.h>
#include <cctype>

using evmc::skip_space_iterator;

namespace
{
/// Test helper: returns the characters of `in` that survive skip_space_iterator filtering.
std::string remove_space(std::string_view in)
{
    // Iterate over a separate copy of the input rather than the view itself.
    // This helps sanitizers detect reads past the end of the buffer.
    const std::vector<char> buffer(in.begin(), in.end());
    const auto first = buffer.begin();
    const auto last = buffer.end();

    // Copy only the elements the filtering iterator yields.
    std::string filtered;
    std::copy(skip_space_iterator{first, last}, skip_space_iterator{last, last},
              std::back_inserter(filtered));
    return filtered;
}

/// Predicate for the filter_iterator test: true only for strictly positive integers.
bool is_positive(int x) noexcept
{
    return 0 < x;
}
} // namespace


/// Filters a mixed sequence with the is_positive predicate and checks that exactly the
/// positive values remain, in their original order.
TEST(filter_iterator, filter_positive_integers)
{
    using positive_iterator =
        evmc::filter_iterator<std::vector<int>::const_iterator, is_positive>;

    const std::vector<int> input{1, 0, 0, 2, -3, 3, 4, 5, 0, 6, 7, -1, -2, 0, 8, 9, -10};
    std::vector<int> filtered;
    std::copy(positive_iterator{input.begin(), input.end()},
              positive_iterator{input.end(), input.end()}, std::back_inserter(filtered));

    // The positive inputs are exactly 1..9 in ascending order.
    ASSERT_EQ(filtered.size(), 9u);
    int expected = 1;
    for (const int v : filtered)
        EXPECT_EQ(v, expected++);
}


// Inputs that are empty or contain nothing but spaces must filter down to the empty string.
TEST(skip_space_iterator, empty)
{
EXPECT_EQ(remove_space(""), "");
EXPECT_EQ(remove_space(" "), "");
EXPECT_EQ(remove_space(" "), "");
}

// Whitespace located between two non-space characters is removed.
TEST(skip_space_iterator, filter_middle)
{
EXPECT_EQ(remove_space("x y"), "xy");
EXPECT_EQ(remove_space("x y"), "xy");
}

// Leading whitespace is removed, so the first yielded element is the first non-space character.
TEST(skip_space_iterator, filter_front)
{
EXPECT_EQ(remove_space(" x"), "x");
EXPECT_EQ(remove_space(" x"), "x");
}

// Trailing whitespace is removed; the iterator must reach the end without yielding it.
TEST(skip_space_iterator, filter_back)
{
EXPECT_EQ(remove_space("x "), "x");
EXPECT_EQ(remove_space("x "), "x");
}

// Whitespace at the front, in the middle and at the back is removed in a single pass.
TEST(skip_space_iterator, filter_mixed)
{
EXPECT_EQ(remove_space(" x y z "), "xyz");
EXPECT_EQ(remove_space(" x y z "), "xyz");
}

TEST(skip_space_iterator, isspace)
{
    // Test internal isspace() compliance with std::isspace().
    // The https://en.cppreference.com/w/cpp/string/byte/isspace has the list of "space" characters.

    // Walk over every value representable by char (the loop variable is an int so that the
    // upper bound can be reached without overflow).
    for (int i = int{std::numeric_limits<char>::min()}; i <= std::numeric_limits<char>::max(); ++i)
    {
        const auto c = static_cast<char>(i);

        // std::isspace() has undefined behavior for arguments that are neither EOF nor
        // representable as unsigned char. Where char is signed, negative values must be
        // converted to unsigned char before the call.
        const auto std_is_space = std::isspace(static_cast<unsigned char>(c)) != 0;
        EXPECT_EQ(evmc::isspace(c), std_is_space);

        // Independently of the C library, only these six characters count as whitespace.
        switch (c)
        {
        case ' ':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
        case '\v':
            EXPECT_TRUE(evmc::isspace(c));
            break;
        default:
            EXPECT_FALSE(evmc::isspace(c));
            break;
        }
    }
}
18 changes: 18 additions & 0 deletions test/unittests/hex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Licensed under the Apache License, Version 2.0.

#include <evmc/hex.hpp>
#include <tools/evmc/filter_iterator.hpp>
#include <gtest/gtest.h>

using namespace evmc;
Expand Down Expand Up @@ -72,3 +73,20 @@ TEST(hex, validate_hex)
EXPECT_FALSE(validate_hex("0"));
EXPECT_FALSE(validate_hex("WXYZ"));
}

// Checks that from_hex() decodes the same bytes when whitespace is interleaved with the hex
// digits, provided the input is passed through skip_space_iterator.
TEST(hex, from_hex_skip_space)
{
// Combine from_hex with skip_space_iterator.
// The helper decodes `hex` through the filtering iterators, expects decoding to succeed
// and returns the decoded bytes for comparison.
static constexpr auto from_hex_skip_space = [](std::string_view hex) {
bytes out;
const auto status =
from_hex(skip_space_iterator{hex.begin(), hex.end()},
skip_space_iterator{hex.end(), hex.end()}, std::back_inserter(out));
EXPECT_TRUE(status);
return out;
};
// Every input below must decode to the same three bytes: no whitespace at all, whitespace
// around the digits, whitespace between individual characters (including inside the "0x"
// prefix), and the non-space whitespace characters \f, \r, \t, \v and \n.
EXPECT_EQ(from_hex_skip_space("0x010203"), (bytes{0x01, 0x02, 0x03}));
EXPECT_EQ(from_hex_skip_space("0x 010203 "), (bytes{0x01, 0x02, 0x03}));
EXPECT_EQ(from_hex_skip_space(" 0 x 0 1 0 2 0 3 "), (bytes{0x01, 0x02, 0x03}));
EXPECT_EQ(from_hex_skip_space("\f 0\r x 0 1\t 0 2 \v0 3 \n"), (bytes{0x01, 0x02, 0x03}));
}
2 changes: 1 addition & 1 deletion tools/evmc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
hunter_add_package(CLI11)
find_package(CLI11 REQUIRED)

add_executable(evmc-tool main.cpp)
add_executable(evmc-tool main.cpp filter_iterator.hpp)
add_executable(evmc::tool ALIAS evmc-tool)
set_target_properties(evmc-tool PROPERTIES OUTPUT_NAME evmc)
set_source_files_properties(main.cpp PROPERTIES
Expand Down
88 changes: 88 additions & 0 deletions tools/evmc/filter_iterator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// EVMC: Ethereum Client-VM Connector API.
// Copyright 2022 The EVMC Authors.
// Licensed under the Apache License, Version 2.0.
#pragma once

#include <iterator>

namespace evmc
{
/// The constexpr variant of std::isspace().
///
/// Returns true for the space character and for the five characters starting at '\t'
/// (in ASCII: '\t', '\n', '\v', '\f', '\r'); false for everything else.
inline constexpr bool isspace(char ch) noexcept
{
    // Implementation taken from LLVM's libc.
    if (ch == ' ')
        return true;

    // The subtraction is done on unsigned values, so characters below '\t' (and negative
    // chars) wrap around to a large offset and fail the range check.
    const auto offset = static_cast<unsigned>(ch) - static_cast<unsigned>('\t');
    return offset < 5u;
}

inline constexpr bool is_not_space(char ch) noexcept
{
return !isspace(ch);
}

/// The filter iterator adaptor creates a view of an iterator range in which some elements of the
/// range are skipped. A predicate function controls which elements are skipped. When the predicate
/// is applied to an element, if it returns true then the element is retained and if it returns
/// false then the element is skipped over. When skipping over elements, it is necessary for the
/// filter adaptor to know when to stop so as to avoid going past the end of the underlying range.
/// A filter iterator is therefore constructed with pair of iterators indicating the range of
/// elements in the unfiltered sequence to be traversed.
///
/// Similar to boost::filter_iterator.
template <typename BaseIterator,
bool predicate(typename std::iterator_traits<BaseIterator>::value_type) noexcept>
struct filter_iterator
{
using difference_type = typename std::iterator_traits<BaseIterator>::difference_type;
using value_type = typename std::iterator_traits<BaseIterator>::value_type;
using pointer = typename std::iterator_traits<BaseIterator>::pointer;
using reference = typename std::iterator_traits<BaseIterator>::reference;
using iterator_category = std::input_iterator_tag;

private:
BaseIterator base;
BaseIterator base_end;
value_type value;

constexpr void forward_to_next_value() noexcept
{
for (; base != base_end; ++base)
{
value = *base;
if (predicate(value))
break;
}
}

public:
constexpr filter_iterator(BaseIterator it, BaseIterator end) noexcept : base{it}, base_end{end}
{
forward_to_next_value();
}

constexpr auto operator*() noexcept
{
// We should not read from an input base iterator twice. So the only read is in
// forward_to_next_value() and here we return the cached value.
return value;
}

constexpr void operator++() noexcept
{
++base;
forward_to_next_value();
}

constexpr bool operator!=(const filter_iterator& o) noexcept { return base != o.base; }
constexpr bool operator==(const filter_iterator& o) noexcept { return base == o.base; }
};

/// The input filter iterator which skips whitespace characters from the base input iterator.
///
/// This is filter_iterator with the predicate fixed to is_not_space. It adds no members of
/// its own and only inherits the constructors of the base class.
template <typename BaseIterator>
struct skip_space_iterator : filter_iterator<BaseIterator, is_not_space>
{
using filter_iterator<BaseIterator, is_not_space>::filter_iterator;
};

/// Deduction guide: allows writing `skip_space_iterator{begin, end}` with BaseIterator
/// deduced from the type of the two arguments.
template <typename BaseIterator>
skip_space_iterator(BaseIterator, BaseIterator) -> skip_space_iterator<BaseIterator>;
} // namespace evmc
12 changes: 7 additions & 5 deletions tools/evmc/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Copyright 2019-2020 The EVMC Authors.
// Licensed under the Apache License, Version 2.0.

#include "filter_iterator.hpp"
#include <CLI/CLI.hpp>
#include <evmc/hex.hpp>
#include <evmc/loader.h>
Expand All @@ -19,12 +20,13 @@ evmc::bytes load_from_hex(const std::string& str)
{
const auto path = str.substr(1);
std::ifstream file{path};
const std::string content{std::istreambuf_iterator<char>{file},
std::istreambuf_iterator<char>{}};
auto o = evmc::from_hex(content);
if (!o)
const std::istreambuf_iterator<char> file_begin{file};
const std::istreambuf_iterator<char> file_end;
evmc::bytes out;
if (!evmc::from_hex(evmc::skip_space_iterator{file_begin, file_end},
evmc::skip_space_iterator{file_end, file_end}, std::back_inserter(out)))
throw std::invalid_argument{"invalid hex in " + path};
return std::move(*o);
return out;
}

return evmc::from_hex(str).value(); // Should be validated already.
Expand Down

0 comments on commit ebd8622

Please sign in to comment.