Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

removing movemask from platform #2302

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,8 @@ if (BUILD_TESTS OR BUILD_BENCHMARKS)
DIRECTORY algorithm/simd/detail/test/
TEST algorithm_simd_detail_simd_any_of_test SOURCES SimdAnyOfTest.cpp
TEST algorithm_simd_detail_simd_for_each_test SOURCES SimdForEachTest.cpp
TEST algorithm_simd_detail_simd_for_each_test SOURCES SimdForEachTest.cpp
TEST algorithm_simd_detail_ignore_test SOURCES IgnoreTest.cpp
TEST algorithm_simd_detail_unroll_utils_test SOURCES UnrollUtilsTest.cpp
# disabled until C++20
# TEST algorithm_simd_detail_simd_traits_test SOURCES TraitsTest.cpp
Expand Down
2 changes: 1 addition & 1 deletion folly/algorithm/simd/FindFixed.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ std::optional<std::size_t> findSplitFirstRegister(

template <typename Scalar, typename Reg>
std::optional<std::size_t> firstTrue(Reg reg) {
auto [bits, bitsPerElement] = folly::movemask<Scalar>(reg);
auto [bits, bitsPerElement] = folly::simd::movemask<Scalar>(reg);
if (bits) {
return std::countr_zero(bits) / bitsPerElement();
}
Expand Down
21 changes: 16 additions & 5 deletions folly/algorithm/simd/Movemask.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
FOLLY_PUSH_WARNING
FOLLY_GCC_DISABLE_WARNING("-Wignored-attributes")

namespace folly {
namespace folly::simd {

/*
* This is a low level utility used for simd search algorithms.
Expand All @@ -43,7 +43,7 @@ namespace folly {
* for both x86 and arm.
*
* Interface looks like this:
* folly::movemask<-scalar type->(nativeRegister)
* folly::simd::movemask<-scalar type->(nativeRegister)
* -> std::pair<Bits, BitsPerElement>;
*
* Bits - unsigned integral, containing the bitmask (first is lowest bit).
Expand All @@ -53,7 +53,7 @@ namespace folly {
*
* std::optional<std::uint32_t> firstTrueUint16(auto simdRegister) {
* auto [bits, bitsPerElement] =
* folly::movemask<std::uint16_t>(simdRegister);
* folly::simd::movemask<std::uint16_t>(simdRegister);
* if (!bits) {
* return std::nullopt;
* }
Expand All @@ -71,7 +71,11 @@ template <typename Scalar, typename Reg>
auto movemask(Reg reg) {
std::integral_constant<std::uint32_t, sizeof(Scalar) == 2 ? 2 : 1>
bitsPerElement;
auto mmask = static_cast<std::uint32_t>([&] {

using uint_t = std::
conditional_t<std::is_same_v<Reg, __m128i>, std::uint16_t, std::uint32_t>;

auto mmask = static_cast<uint_t>([&] {
if constexpr (std::is_same_v<Reg, __m128i>) {
if constexpr (sizeof(Scalar) <= 2) {
return _mm_movemask_epi8(reg);
Expand Down Expand Up @@ -142,6 +146,13 @@ auto movemask(Reg reg) {

#endif

} // namespace folly
#if !FOLLY_X64 && !FOLLY_AARCH64

// Fallback for builds where neither FOLLY_X64 nor FOLLY_AARCH64 is set:
// movemask is declared but deleted, so any call to it is a compile-time error.
template <typename Scalar, typename Reg>
void movemask(Reg reg) = delete;

#endif

} // namespace folly::simd

FOLLY_POP_WARNING
13 changes: 11 additions & 2 deletions folly/algorithm/simd/detail/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")

oncall("fbcode_entropy_wardens_folly")

# Header-only target exposing Ignore.h.
# "//folly/lang:bits" is an exported dep because Ignore.h includes
# folly/lang/Bits.h.
cpp_library(
name = "ignore",
headers = ["Ignore.h"],
exported_deps = [
"//folly/lang:bits",
],
)

cpp_library(
name = "simd_any_of",
headers = ["SimdAnyOf.h"],
Expand All @@ -19,7 +27,7 @@ cpp_library(
name = "simd_char_platform",
headers = ["SimdCharPlatform.h"],
exported_deps = [
":simd_for_each",
":ignore",
"//folly:portability",
"//folly/algorithm/simd:movemask",
"//folly/lang:bits",
Expand All @@ -30,9 +38,10 @@ cpp_library(
name = "simd_for_each",
headers = ["SimdForEach.h"],
exported_deps = [
":ignore",
":unroll_utils",
"//folly:c_portability",
"//folly:traits",
"//folly/algorithm/simd/detail:unroll_utils",
],
)

Expand Down
61 changes: 61 additions & 0 deletions folly/algorithm/simd/detail/Ignore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/lang/Bits.h>

#include <type_traits>
#include <utility>

namespace folly::simd::detail {

/**
 * ignore(_none/_extrema)
 *
 * Tag types for handling the tails.
 * ignore_none indicates that the whole register is used.
 * ignore_extrema.first, .last show how many elements at the beginning and at
 * the end of the register are out of the data.
 *
 * For example 3 elements, starting from the second for an 8 element register
 * will be ignore_extrema{.first = 1, .last = 4}
 */

struct ignore_extrema {
  int first = 0;
  int last = 0;
};

struct ignore_none {};

/**
 * mmaskClearIgnored<Cardinal>(mmask, ignore)
 *
 * Zeroes, in place, the bits of `mmask.first` that belong to ignored
 * elements.
 *
 * Cardinal - number of elements in the register the mask was taken from.
 * mmask    - {bits, bitsPerElement} pair. BitsPerElement is value-initialized
 *            and used in constant expressions, so it has to be an
 *            integral-constant-like type.
 * ignore   - which elements to drop from the mask.
 *
 * For ignore_extrema:
 *   - set_rzero is applied for `ignore.first` elements (the lowest bits of
 *     the mask);
 *   - set_lzero is applied for `ignore.last` elements plus the bits of Uint
 *     that lie above `Cardinal` elements and therefore never carry data.
 *   E.g. Cardinal = 4, 2 bits per element, an 8 bit mask 0b1111'1111 and
 *   ignore_extrema{1, 2} give 0b0000'1100.
 *
 * For any other Ignore type the mask is left untouched.
 *
 * NOTE: for ignore none we don't clear anything, even if some bits are not
 * doing anything. We expect mmask to only have zeroes in masked out elements.
 *
 * Maybe we need to revisit that at some point.
 */
template <int Cardinal, typename Uint, typename BitsPerElement, typename Ignore>
void mmaskClearIgnored(
    std::pair<Uint, BitsPerElement>& mmask, [[maybe_unused]] Ignore ignore) {
  if constexpr (std::is_same_v<Ignore, ignore_extrema>) {
    // Without this check a too large Cardinal would make
    // kTopBitsAlwaysIgnored negative and silently corrupt the mask.
    static_assert(
        Cardinal * BitsPerElement{} <= sizeof(Uint) * 8,
        "Cardinal elements do not fit into the bitmask type");

    mmask.first = set_rzero(mmask.first, ignore.first * BitsPerElement{});

    // Bits of Uint above the last element of the register are not backed by
    // any element, they are cleared together with the ignored tail.
    static constexpr int kTopBitsAlwaysIgnored =
        sizeof(Uint) * 8 - Cardinal * BitsPerElement{};
    mmask.first = set_lzero(
        mmask.first, ignore.last * BitsPerElement{} + kTopBitsAlwaysIgnored);
  }
}

} // namespace folly::simd::detail
49 changes: 7 additions & 42 deletions folly/algorithm/simd/detail/SimdCharPlatform.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

#include <folly/Portability.h>
#include <folly/algorithm/simd/Movemask.h>
#include <folly/algorithm/simd/detail/SimdForEach.h>
#include <folly/algorithm/simd/detail/Ignore.h>
#include <folly/algorithm/simd/detail/SimdCharPlatform.h>
#include <folly/lang/Bits.h>

#include <array>
Expand Down Expand Up @@ -68,47 +69,16 @@ namespace simd::detail {
* - le_unsigned(reg_t, char) - by lane less than or equal to char.
*
* logical ops:
* - movemask - take a bitmask
* - any(logical_t, ignore) - return true if any of the lanes are true
* - logical_or(logical_t, logical_t) - by lane logical or
*
* mmask ops:
* - clear(mmask, ignore) - sets ignored bits to 0
*
*/

#if FOLLY_X64 || FOLLY_AARCH64

template <typename Platform>
struct SimdCharPlatformCommon : Platform {
using logical_t = typename Platform::logical_t;
using movemask_result_t =
decltype(folly::movemask<std::uint8_t>(logical_t{}));
using mmask_t = typename movemask_result_t::first_type;
static constexpr std::uint32_t kMmaskBitsPerElement =
typename movemask_result_t::second_type{}();

template <typename Uint>
FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static Uint setLowerNBits(int n) {
if (sizeof(Uint) == 8 && n == 64) {
return static_cast<Uint>(-1);
}
return static_cast<Uint>((std::uint64_t{1} << n) - 1);
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_extrema ignore) {
mmask_t clearFirst =
~setLowerNBits<mmask_t>(ignore.first * kMmaskBitsPerElement);
mmask_t clearLast = setLowerNBits<mmask_t>(
(Platform::kCardinal - ignore.last) * kMmaskBitsPerElement);
return mmask & clearFirst & clearLast;
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_none) {
return mmask;
}

// These are aligned loads but there is no point in generating
// aligned load instructions, so we call loadu.
Expand All @@ -122,18 +92,13 @@ struct SimdCharPlatformCommon : Platform {
return Platform::unsafeLoadu(ptr, ignore_none{});
}

FOLLY_ALWAYS_INLINE
static mmask_t movemask(logical_t log) {
return folly::movemask<std::uint8_t>(log).first;
}

using Platform::any;

FOLLY_ALWAYS_INLINE
static bool any(typename Platform::logical_t log, ignore_extrema ignore) {
auto mmask = movemask(log);
mmask = clear(mmask, ignore);
return mmask;
auto mmask = movemask<std::uint8_t>(log);
mmaskClearIgnored<Platform::kCardinal>(mmask, ignore);
return mmask.first;
}

static auto toArray(typename Platform::reg_t x) {
Expand Down Expand Up @@ -186,7 +151,7 @@ struct SimdCharSse2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return movemask<std::uint8_t>(log).first;
}
};

Expand Down Expand Up @@ -234,7 +199,7 @@ struct SimdCharAvx2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return simd::movemask<std::uint8_t>(log).first;
}
};

Expand Down
18 changes: 1 addition & 17 deletions folly/algorithm/simd/detail/SimdForEach.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <folly/CPortability.h>
#include <folly/Traits.h>
#include <folly/algorithm/simd/detail/Ignore.h>
#include <folly/algorithm/simd/detail/UnrollUtils.h>

#include <array>
Expand All @@ -35,23 +36,6 @@ namespace simd::detail {
// to mess that up.
//

/**
* ignore(_none/_extrema)
*
* Tag types for handling the tails.
* ignore_none indicates that the whole register is used.
* ignore_extrema.first, .last show how many elements are out of the data.
*
* For example 3 elements, starting from the second for an 8 element register
* will be ignore_extrema{.first = 1, .last = 4}
*/
struct ignore_extrema {
int first = 0;
int last = 0;
};

struct ignore_none {};

/**
* simdForEachAligning<unrolling>(cardinal, f, l, delegate);
*
Expand Down
9 changes: 9 additions & 0 deletions folly/algorithm/simd/detail/test/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ cpp_unittest(
],
)

# Unit test for Ignore.h (IgnoreTest.cpp); depends on the ignore library
# under test and on gtest.
cpp_unittest(
name = "ignore_test",
srcs = ["IgnoreTest.cpp"],
deps = [
"//folly/algorithm/simd/detail:ignore",
"//folly/portability:gtest",
],
)

cpp_unittest(
name = "traits_test",
srcs = ["TraitsTest.cpp"],
Expand Down
45 changes: 45 additions & 0 deletions folly/algorithm/simd/detail/test/IgnoreTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <folly/algorithm/simd/detail/Ignore.h>

#include <cstdint>

#include <folly/portability/GTest.h>

namespace folly::simd::detail {

struct IgnoreTest : ::testing::Test {};

/**
 * Smoke test for mmaskClearIgnored on an 8 bit mask with 2 bits per element.
 *
 * The first three checks are chained: each call works on the mask left by the
 * previous one. The last check starts from a fresh all-ones mask so that
 * clearing of the bits above `Cardinal` elements is actually observed.
 */
TEST_F(IgnoreTest, MaskClearIgnored) {
  auto mmask =
      std::pair{std::uint8_t{0xff}, std::integral_constant<std::uint32_t, 2>{}};

  // mostly relying on the bit clearing helpers used by mmaskClearIgnored
  // (set_rzero / set_lzero) working correctly.
  // simd any of also covers a lot of cases.
  // this is just the bare minimal smoke test.

  // ignore_none: nothing is cleared.
  mmaskClearIgnored<4>(mmask, ignore_none{});
  EXPECT_EQ(0xff, mmask.first);

  // 4 elements, drop the first one and the last two.
  mmaskClearIgnored<4>(mmask, ignore_extrema{1, 2});
  EXPECT_EQ(0b0000'1100, mmask.first);

  // Continues from 0b0000'1100: the only remaining element is ignored.
  mmaskClearIgnored<2>(mmask, ignore_extrema{0, 1});
  EXPECT_EQ(0b0000'0000, mmask.first);

  // Fresh mask, 2 elements in an 8 bit mask: the last element and the 4 top
  // bits that are not backed by any element must both be cleared.
  mmask.first = 0xff;
  mmaskClearIgnored<2>(mmask, ignore_extrema{0, 1});
  EXPECT_EQ(0b0000'0011, mmask.first);
}

} // namespace folly::simd::detail
2 changes: 1 addition & 1 deletion folly/algorithm/simd/detail/test/SimdAnyOfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ TEST(SimdAnyOfSimple, Ignore) {
buffer.fill(' ');
for (auto& c : buffer) {
c = 'a';
anySpacesTest({&c, 1}, false);
ASSERT_NO_FATAL_FAILURE(anySpacesTest({&c, 1}, false));
c = ' ';
}
}
Expand Down
4 changes: 2 additions & 2 deletions folly/algorithm/simd/test/MovemaskTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ void allOneTrueTests() {
std::array<T, N> arr;
arr.fill(kFalse);

ASSERT_EQ(0, folly::movemask<T>(loadReg<Reg>(arr)).first);
ASSERT_EQ(0, folly::simd::movemask<T>(loadReg<Reg>(arr)).first);

for (std::size_t i = 0; i != N; ++i) {
arr[i] = kTrue;
auto [bits, bitsPerElement] = folly::movemask<T>(loadReg<Reg>(arr));
auto [bits, bitsPerElement] = folly::simd::movemask<T>(loadReg<Reg>(arr));
std::uint64_t oneElement = safeShift(1, bitsPerElement()) - 1;
std::uint64_t expectedBits = safeShift(oneElement, i * bitsPerElement());

Expand Down
2 changes: 2 additions & 0 deletions folly/detail/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ cpp_library(
exported_deps = [
"//folly:portability",
"//folly:range",
"//folly/algorithm/simd:movemask",
"//folly/algorithm/simd/detail:ignore",
"//folly/algorithm/simd/detail:simd_char_platform",
"//folly/algorithm/simd/detail:simd_for_each",
"//folly/lang:bits",
Expand Down
Loading
Loading