Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

32bit support #179

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 82 additions & 16 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,88 @@ defaults:
jobs:
build:
runs-on: ${{ matrix.runs-on }}
name: ${{ matrix.sys.compiler }} / ${{ matrix.build-system }} / ${{ matrix.config.name }} / date-polyfill ${{ matrix.sys.date-polyfill}}
name: ${{ matrix.toolchain.compiler }} / ${{ matrix.build-system.name }} / ${{ matrix.config.name }} / targets ${{ matrix.target-arch.name }} / date-polyfill ${{ matrix.toolchain.date-polyfill}} / Refactoring ${{ matrix.toolchain.build_refactoring }}
strategy:
fail-fast: false
matrix:
runs-on: [windows-latest]
sys:
toolchain:
- { compiler: msvc, date-polyfill: 'ON', build_refactoring: 'OFF' }
- { compiler: msvc, date-polyfill: 'OFF', build_refactoring: 'ON' }
- { compiler: clang, date-polyfill: 'ON', version: 17, build_refactoring: 'OFF'}
- { compiler: clang, date-polyfill: 'OFF', version: 17, build_refactoring: 'ON' }
target-arch:
- {
name: "Win64",
vs-flags: "-A x64", # only used by VS generator
cl-flags: "/arch (x64)",
gnu-flags: "-m64",
msvc-dev-cmd: "win64"
}
- {
name: "Win32",
vs-flags: "-A Win32", # only used by VS generator
cl-flags: "/arch (x86)",
gnu-flags: "-m32",
msvc-dev-cmd: "win32"
}
config:
- { name: Debug }
- { name: Release }
build-system:
- "Visual Studio 17 2022"
- "Ninja"
- { name: "Visual Studio 17 2022" }
- { name: "Ninja" }

steps:

- name: Setup MSVC
if: matrix.sys.compiler == 'msvc' && matrix.build-system != 'Visual Studio 17 2022'
- name: Setup MSVC (only for non-VS buildsystems)
if: matrix.toolchain.compiler == 'msvc' && !startsWith(matrix.build-system.name, 'Visual Studio')
uses: ilammy/msvc-dev-cmd@v1
with:
# Ninja will take cues of which target architecture to use from the (powershell)
# msvc environment so we need to setup everything at this level.
arch: ${{matrix.target-arch.msvc-dev-cmd}}

- name: Install LLVM and Clang
if: matrix.sys.compiler == 'clang'
if: matrix.toolchain.compiler == 'clang'
uses: egor-tensin/setup-clang@v1
with:
version: ${{matrix.sys.version}}
version: ${{matrix.toolchain.version}}
platform: x64

- name: Setup clang
if: matrix.sys.compiler == 'clang'
if: matrix.toolchain.compiler == 'clang'
run: |
echo "CC=clang" >> $GITHUB_ENV
echo "CXX=clang++" >> $GITHUB_ENV

- name: Setup gnu compilers
if: matrix.toolchain.compiler != 'msvc'
run: |
echo "CMAKE_CXX_FLAGS=${{matrix.target-arch.gnu-flags}}" >> $GITHUB_ENV
echo "CMAKE_CXX_LINK_FLAGS=${{matrix.target-arch.gnu-flags}}" >> $GITHUB_ENV

- name: Setup Ninja + msvc
if: matrix.toolchain.compiler == 'msvc' && matrix.build-system.name == 'Ninja'
run: |
echo "CMAKE_CXX_FLAGS=${{matrix.target-arch.cl-flags}}" >> $GITHUB_ENV
echo "CMAKE_CXX_LINK_FLAGS=${{matrix.target-arch.cl-flags}}" >> $GITHUB_ENV

- name: Setup Visual Studio
if: startsWith(matrix.build-system.name, 'Visual Studio')
run: |
echo "GENERATOR_EXTRA_FLAGS=${{matrix.target-arch.vs-flags}}" >> $GITHUB_ENV

- name: Checkout code
uses: actions/checkout@v4

- name: Setup vcpkg environment
if: matrix.target-arch.name == 'Win32'
run: |
vcpkg install --triplet=x86-windows

- name: Set conda environment
if: matrix.target-arch.name == 'Win64'
uses: mamba-org/setup-micromamba@main
with:
environment-name: myenv
Expand All @@ -62,11 +104,35 @@ jobs:
create-args: |
ninja

- name: Set dependencies install prefix dir for 64bit
if: matrix.target-arch.name == 'Win64'
run: |
echo "SPARROW_DEPS_PREFIX=$CONDA_PREFIX" >> $GITHUB_ENV
echo "SPARROW_INSTALL_PREFIX=$CONDA_PREFIX" >> $GITHUB_ENV

- name: Set dependencies install prefix dir for 32bit
if: matrix.target-arch.name == 'Win32'
run: |
echo "SPARROW_DEPS_PREFIX=$VCPKG_INSTALLED_DIR" >> $GITHUB_ENV
echo "SPARROW_INSTALL_PREFIX=$VCPKG_INSTALLED_DIR" >> $GITHUB_ENV
echo "VCPKG_TARGET_TRIPLET='x86-windows'" >> $GITHUB_ENV
echo "CMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV

- name: Enable runtime size/length/offset validity checks
if: matrix.config.name == 'Debug'
run: |
echo "SPARROW_CHECKS='$SPARROW_CHECKS -DSPAROW_ENABLE_SIZE_CHECKS=ON'" >> $GITHUB_ENV

- name: Enable runtime 32bit size/length/offset limit
if: matrix.target-arch.name == 'Win32' # Note: this is not a mandatory limitation in any context, we add this just to have at least one configuration testing that feature
run: |
echo "SPARROW_CHECKS='$SPARROW_CHECKS -DSPAROW_ENABLE_32BIT_SIZE_LIMIT=ON'" >> $GITHUB_ENV

- name: Configure using CMake
run: cmake -Bbuild -DCMAKE_BUILD_TYPE:STRING=${{matrix.config.name}} -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DUSE_DATE_POLYFILL=${{matrix.sys.date-polyfill}} -DBUILD_REFACTORING=${{matrix.sys.build_refactoring}} -G "${{matrix.build-system}}"
run: cmake -S ./ -B ./build -DCMAKE_BUILD_TYPE:STRING=${{matrix.config.name}} -DCMAKE_PREFIX_PATH=$SPARROW_DEPS_PREFIX -DCMAKE_INSTALL_PREFIX=$SPARROW_INSTALL_PREFIX -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DUSE_DATE_POLYFILL=${{matrix.toolchain.date-polyfill}} -DBUILD_REFACTORING=${{matrix.toolchain.build_refactoring}} -G "${{matrix.build-system.name}}" $GENERATOR_EXTRA_FLAGS $SPARROW_CHECKS

- name: Build
if: matrix.sys.build_refactoring == 'ON'
if: matrix.toolchain.build_refactoring == 'ON'
working-directory: build
run: cmake --build . --config ${{matrix.config.name}} --target sparrow --parallel 8

Expand All @@ -86,24 +152,24 @@ jobs:
uses: actions/upload-artifact@v4
if: success() || failure()
with:
name: test_sparrow_lib_report_Windows_${{ matrix.sys.compiler }}_${{ matrix.build-system }}_${{ matrix.config.name }}_date-polyfill_${{ matrix.sys.date-polyfill}}
name: test_sparrow_lib_report_Windows_${{ matrix.toolchain.compiler }}_${{ matrix.build-system.name }}_${{ matrix.config.name }}_${{ matrix.target-arch.name }}_date-polyfill_${{ matrix.toolchain.date-polyfill}}
path: '**/test_sparrow_lib_report.xml'

- name: Build refactoring
if: matrix.sys.build_refactoring == 'ON'
if: matrix.toolchain.build_refactoring == 'ON'
working-directory: build
run: cmake --build . --config ${{matrix.config.name}} --target test_sparrow_lib_v01 --parallel 8

- name: Run refactoring tests
if: matrix.sys.build_refactoring == 'ON'
if: matrix.toolchain.build_refactoring == 'ON'
working-directory: build
run: cmake --build . --config ${{matrix.config.name}} --target run_tests_with_junit_report_v01

- name: Upload refactoring test results
uses: actions/upload-artifact@v4
if: matrix.sys.build_refactoring == 'ON' && (success() || failure())
if: matrix.toolchain.build_refactoring == 'ON' && (success() || failure())
with:
name: test_sparrow_lib_report_Windows_v01_${{ matrix.sys.compiler }}_${{ matrix.build-system }}_${{ matrix.config.name }}_date-polyfill_${{ matrix.sys.date-polyfill}}
name: test_sparrow_lib_report_Windows_v01_${{ matrix.toolchain.compiler }}_${{ matrix.target-arch.name }}_${{ matrix.build-system.name }}_${{ matrix.config.name }}_date-polyfill_${{ matrix.toolchain.date-polyfill}}_refactoring_${{ matrix.toolchain.build_refactoring }}
path: '**/test_sparrow_lib_report_v01.xml'

- name: Run all examples
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@
# Clangd cache
.cache
CMakeUserPresets.json

# Vcpkg packages
/vcpkg_installed/
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ OPTION(BUILD_DOCS "Build sparrow documentation" OFF)
OPTION(BUILD_EXAMPLES "Build sparrow examples" OFF)
OPTION(USE_DATE_POLYFILL "Use date polyfill implementation" ON)
OPTION(BUILD_REFACTORING "Build the refactoring target" OFF)
OPTION(SPAROW_ENABLE_SIZE_CHECKS "Enables runtime checks that values of sizes/lengths/offsets from the various C++ types are always in range of the supported Arrow lengths" OFF)
OPTION(SPAROW_ENABLE_32BIT_SIZE_LIMIT "Once enabled, all the sizes are assumed to be in the range of 32bit representation. Can be checked at runtime with `SPAROW_ENABLE_SIZE_CHECKS=ON`" OFF)

include(CheckCXXSymbolExists)

Expand All @@ -67,6 +69,7 @@ if(LIBCPP)
add_compile_definitions(_LIBCPP_DISABLE_AVAILABILITY)
endif()


# Linter options
# =============

Expand Down Expand Up @@ -208,6 +211,16 @@ if (BUILD_TESTS)
endif ()
endif ()

if (SPAROW_ENABLE_SIZE_CHECKS)
target_compile_definitions(sparrow INTERFACE SPAROW_ENABLE_SIZE_CHECKS 1)
message("-> sparrow config: size limit runtime checks enabled (SPAROW_ENABLE_SIZE_CHECKS=ON): sizes will be checked at runtime to be between zero and max(in64_t) (or max(int32_t if SPAROW_ENABLE_32BIT_SIZE_LIMIT=ON)")
endif()

if (SPAROW_ENABLE_32BIT_SIZE_LIMIT)
target_compile_definitions(sparrow INTERFACE SPAROW_ENABLE_32BIT_SIZE_LIMIT)
message("-> sparrow config: 32bit size limit enabled (SPAROW_ENABLE_32BIT_SIZE_LIMIT=ON): valid size values are restricted to range zero to max(int32_t)")
endif()

# Docs
# ====

Expand Down
24 changes: 12 additions & 12 deletions include/sparrow/array/array_data_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ namespace sparrow
{
return {
.type = data_descriptor(arrow_type_id<null_type>()),
.length = static_cast<std::int64_t>(size),
.length = to_arrow_length(size),
.offset = 0,
.bitmap = {},
.buffers = {},
Expand Down Expand Up @@ -200,7 +200,7 @@ namespace sparrow

return {
.type = data_descriptor(arrow_type_id<U>()),
.length = static_cast<int64_t>(size),
.length = to_arrow_length(size),
.offset = offset,
.bitmap = detail::make_array_data_bitmap(std::forward<BitmapRange>(bitmap)),
.buffers = std::move(buffers),
Expand Down Expand Up @@ -281,7 +281,7 @@ namespace sparrow

std::vector<array_data::buffer_type> buffers(2);
buffers[0].resize(sizeof(std::int64_t) * (values_size + 1), 0);
size_t acc_size = 0;
size_t acc_size = 0;
auto bitmap_iter = bitmap.begin();
auto value_iter = values.begin();
for(std::size_t i = 0;i < values_size; ++i, ++bitmap_iter, ++value_iter)
Expand All @@ -296,8 +296,8 @@ namespace sparrow
auto iter = buffers[1].begin();
const auto offsets = buffers[0].data<std::int64_t>();
offsets[0] = 0;


value_iter = values.begin();
bitmap_iter = bitmap.begin();
for(std::size_t i = 0; i < values_size; ++i, ++bitmap_iter, ++value_iter)
Expand All @@ -308,21 +308,21 @@ namespace sparrow
SPARROW_ASSERT_TRUE(
std::cmp_less(unwraped_value.size(), std::numeric_limits<std::int64_t>::max())
);
offsets[i + 1] = offsets[i] + static_cast<std::int64_t>(unwraped_value.size());
offsets[i + 1] = offsets[i] + to_arrow_length(unwraped_value.size());
std::ranges::copy(unwraped_value, iter);
std::advance(iter, unwraped_value.size());
}
else
{
offsets[i + 1] = offsets[i];
}
}
}

using T = std::unwrap_ref_decay_t<std::unwrap_ref_decay_t<std::ranges::range_value_t<ValueRange>>>;
using U = get_corresponding_arrow_type_t<T>;
return {
.type = data_descriptor(arrow_type_id<U>()),
.length = static_cast<array_data::length_type>(values.size()),
.length = to_arrow_length(values.size()),
.offset = offset,
.bitmap = detail::make_array_data_bitmap(std::forward<BitmapRange>(bitmap)),
.buffers = std::move(buffers),
Expand Down Expand Up @@ -460,7 +460,7 @@ namespace sparrow
};
return {
.type = data_descriptor(arrow_type_id<std::uint64_t>()),
.length = static_cast<array_data::length_type>(indexes.size()),
.length = to_arrow_length(indexes.size()),
.offset = offset,
.bitmap = detail::make_array_data_bitmap(std::forward<BitmapRange>(bitmap)),
.buffers = {create_buffer()},
Expand Down Expand Up @@ -619,16 +619,16 @@ namespace sparrow

/**
* \brief Creates a default array_data object with the specified layout and values.
*
*
* @tparam Layout The layout of the array_data object.
* @tparam T The type of the value to be repeated.
*
*
* @param n The number of times the value should be repeated.
* @param value The value to be repeated.
*/
template <arrow_layout Layout, class T>
requires is_arrow_base_type_extended<std::decay_t<T>>
array_data make_default_array_data(
array_data make_default_array_data(
typename Layout::size_type n
, T && value)
{
Expand Down
Loading