NumPy 2.0 build support
Signed-off-by: Tim Paine <[email protected]>
timkpaine committed Jul 13, 2024
1 parent 323122e commit b19fe80
Showing 8 changed files with 124 additions and 50 deletions.
14 changes: 0 additions & 14 deletions .github/actions/setup-dependencies/action.yml
@@ -25,10 +25,6 @@ runs:

################
# Linux # NOTE: skip for manylinux image
# - name: Linux init steps
# shell: bash
# run: make dependencies-vcpkg
# if: ${{ runner.os == 'Linux' }} # skip

################
# Mac
@@ -37,16 +33,6 @@
run: make dependencies-mac
if: ${{ runner.os == 'macOS' }}

# - name: Setup vcpkg cache in shell
# shell: bash
# run: |
# which -a gcc-12
# echo "CC=/usr/local/bin/gcc-12" >> $GITHUB_ENV
# echo "CMAKE_C_COMPILER=/usr/local/bin/gcc-12" >> $GITHUB_ENV
# echo "CXX=/usr/local/bin/g++-12" >> $GITHUB_ENV
# echo "CMAKE_CXX_COMPILER=/usr/local/bin/g++-12" >> $GITHUB_ENV
# if: ${{ runner.os == 'macOS' }}

################
# Windows
- name: Windows init steps (vc143)
93 changes: 79 additions & 14 deletions .github/workflows/build.yml
@@ -648,7 +648,68 @@ jobs:
####################################################
# Test Dependencies/Regressions #
####################################################
test_dependencies:
test_buildtime_dependencies:
needs:
- initialize
strategy:
matrix:
os:
- ubuntu-20.04
python-version:
- 3.9
packages:
- '"numpy>=2" "pandas>=2.2" "pyarrow>=16.1"'

runs-on: ${{ matrix.os }}

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0

- name: Set up Python ${{ matrix.python-version }}
uses: ./.github/actions/setup-python
with:
version: '${{ matrix.python-version }}'

- name: Set up Caches
uses: ./.github/actions/setup-caches

- name: Install python dependencies
run: make requirements

- name: Install test dependencies
shell: bash
run: sudo apt-get install graphviz

# If we're checking a build-time dependency, install
# the dependency, and then try to build
- name: Install packages - ${{ matrix.packages }} (build time dependency check)
run: python -m pip install -U ${{ matrix.packages }}

- name: Python Wheel Steps - ${{ matrix.packages }} (build time dependency check)
run: make dist-py-cibw
env:
CIBW_BUILD: "cp39-manylinux*"
CIBW_ENVIRONMENT_LINUX: CSP_MANYLINUX="ON" CCACHE_DIR="/host/home/runner/work/csp/csp/.ccache" VCPKG_DEFAULT_BINARY_CACHE="/host${{ env.VCPKG_DEFAULT_BINARY_CACHE }}" VCPKG_DOWNLOADS="/host${{ env.VCPKG_DOWNLOADS }}"
CIBW_BUILD_VERBOSITY: 3

- name: Move Wheel
run: mv dist/*.whl .

- name: Install wheel (build time dependency check)
run: python -m pip install -U *manylinux*.whl --target .

- name: Install packages - ${{ matrix.packages }} (build time dependency check)
run: python -m pip install -U ${{ matrix.packages }}

# Run tests to check dependencies
- name: Python Test Steps (build time dependency check)
run: make test

test_runtime_dependencies:
needs:
- initialize
- build
@@ -659,10 +720,10 @@
- ubuntu-20.04
python-version:
- 3.9
package:
- "sqlalchemy>=2"
- "sqlalchemy<2"
- "numpy==1.19.5"
packages:
- '"sqlalchemy>=2"'
- '"sqlalchemy<2"'
- '"numpy==1.19.5"'

runs-on: ${{ matrix.os }}

@@ -671,6 +732,7 @@
uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0

- name: Set up Python ${{ matrix.python-version }}
uses: ./.github/actions/setup-python
@@ -684,24 +746,26 @@
shell: bash
run: sudo apt-get install graphviz

- name: Download wheel
- name: Download wheel (run time dependency check)
uses: actions/download-artifact@v4
with:
name: csp-dist-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}

- name: Install wheel
- name: Install wheel (run time dependency check)
run: python -m pip install -U *manylinux*.whl --target .

- name: Install package - ${{ matrix.package }}
run: python -m pip install -U "${{ matrix.package }}"
- name: Install packages - ${{ matrix.packages }} (run time dependency check)
run: python -m pip install -U ${{ matrix.packages }}

- name: Python Test Steps
# Run tests to check dependencies
- name: Python Test Steps (run time dependency check)
run: make test TEST_ARGS="-k TestDBReader"
if: ${{ contains( 'sqlalchemy', matrix.package )}}
if: ${{ contains( matrix.packages, 'sqlalchemy' )}}

- name: Python Test Steps
# For e.g. numpy dep changes, run all tests
- name: Python Test Steps (run time dependency check)
run: make test
if: ${{ contains( 'numpy', matrix.package )}}
if: ${{ contains( matrix.packages, 'numpy' )}}

###########################################################################################################
#.........................................................................................................#
@@ -750,7 +814,8 @@ jobs:
- build
- test
- test_sdist
- test_dependencies
- test_buildtime_dependencies
- test_runtime_dependencies

if: startsWith(github.ref, 'refs/tags/v')
runs-on: ubuntu-22.04
16 changes: 16 additions & 0 deletions cpp/csp/python/Common.h
@@ -7,6 +7,22 @@

#define INIT_PYDATETIME if( !PyDateTimeAPI ) { PyDateTime_IMPORT; }

// NumPy 2.0 Migration
#include <numpy/numpyconfig.h>

#if NPY_ABI_VERSION >= 0x02000000
// Define helper for anything that can't
// be handled by the below helper macros
#define CSP_NUMPY_2

#else

// Numpy 2.0 helpers
#define PyDataType_ELSIZE( descr ) ( ( descr ) -> elsize )
#define PyDataType_C_METADATA( descr ) ( ( descr ) -> c_metadata )

#endif

namespace csp::python
{

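The shim above lets one code path compile against both major versions: when the headers report an ABI of 2.0 or newer, the PyDataType_* accessors already exist and CSP_NUMPY_2 gates any remaining differences; on NumPy 1.x the macros fall back to direct struct access. A minimal usage sketch, assuming the shim is included — descrElementSize and descrDatetimeMeta are illustrative names, not functions from this commit:

#include <numpy/arrayobject.h>
#include <csp/python/Common.h>

// Element size in bytes; expands to descr -> elsize when built against NumPy 1.x
static npy_intp descrElementSize( PyArray_Descr * descr )
{
    return PyDataType_ELSIZE( descr );
}

// Datetime metadata attached to a datetime64 descriptor, on either NumPy version
static PyArray_DatetimeDTypeMetaData * descrDatetimeMeta( PyArray_Descr * descr )
{
    return ( PyArray_DatetimeDTypeMetaData * ) PyDataType_C_METADATA( descr );
}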
11 changes: 9 additions & 2 deletions cpp/csp/python/NumpyConversions.cpp
@@ -3,6 +3,7 @@


#include <csp/core/Time.h>
#include <csp/python/Common.h>
#include <csp/python/NumpyConversions.h>

#include <locale>
@@ -59,7 +60,7 @@ int64_t scalingFromNumpyDtUnit( NPY_DATETIMEUNIT base )

NPY_DATETIMEUNIT datetimeUnitFromDescr( PyArray_Descr* descr )
{
PyArray_DatetimeDTypeMetaData* dtypeMeta = (PyArray_DatetimeDTypeMetaData*)(descr -> c_metadata);
PyArray_DatetimeDTypeMetaData* dtypeMeta = (PyArray_DatetimeDTypeMetaData*)( PyDataType_C_METADATA( descr ) );
PyArray_DatetimeMetaData* dtMeta = &(dtypeMeta -> meta);
return dtMeta -> base;
}
@@ -68,7 +69,7 @@ static std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> wstr_converte

void stringFromNumpyStr( void* data, std::string& out, char numpy_type, int elem_size_bytes )
{
// strings from numpy arrays are fixed width and zero filled.
// if the last char is 0, can treat as null terminated, else use full width

if( numpy_type == NPY_UNICODELTR)
@@ -87,7 +88,11 @@ void stringFromNumpyStr( void* data, std::string& out, char numpy_type, int elem
out = wstr_converter.to_bytes( wstr );
}
}
#ifdef CSP_NUMPY_2
else if( numpy_type == NPY_STRINGLTR )
#else
else if( numpy_type == NPY_STRINGLTR || numpy_type == NPY_STRINGLTR2 )
#endif
{
const char * const raw_value = (const char *) data;

@@ -144,7 +149,9 @@ void validateNumpyTypeVsCspType( const CspTypePtr & type, char numpy_type_char )
// everything works as object
break;
case NPY_STRINGLTR:
#ifndef CSP_NUMPY_2
case NPY_STRINGLTR2:
#endif
case NPY_UNICODELTR:
case NPY_CHARLTR:
if( cspType != csp::CspType::Type::STRING )
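The new #ifdef guards are needed because NumPy 2.0 removed the NPY_STRINGLTR2 ('a') type character, leaving only NPY_STRINGLTR ('S') for fixed-width byte strings. The same pattern in isolation — a sketch only; isBytesTypeChar is a hypothetical helper, not part of the commit:

#include <numpy/arrayobject.h>
#include <csp/python/Common.h>

// True if the dtype character denotes a fixed-width byte string on either NumPy version
static bool isBytesTypeChar( char numpy_type )
{
#ifdef CSP_NUMPY_2
    return numpy_type == NPY_STRINGLTR;
#else
    return numpy_type == NPY_STRINGLTR || numpy_type == NPY_STRINGLTR2;
#endif
}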
4 changes: 2 additions & 2 deletions cpp/csp/python/NumpyConversions.h
@@ -204,7 +204,7 @@ inline PyObject * createNumpyArray( ValueType valueType, const csp::TimeSeriesPr
T lastValue;
if( ts -> valid() )
lastValue = ts -> lastValueTyped<T>();

DateTime lastTime = ( ts -> valid() ? ts -> lastTime() : DateTime() );
switch( valueType )
{
@@ -219,7 +219,7 @@
case ValueType::TIMESTAMP_VALUE_TUPLE:
{
PyObject * tuple = PyTuple_New( 2 );
PyTuple_SET_ITEM( tuple, 0, adjustStartAndEndTime( as_nparray( ts, ts -> timeline(), lastTime, startIndex,
endIndex, extrapolateEnd ), startPolicy, endPolicy, startDt, endDt ) );
PyTuple_SET_ITEM( tuple, 1, as_nparray( ts, ts -> dataline<T>(), lastValue, startIndex, endIndex, extrapolateEnd ) );
return tuple;
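One note on the tuple construction above: PyTuple_SET_ITEM steals a reference to each item, so the arrays returned by as_nparray are owned by the tuple and need no separate Py_DECREF. A standalone sketch of that ownership pattern — makePair is illustrative, not from the commit:

#include <Python.h>

// Pack two freshly created objects into a 2-tuple that takes ownership of both
static PyObject * makePair( PyObject * first, PyObject * second )
{
    PyObject * tuple = PyTuple_New( 2 );
    if( !tuple )
        return nullptr; // (a full implementation would also DECREF first and second here)
    PyTuple_SET_ITEM( tuple, 0, first );  // reference stolen by the tuple
    PyTuple_SET_ITEM( tuple, 1, second ); // reference stolen by the tuple
    return tuple;
}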
18 changes: 9 additions & 9 deletions cpp/csp/python/NumpyInputAdapter.h
@@ -29,18 +29,18 @@ class NumpyCurveAccessor
m_descr = nullptr;
}

NumpyCurveAccessor( PyArrayObject * arr )
{
m_nd = PyArray_NDIM( arr );
if( m_nd < 2 )
CSP_THROW( csp::TypeError, "NumpyCurveAccessor is inefficient for a 1-D Numpy array: use PyArray_GETPTR1 to access indexed values" );

// Preprocess strides and dimensions
npy_intp* strides = PyArray_STRIDES( arr );
npy_intp* dims = PyArray_DIMS( arr );
m_outerStride = strides[0];
m_outerDim = dims[0];
m_innerStrides = strides + 1;
m_innerDims = dims + 1;

m_arr = arr;
@@ -58,7 +58,7 @@ class NumpyCurveAccessor
{
if( index >= m_outerDim )
CSP_THROW( csp::TypeError, "Requested data index out of range in NumpyCurveAccessor" );

// Create a view to the (n-1) dimensional array with (n-1) potentially unnatural strides
/*
A note on reference counting for the subarray: NewFromDescr will *steal* a reference to the type descr,
@@ -87,7 +87,7 @@ class NumpyCurveAccessor
private:
char* m_data;
int m_nd;

npy_intp m_outerStride;
npy_intp m_outerDim;
npy_intp* m_innerStrides;
@@ -103,7 +103,7 @@ class NumpyInputAdapter : public PullInputAdapter<T>
using PyArrayObjectPtr = PyPtr<PyArrayObject>;

public:
NumpyInputAdapter( Engine * engine, CspTypePtr & type, PyArrayObject * datetimes,
PyArrayObject * values ) : PullInputAdapter<T>( engine, type, PushMode::LAST_VALUE ),
m_datetimes( PyArrayObjectPtr::incref( datetimes ) ),
m_values( PyArrayObjectPtr::incref( values ) ),
@@ -113,7 +113,7 @@ class NumpyInputAdapter : public PullInputAdapter<T>
PyArray_Descr* vals_descr = PyArray_DESCR(m_values.ptr());

m_size = static_cast<int>(PyArray_SIZE( datetimes ));
m_elem_size = vals_descr -> elsize;
m_elem_size = PyDataType_ELSIZE(vals_descr);
m_val_type = vals_descr -> type;

char out_type = m_val_type;
@@ -123,7 +123,7 @@ class NumpyInputAdapter : public PullInputAdapter<T>
m_valueAccessor = std::make_unique<NumpyCurveAccessor>( m_values.ptr() );
}
validateNumpyTypeVsCspType( type, out_type );


auto dt_type = dts_descr -> type;
if( dt_type != NPY_DATETIMELTR && dt_type != NPY_OBJECTLTR )
@@ -166,7 +166,7 @@ class NumpyInputAdapter : public PullInputAdapter<T>

++m_index;
}

PullInputAdapter<T>::start( start, end );
}

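For context on NumpyCurveAccessor: it splits the array's strides and dimensions into the outer (time) axis and the inner (n-1) axes, then builds a zero-copy view of the requested row via PyArray_NewFromDescr. The underlying pointer arithmetic, shown in isolation — a sketch only; rowPointer is a hypothetical helper:

#include <numpy/arrayobject.h>

// Address of the (n-1)-dimensional sub-array at position `index` along axis 0
static void * rowPointer( PyArrayObject * arr, npy_intp index )
{
    char * base      = PyArray_BYTES( arr );       // start of the data buffer
    npy_intp rowStep = PyArray_STRIDES( arr )[0];  // bytes between consecutive outer entries
    return base + index * rowStep;
}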
4 changes: 2 additions & 2 deletions cpp/csp/python/adapters/parquetadapterimpl.cpp
@@ -303,7 +303,7 @@ class NumpyUnicodeArrayWriter : public TypedDialectGenericListWriterInterface<st
PyObject_Repr( ( PyObject * ) PyArray_DESCR( arrayObject ) ) ) );
}

auto elementSize = PyArray_DESCR( arrayObject ) -> elsize;
auto elementSize = PyDataType_ELSIZE( PyArray_DESCR( arrayObject ) );
auto ndim = PyArray_NDIM( arrayObject );

CSP_TRUE_OR_THROW_RUNTIME( ndim == 1, "While writing to parquet expected numpy array with 1 dimension" << " got " << ndim );
Expand Down Expand Up @@ -451,7 +451,7 @@ class NumpyUnicodeReaderImpl final : public TypedDialectGenericListReaderInterfa
{
auto arrayObject = reinterpret_cast<PyArrayObject *>(csp::python::toPythonBorrowed( list ));
std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t> converter;
auto elementSize = PyArray_DESCR( arrayObject ) -> elsize;
auto elementSize = PyDataType_ELSIZE( PyArray_DESCR( arrayObject ) );
auto wideValue = converter.from_bytes( value );
auto nElementsToCopy = std::min( int(elementSize / sizeof(char32_t)), int( wideValue.size() + 1 ) );
std::copy_n( wideValue.c_str(), nElementsToCopy, reinterpret_cast<char32_t*>(PyArray_GETPTR1( arrayObject, index )) );
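In the parquet reader and writer above, PyDataType_ELSIZE returns the fixed element width in bytes, while the copy into the array cell is counted in char32_t units (NumPy unicode cells are UCS-4). As a worked example under assumed values: a '<U21' dtype has an element size of 84 bytes, i.e. 21 code-point slots, so the copy length is capped at min(84 / 4, len(value) + 1). A sketch of that arithmetic — unicodeCopyCount is illustrative, not a function in the commit:

#include <algorithm>
#include <cstddef>

// Number of char32_t slots to write into a fixed-width NumPy unicode cell,
// including the terminating zero when it fits
static int unicodeCopyCount( int elementSizeBytes, std::size_t utf32Length )
{
    return std::min( int( elementSizeBytes / sizeof( char32_t ) ), int( utf32Length + 1 ) );
}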