Patch summary (free-form text ahead of the first file header):
  * .github/workflows/pythonpackage.yml: bump CACHE_NUMBER_COMPILE from 5 to 6.
  * ci/actions/windll/action.yml: read vcruntime140_1.dll from the
    "Program Files/Microsoft Visual Studio/2022" tree instead of the
    "Program Files (x86)/Microsoft Visual Studio/2019" tree, and list the
    2022 directory first.
  * ci/conda-env.yml: constrain fsspec to <2022.2.0.
  * packages/vaex-core/vaex/column.py: nbytes also counts dictionary.nbytes
    when the pyarrow major version is >= 7.
  * packages/vaex-core/vaex/functions.py: _astype converts string input via
    np.array for datetime64 units m, h, D, W, M, Y; other cases still use
    pa.timestamp(units).
  * tests/astype_test.py: add test_astype_str_to_datetime.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Line boundaries agree with the hunk line
counts, but the indentation depth of context and added lines is a best-effort
reconstruction (least certain in the functions.py hunk) - confirm it against
the target files before applying.

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index d3c3caf810..3cc57642e6 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -19,7 +19,7 @@ defaults:
 jobs:
   main:
     env:
-      CACHE_NUMBER_COMPILE: 5
+      CACHE_NUMBER_COMPILE: 6
       MAMBA_ROOT_PREFIX: /dev/shm/mambaroot
     runs-on: ${{ matrix.os }}
     strategy:
diff --git a/ci/actions/windll/action.yml b/ci/actions/windll/action.yml
index 47507f5d2e..13df4e17a1 100644
--- a/ci/actions/windll/action.yml
+++ b/ci/actions/windll/action.yml
@@ -5,9 +5,10 @@ runs:
   steps:
     - shell: bash
       run: |
-        ls /c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise/VC/Redist/
-        ls /c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise/VC/Redist/MSVC/
-        cp /c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise/VC/Redist/MSVC/14.29.30133/x64/Microsoft.VC142.CRT/vcruntime140_1.dll packages/vaex-core/vaex/
+        ls /c/Program\ Files/Microsoft\ Visual\ Studio/2022
+        ls /c/Program\ Files/Microsoft\ Visual\ Studio/2022/Enterprise/VC/Redist/
+        ls /c/Program\ Files/Microsoft\ Visual\ Studio/2022/Enterprise/VC/Redist/MSVC/
+        cp /c/Program\ Files/Microsoft\ Visual\ Studio/2022/Enterprise/VC/Redist/MSVC/14.29.30133/x64/Microsoft.VC142.CRT/vcruntime140_1.dll packages/vaex-core/vaex/
         mkdir packages/vaex-core/vendor/pcre
         cd packages/vaex-core/vendor/pcre
         curl https://anaconda.org/conda-forge/pcre/8.44/download/win-64/pcre-8.44-h6538335_0.tar.bz2 -Lo pcre.tar.bz2
diff --git a/ci/conda-env.yml b/ci/conda-env.yml
index 7683e774a7..e55603287c 100644
--- a/ci/conda-env.yml
+++ b/ci/conda-env.yml
@@ -11,7 +11,7 @@ dependencies:
 - cachetools
 - catboost
 - diskcache
-- fsspec
+- fsspec<2022.2.0
 - gcsfs
 - geopandas
 - graphviz
diff --git a/packages/vaex-core/vaex/column.py b/packages/vaex-core/vaex/column.py
index 418ec61943..d883e63f7f 100644
--- a/packages/vaex-core/vaex/column.py
+++ b/packages/vaex-core/vaex/column.py
@@ -264,7 +264,10 @@ def _fingerprint(self):
     @property
     def nbytes(self):
         # consistent with arrow
-        return self.indices.nbytes
+        if int(pa.__version__.split(".")[0]) >= 7:
+            return self.indices.nbytes + self.dictionary.nbytes
+        else:
+            return self.indices.nbytes
 
     @property
     def dtype(self):
diff --git a/packages/vaex-core/vaex/functions.py b/packages/vaex-core/vaex/functions.py
index dc87e1a37b..2add3435f8 100644
--- a/packages/vaex-core/vaex/functions.py
+++ b/packages/vaex-core/vaex/functions.py
@@ -2513,8 +2513,13 @@ def _astype(x, dtype):
                 units = dtype[len('datetime64')+1:-1]
             else:
                 units = 'ns'
-            dtype = pa.timestamp(units)
-
+            if vaex.dtype_of(x) == str and units in ['m', 'h', 'D', 'W', 'M', 'Y']:
+                # it's a slower path, but it works
+                # using pc.strptime seems to offset by a day orso?
+                x = np.array(x, dtype=dtype)
+                return x.astype(dtype)
+            else:
+                dtype = pa.timestamp(units)
         else:  # parse dtype
             if len(dtype) > len('timedelta64'):
                 units = dtype[len('timedelta64')+1:-1]
diff --git a/tests/astype_test.py b/tests/astype_test.py
index 3fe5a6933d..65bb03bb89 100644
--- a/tests/astype_test.py
+++ b/tests/astype_test.py
@@ -53,3 +53,15 @@ def test_astype_timedelta(df_factory):
     df = df_factory(x=x)
     df['x_expected'] = df.x.astype('timedelta64[s]')
     assert x_result.tolist() == df.x_expected.tolist()
+
+
+def test_astype_str_to_datetime(df_factory):
+    x = ['2020-05', '2021-10', '2022-01']
+    y = ['2020', '2021', '2022']
+    x_validation = np.array(x, dtype='datetime64[M]')
+    y_validation = np.array(y, dtype='datetime64[Y]')
+    df = df_factory(x=x, y=y)
+    df['x_dt'] = df.x.astype('datetime64[M]')
+    df['y_dt'] = df.y.astype('datetime64[Y]')
+    assert all(df.x_dt.values == x_validation)
+    assert all(df.y_dt.values == y_validation)