Skip to content

Commit

Permalink
Improve dataframe conversion.
Browse files Browse the repository at this point in the history
Merge branch 'release/0.7'
  • Loading branch information
vnmabus committed Mar 24, 2022
2 parents 6641a66 + 0863177 commit cfa7cb0
Show file tree
Hide file tree
Showing 11 changed files with 160 additions and 49 deletions.
18 changes: 18 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
  - family-names: "Ramos-Carreño"
    given-names: "Carlos"
    orcid: "https://orcid.org/0000-0003-2566-7058"
    affiliation: "Universidad Autónoma de Madrid"
    email: [email protected]
title: "rdata: Read R datasets from Python"
date-released: 2022-03-24
url: "https://github.com/vnmabus/rdata"
license: MIT
keywords:
- rdata
- Python
- R
- parser
- conversion
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
include MANIFEST.in
include VERSION
include rdata/VERSION
include LICENSE
include rdata/py.typed
include *.txt
1 change: 0 additions & 1 deletion VERSION

This file was deleted.

1 change: 1 addition & 0 deletions rdata/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.7
14 changes: 14 additions & 0 deletions rdata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""rdata: Read R datasets from Python."""
import errno as _errno
import os as _os
import pathlib as _pathlib

Expand All @@ -13,3 +15,15 @@ def _get_test_data_path() -> _pathlib.Path:
Path of the test data.
"""

# Expose the package version, read from the VERSION file that sits next to
# this module.
try:
    __version__ = (
        _pathlib.Path(__file__).parent / 'VERSION'
    ).read_text(encoding='utf-8').strip()
except FileNotFoundError:
    # The VERSION file is missing: fall back to a placeholder instead of
    # failing at import time.  Any other OSError (permissions, I/O
    # failure, ...) is not caught here and still propagates.
    __version__ = "0.0"
17 changes: 10 additions & 7 deletions rdata/conversion/_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
Any,
Callable,
ChainMap,
Hashable,
List,
Mapping,
MutableMapping,
Expand Down Expand Up @@ -355,12 +354,16 @@ def convert_array(

dimnames = attrs.get('dimnames')
if dimnames:
dimension_names = ["dim_" + str(i) for i, _ in enumerate(dimnames)]
coords: Mapping[Hashable, Any] = {
dimension_names[i]: d
for i, d in enumerate(dimnames)
if d is not None
}
if isinstance(dimnames, Mapping):
dimension_names = list(dimnames.keys())
coords = dimnames
else:
dimension_names = [f"dim_{i}" for i, _ in enumerate(dimnames)]
coords = {
dimension_names[i]: d
for i, d in enumerate(dimnames)
if d is not None
}

value = xarray.DataArray(value, dims=dimension_names, coords=coords)

Expand Down
Binary file added rdata/tests/data/test_full_named_matrix.rda
Binary file not shown.
Binary file added rdata/tests/data/test_half_named_matrix.rda
Binary file not shown.
Binary file added rdata/tests/data/test_named_matrix.rda
Binary file not shown.
70 changes: 69 additions & 1 deletion rdata/tests/test_rdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

import numpy as np
import pandas as pd

import rdata
import xarray

TESTDATA_PATH = rdata.TESTDATA_PATH

Expand Down Expand Up @@ -93,6 +93,74 @@ def test_matrix(self) -> None:
]),
})

def test_named_matrix(self) -> None:
    """Check the conversion of a matrix with labels on both axes."""
    data_file = TESTDATA_PATH / "test_named_matrix.rda"
    r_objects = rdata.conversion.convert(rdata.parser.parse_file(data_file))

    # Expected result: generic ``dim_<i>`` dimension names, with
    # coordinate labels attached to both dimensions.
    expected = xarray.DataArray(
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        dims=["dim_0", "dim_1"],
        coords={
            "dim_0": ["dim0_0", "dim0_1"],
            "dim_1": ["dim1_0", "dim1_1", "dim1_2"],
        },
    )

    actual = r_objects["test_named_matrix"]
    xarray.testing.assert_identical(actual, expected)

def test_half_named_matrix(self) -> None:
    """Check the conversion of a matrix with labels on one axis only."""
    data_file = TESTDATA_PATH / "test_half_named_matrix.rda"
    r_objects = rdata.conversion.convert(rdata.parser.parse_file(data_file))

    # Expected result: generic ``dim_<i>`` dimension names, with
    # coordinate labels only on the first dimension.
    expected = xarray.DataArray(
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        dims=["dim_0", "dim_1"],
        coords={"dim_0": ["dim0_0", "dim0_1"]},
    )

    actual = r_objects["test_half_named_matrix"]
    xarray.testing.assert_identical(actual, expected)

def test_full_named_matrix(self) -> None:
    """Check the conversion of a matrix whose dimensions are also named."""
    data_file = TESTDATA_PATH / "test_full_named_matrix.rda"
    r_objects = rdata.conversion.convert(rdata.parser.parse_file(data_file))

    # Expected result: the dimensions carry their own names
    # (``my_dim_<i>``) and both have coordinate labels.
    expected = xarray.DataArray(
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        dims=["my_dim_0", "my_dim_1"],
        coords={
            "my_dim_0": ["dim0_0", "dim0_1"],
            "my_dim_1": ["dim1_0", "dim1_1", "dim1_2"],
        },
    )

    actual = r_objects["test_full_named_matrix"]
    xarray.testing.assert_identical(actual, expected)

def test_list(self) -> None:
"""Test that list can be parsed."""
parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_list.rda")
Expand Down
86 changes: 47 additions & 39 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
language or its libraries, and thus it is released under a MIT license.
"""
import os
import pathlib
import sys

from setuptools import find_packages, setup
Expand All @@ -16,44 +17,51 @@

DOCLINES = (__doc__ or '').split("\n")

with open(os.path.join(os.path.dirname(__file__),
'VERSION'), 'r') as version_file:
# The version number lives in the VERSION file inside the rdata package.
# The encoding is given explicitly so the read does not depend on the
# locale of the machine running the build.
with open(
    pathlib.Path(__file__).parent / 'rdata' / 'VERSION',
    encoding='utf-8',
) as version_file:
    version = version_file.read().strip()

setup(name='rdata',
version=version,
description=DOCLINES[1],
long_description="\n".join(DOCLINES[3:]),
url='https://github.com/vnmabus/rdata',
author='Carlos Ramos Carreño',
author_email='[email protected]',
include_package_data=True,
platforms=['any'],
license='MIT',
packages=find_packages(),
python_requires='>=3.7, <4',
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Software Development :: Libraries :: Python Modules',
'Typing :: Typed',
],
keywords=['rdata', 'r', 'dataset'],
install_requires=['numpy',
'xarray',
'pandas'],
setup_requires=pytest_runner,
tests_require=['pytest-cov',
'numpy>=1.14' # The printing format for numpy changes
],
test_suite='rdata.tests',
zip_safe=False)
setup(
    name='rdata',
    version=version,
    # The summary and the long description both come from the docstring
    # of this script (split into DOCLINES above).
    description=DOCLINES[1],
    long_description="\n".join(DOCLINES[3:]),
    url='https://github.com/vnmabus/rdata',
    author='Carlos Ramos Carreño',
    author_email='[email protected]',
    include_package_data=True,
    platforms=['any'],
    license='MIT',
    packages=find_packages(),
    python_requires='>=3.7, <4',
    # Keep the Python version classifiers in sync with ``python_requires``:
    # 3.6 is not listed because it is excluded by the constraint above.
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Typing :: Typed',
    ],
    keywords=['rdata', 'r', 'dataset'],
    install_requires=[
        'numpy',
        'xarray',
        'pandas',
    ],
    # ``pytest_runner`` is defined earlier in this script.
    setup_requires=pytest_runner,
    tests_require=[
        'pytest-cov',
        'numpy>=1.14',  # The printing format for numpy changes
    ],
    test_suite='rdata.tests',
    zip_safe=False,
)

0 comments on commit cfa7cb0

Please sign in to comment.