Skip to content

Commit

Permalink
Add RDS support.
Browse files Browse the repository at this point in the history
Merge branch 'release/0.8'
  • Loading branch information
vnmabus committed Jun 7, 2022
2 parents cfa7cb0 + 4ee7516 commit 4faa331
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 14 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,9 @@ jobs:
pip3 install .
coverage run --source=rdata/ --omit=rdata/tests/ setup.py test;
- name: Generate coverage XML
run: |
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v2
39 changes: 39 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
release:
types: [published]

permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
10 changes: 9 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,19 @@ authors:
email: [email protected]
title: "rdata: Read R datasets from Python"
date-released: 2022-03-24
doi: 10.5281/zenodo.6382237
url: "https://github.com/vnmabus/rdata"
license: MIT
keywords:
- rdata
- Python
- R
- parser
- conversion
- conversion
identifiers:
- description: "This is the collection of archived snapshots of all versions of rdata"
type: doi
value: 10.5281/zenodo.6382237
- description: "This is the archived snapshot of version 0.7 of rdata"
type: doi
value: 10.5281/zenodo.6382238
9 changes: 7 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
rdata
=====

|build-status| |docs| |coverage| |landscape| |pypi|
|build-status| |docs| |coverage| |landscape| |pypi| |zenodo|

Read R datasets from Python.

Expand Down Expand Up @@ -130,4 +130,9 @@ Pandas `Categorical` objects:
.. |pypi| image:: https://badge.fury.io/py/rdata.svg
:alt: Pypi version
:scale: 100%
:target: https://pypi.python.org/pypi/rdata/
:target: https://pypi.python.org/pypi/rdata/

.. |zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.6382237.svg
:alt: Zenodo DOI
:scale: 100%
:target: https://doi.org/10.5281/zenodo.6382237
5 changes: 5 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
# sys.path.insert(0, '/home/carlos/git/rdata/rdata')

import sys

import pkg_resources

try:
release = pkg_resources.get_distribution('rdata').version
except pkg_resources.DistributionNotFound:
Expand Down Expand Up @@ -208,3 +210,6 @@

intersphinx_mapping = {'python': ('https://docs.python.org/3', None),
'pandas': ('http://pandas.pydata.org/pandas-docs/dev', None)}

autodoc_preserve_defaults = True
autodoc_typehints = "description"
2 changes: 1 addition & 1 deletion rdata/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7
0.8
33 changes: 25 additions & 8 deletions rdata/parser/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dataclasses import dataclass
from types import MappingProxyType
from typing import (
TYPE_CHECKING,
Any,
BinaryIO,
Callable,
Expand Down Expand Up @@ -722,19 +723,20 @@ def parse_file(
*,
expand_altrep: bool = True,
altrep_constructor_dict: AltRepConstructorMap = DEFAULT_ALTREP_MAP,
extension: str | None = None,
) -> RData:
"""
Parse a R file (.rda or .rdata).
Parameters:
file_or_path (file-like, str, bytes or path-like): File
in the R serialization format.
expand_altrep (bool): Whether to translate ALTREPs to normal objects.
file_or_path: File in the R serialization format.
expand_altrep: Whether to translate ALTREPs to normal objects.
altrep_constructor_dict: Dictionary mapping each ALTREP to
its constructor.
extension: Extension of the file.
Returns:
RData: Data contained in the file (versions and object).
Data contained in the file (versions and object).
See Also:
:func:`parse_data`: Similar function that receives the data directly.
Expand Down Expand Up @@ -802,6 +804,8 @@ def parse_file(
"""
if isinstance(file_or_path, (os.PathLike, str)):
path = pathlib.Path(file_or_path)
if extension is None:
extension = path.suffix
data = path.read_bytes()
else:
# file is a pre-opened file
Expand All @@ -816,6 +820,7 @@ def parse_file(
data,
expand_altrep=expand_altrep,
altrep_constructor_dict=altrep_constructor_dict,
extension=extension,
)


Expand All @@ -824,18 +829,20 @@ def parse_data(
*,
expand_altrep: bool = True,
altrep_constructor_dict: AltRepConstructorMap = DEFAULT_ALTREP_MAP,
extension: str | None = None,
) -> RData:
"""
Parse the data of a R file, received as a sequence of bytes.
Parameters:
data (bytes): Data extracted of a R file.
expand_altrep (bool): Whether to translate ALTREPs to normal objects.
data: Data extracted of a R file.
expand_altrep: Whether to translate ALTREPs to normal objects.
altrep_constructor_dict: Dictionary mapping each ALTREP to
its constructor.
extension: Extension of the file.
Returns:
RData: Data contained in the file (versions and object).
Data contained in the file (versions and object).
See Also:
:func:`parse_file`: Similar function that parses a file directly.
Expand Down Expand Up @@ -911,6 +918,7 @@ def parse_data(
if filetype in {
FileTypes.rdata_binary_v2,
FileTypes.rdata_binary_v3,
None,
} else parse_data
)

Expand All @@ -921,22 +929,31 @@ def parse_data(
elif filetype is FileTypes.xz:
new_data = lzma.decompress(data)
elif filetype in {FileTypes.rdata_binary_v2, FileTypes.rdata_binary_v3}:
if extension == ".rds":
warnings.warn(
f"Wrong extension {extension} for file in RDATA format",
)

view = view[len(magic_dict[filetype]):]
new_data = view
else:
raise NotImplementedError("Unknown file type")
new_data = view
if extension != ".rds":
warnings.warn("Unknown file type: assumed RDS")

return parse_function(
new_data, # type: ignore
expand_altrep=expand_altrep,
altrep_constructor_dict=altrep_constructor_dict,
extension=extension,
)


def parse_rdata_binary(
data: memoryview,
expand_altrep: bool = True,
altrep_constructor_dict: AltRepConstructorMap = DEFAULT_ALTREP_MAP,
extension: str | None = None,
) -> RData:
"""Select the appropriate parser and parse all the info."""
format_type = rdata_format(data)
Expand Down
Binary file added rdata/tests/data/test_dataframe.rds
Binary file not shown.
Binary file added rdata/tests/data/test_dataframe_v3.rds
Binary file not shown.
Binary file added rdata/tests/data/test_full_named_matrix.rds
Binary file not shown.
48 changes: 47 additions & 1 deletion rdata/tests/test_rdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

import numpy as np
import pandas as pd
import rdata
import xarray

import rdata

TESTDATA_PATH = rdata.TESTDATA_PATH


Expand Down Expand Up @@ -161,6 +162,29 @@ def test_full_named_matrix(self) -> None:
reference,
)

def test_full_named_matrix_rds(self) -> None:
    """Check that a matrix with named dimensions is read from a RDS file."""
    rds_path = TESTDATA_PATH / "test_full_named_matrix.rds"
    converted = rdata.conversion.convert(
        rdata.parser.parse_file(rds_path),
    )

    # Expected 2x3 array: both axes carry a name and per-entry labels.
    expected_coords = {
        "my_dim_0": ["dim0_0", "dim0_1"],
        "my_dim_1": ["dim1_0", "dim1_1", "dim1_2"],
    }
    expected = xarray.DataArray(
        [
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
        ],
        dims=["my_dim_0", "my_dim_1"],
        coords=expected_coords,
    )

    xarray.testing.assert_identical(converted, expected)

def test_list(self) -> None:
"""Test that list can be parsed."""
parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_list.rda")
Expand Down Expand Up @@ -241,6 +265,28 @@ def test_dataframe(self) -> None:
),
)

def test_dataframe_rds(self) -> None:
    """Check that dataframes stored in RDS files are converted correctly."""
    rds_files = ("test_dataframe.rds", "test_dataframe_v3.rds")

    for filename in rds_files:
        # Each file is reported as its own subtest so one failure
        # does not hide the result of the other.
        with self.subTest(file=filename):
            rds_path = TESTDATA_PATH / filename
            converted = rdata.conversion.convert(
                rdata.parser.parse_file(rds_path),
            )

            expected = pd.DataFrame(
                {
                    "class": pd.Categorical(["a", "b", "b"]),
                    "value": [1, 2, 3],
                },
                index=pd.RangeIndex(start=1, stop=4),
            )

            pd.testing.assert_frame_equal(converted, expected)

def test_dataframe_rownames(self) -> None:
"""Test dataframe conversion."""
parsed = rdata.parser.parse_file(
Expand Down

0 comments on commit 4faa331

Please sign in to comment.