From 58d17688e0dad4fd622b58b785ab861ba051c93d Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 12:08:36 +0100 Subject: [PATCH 01/20] Add a page listing default conversions. --- docs/apilist.rst | 22 ++++++++- docs/conf.py | 3 +- docs/conversions.rst | 85 +++++++++++++++++++++++++++++++++ docs/index.rst | 1 + rdata/conversion/__init__.py | 5 ++ rdata/conversion/_conversion.py | 9 ++-- 6 files changed, 119 insertions(+), 6 deletions(-) create mode 100644 docs/conversions.rst diff --git a/docs/apilist.rst b/docs/apilist.rst index 2ebd3bf..0c020b6 100644 --- a/docs/apilist.rst +++ b/docs/apilist.rst @@ -8,7 +8,7 @@ A complete list of all functions and structures provided by rdata. Parse :code:`.rda` format ^^^^^^^^^^^^^^^^^^^^^^^^^ Functions for parsing data in the :code:`.rda` format. These functions return a structure representing -the contents of the file, without transforming it to more appropiate Python objects. Thus, if a different +the contents of the file, without transforming it to more appropriate Python objects. Thus, if a different way of converting R objects to Python objects is needed, it can be done from this structure. .. autosummary:: @@ -19,7 +19,7 @@ way of converting R objects to Python objects is needed, it can be done from thi Conversion of the R objects ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -These objects and functions convert the parsed R objects to appropiate Python objects. The Python object +These objects and functions convert the parsed R objects to appropriate Python objects. The Python object corresponding to a R object is chosen to preserve most original properties, but it could change in the future, if a more fitting Python object is found. @@ -30,3 +30,21 @@ future, if a more fitting Python object is found. 
rdata.conversion.SimpleConverter rdata.conversion.convert +Auxiliary structures +^^^^^^^^^^^^^^^^^^^^ +These classes are used to represent R objects which have no clear analog in Python, so that the information +therein can be retrieved. + +.. autosummary:: + :toctree: modules + + rdata.conversion.RBuiltin + rdata.conversion.RBytecode + rdata.conversion.RFunction + rdata.conversion.REnvironment + rdata.conversion.RExpression + rdata.conversion.RExternalPointer + rdata.conversion.RLanguage + rdata.conversion.SrcFile + rdata.conversion.SrcFileCopy + rdata.conversion.SrcRef diff --git a/docs/conf.py b/docs/conf.py index e358ad9..26e4609 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -206,13 +206,14 @@ # -- Options for "sphinx.ext.intersphinx" -- intersphinx_mapping = { + "igraph": ("https://python.igraph.org/en/stable/api", None), "matplotlib": ("https://matplotlib.org/stable", None), "numpy": ("https://numpy.org/doc/stable", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "python": (f"https://docs.python.org/{sys.version_info.major}", None), "scipy": ("https://docs.scipy.org/doc/scipy", None), "sklearn": ("https://scikit-learn.org/stable", None), - "igraph": ("https://python.igraph.org/en/stable/api", None), + "xarray": ('http://xarray.pydata.org/en/stable/', None), } # -- Options for "sphinx.ext.todo" -- diff --git a/docs/conversions.rst b/docs/conversions.rst new file mode 100644 index 0000000..b513ef0 --- /dev/null +++ b/docs/conversions.rst @@ -0,0 +1,85 @@ +Default conversions +=================== + +This page lists the default conversions applied to R objects to convert them to +Python objects. + +Basic types +----------- + +The conversion of basic types is performed directly by the +:class:`~rdata.conversion.Converter` used. +Thus, changing the conversion for basic types currently requires creating a +custom :class:`~rdata.conversion.Converter` class. 
+The default :class:`~rdata.conversion.SimpleConverter` realizes the following +conversions: + +================== ================================================================================================ +R object type Python conversion +================== ================================================================================================ +builtin function :class:`rdata.conversion.RBuiltin`. +bytecode :class:`rdata.conversion.RBytecode`. +char (internal) :class:`str` or :class:`bytes` (depending on the encoding flags). +closure :class:`rdata.conversion.RFunction`. +complex :class:`numpy.ndarray` with 128-bit complex dtype. + + :class:`numpy.ma.MaskedArray` with 128-bit complex dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +environment :class:`rdata.conversion.REnvironment`. + There are three special cases: the empty, base and global environments, which are + all empty by default. The base and global environments may be supplied to the + converter. +expression :class:`rdata.conversion.RExpression`. +external pointer :class:`rdata.conversion.RExternalPointer`. +integer :class:`numpy.ndarray` with 32-bit integer dtype. + + :class:`numpy.ma.MaskedArray` with 32-bit integer dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +language :class:`rdata.conversion.RLanguage`. +list :class:`list` (if untagged). + + :class:`dict` (if tagged). Empty lists are considered tagged. +logical (boolean) :class:`numpy.ndarray` with boolean dtype. + + :class:`numpy.ma.MaskedArray` with boolean dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +missing argument :data:`NotImplemented`. +NULL :data:`None`. +real :class:`numpy.ndarray` with 64-bit floating point dtype. + + :class:`numpy.ma.MaskedArray` with 64-bit floating point dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. 
+reference The referenced value, that is, an object already converted. +S4 object :class:`types.SimpleNamespace`. +special function :class:`rdata.conversion.RBuiltin`. +string :class:`numpy.ndarray` with suitable fixed-length string dtype. +symbol :class:`str`. +vector :class:`list` (if untagged). + + :class:`dict` (if tagged). Empty lists are considered tagged. +================== ================================================================================================ + +Custom classes +-------------- + +In addition, objects containing a `"class"` attribute are passed to a "constructor function", if one is available. +A dictionary of constructor functions can be supplied to the converter, where the key of each element corresponds +to the class name. +When the `"class"` attribute contains several class names, these are tried in order. +The default constructor dictionary allows converting the following R classes: + +================== ================================================================================================ +R class Python conversion +================== ================================================================================================ +data.frame :class:`pandas.DataFrame`. +factor :class:`pandas.Categorical`. +ordered :class:`pandas.Categorical` (with ordered categories). +srcfile :class:`rdata.conversion.SrcFile`. +srcfilecopy :class:`rdata.conversion.SrcFileCopy`. +srcref :class:`rdata.conversion.SrcRef`. +ts :class:`pandas.Series`. +================== ================================================================================================ diff --git a/docs/index.rst b/docs/index.rst index 158ec87..e265821 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ Its main advantages are: apilist auto_examples/index Try online! + conversions contributors The package rdata is developed `on Github `_. 
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py index d781fdf..064723c 100644 --- a/rdata/conversion/__init__.py +++ b/rdata/conversion/__init__.py @@ -4,10 +4,15 @@ Converter as Converter, RBuiltin as RBuiltin, RBytecode as RBytecode, + REnvironment as REnvironment, RExpression as RExpression, + RExternalPointer as RExternalPointer, RFunction as RFunction, RLanguage as RLanguage, SimpleConverter as SimpleConverter, + SrcFile as SrcFile, + SrcFileCopy as SrcFileCopy, + SrcRef as SrcRef, convert as convert, convert_array as convert_array, convert_attrs as convert_attrs, diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 303d4f3..79fa6f6 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -311,7 +311,7 @@ def convert_char( def convert_symbol( r_symbol: parser.RObject, conversion_function: ConversionFunction, -) -> str | bytes: +) -> str: """ Decode a R symbol to a Python string or bytes. @@ -332,7 +332,7 @@ def convert_symbol( """ if r_symbol.info.type is parser.RObjectType.SYM: symbol = conversion_function(r_symbol.value) - assert isinstance(symbol, (str, bytes)) + assert isinstance(symbol, str) return symbol msg = "Must receive a SYM object" @@ -500,6 +500,7 @@ def ts_constructor( @dataclass class SrcRef: + """Reference to a source file location.""" first_line: int first_byte: int last_line: int @@ -520,6 +521,7 @@ def srcref_constructor( @dataclass class SrcFile: + """Source file.""" filename: str file_encoding: str | None string_encoding: str | None @@ -545,13 +547,14 @@ def srcfile_constructor( @dataclass class SrcFileCopy(SrcFile): + """Source file with a copy of its lines.""" lines: Sequence[str] def srcfilecopy_constructor( obj: REnvironment, attrs: Mapping[str, Any], # noqa: ARG001 -) -> SrcFile: +) -> SrcFileCopy: frame = obj.frame assert frame is not None From e3c5d34cc0bd87262fcc4ba9f743ff92b6f2d20c Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 
12:23:34 +0100 Subject: [PATCH 02/20] Fix ruff reporting. --- .github/workflows/ruff.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index e8133f2..f2f91eb 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -5,4 +5,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 \ No newline at end of file + - uses: chartboost/ruff-action@v1 + with: + args: check --output-format github \ No newline at end of file From ef10690d97aa63a7615ad277acc8134fa990c068 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 12:25:37 +0100 Subject: [PATCH 03/20] Do not execute Ruff twice. --- .github/workflows/ruff.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index f2f91eb..cb80de5 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -1,5 +1,5 @@ name: Ruff -on: [push, pull_request] +on: [push] jobs: ruff: runs-on: ubuntu-latest From 825bedbae4648489cd4a98d78b9ab6f3f9fa3bed Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 12:26:58 +0100 Subject: [PATCH 04/20] Fix Ruff error. --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 26e4609..d2c5d7c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -213,7 +213,7 @@ "python": (f"https://docs.python.org/{sys.version_info.major}", None), "scipy": ("https://docs.scipy.org/doc/scipy", None), "sklearn": ("https://scikit-learn.org/stable", None), - "xarray": ('http://xarray.pydata.org/en/stable/', None), + "xarray": ("http://xarray.pydata.org/en/stable/", None), } # -- Options for "sphinx.ext.todo" -- From 28cbdc7cad48c95b12591b62836edcccfd08a5cb Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 13:06:40 +0100 Subject: [PATCH 05/20] Fix Mypy action. 
--- .github/workflows/mypy.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index ed12d4e..b15a0f1 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -9,11 +9,12 @@ jobs: name: Mypy steps: - uses: actions/checkout@v4 + - name: Install dependencies + run: | + pip install ".[test,typing]" mypy; - uses: tsuyoshicho/action-mypy@v4 with: github_token: ${{ secrets.github_token }} reporter: github-pr-review # The action will output fail if there are mypy errors - level: error - setup_command: pip install ".[test,typing]" mypy - mypy_flags: '' + level: error \ No newline at end of file From 4972edae347c8d3402dd8a7472390d63d49becd6 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 13:10:28 +0100 Subject: [PATCH 06/20] Check Mypy reporting. --- rdata/conversion/_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 79fa6f6..b225441 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -311,7 +311,7 @@ def convert_char( def convert_symbol( r_symbol: parser.RObject, conversion_function: ConversionFunction, -) -> str: +) -> int: """ Decode a R symbol to a Python string or bytes. From 85f946540d78719a7f614128304b060731f83795 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 13:50:37 +0100 Subject: [PATCH 07/20] Fix Mypy install in GH actions. 
--- .github/workflows/mypy.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index b15a0f1..1f02bf7 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -9,9 +9,11 @@ jobs: name: Mypy steps: - uses: actions/checkout@v4 + - name: Install dependencies run: | - pip install ".[test,typing]" mypy; + pip install --upgrade setuptools ".[test,typing]" mypy; + - uses: tsuyoshicho/action-mypy@v4 with: github_token: ${{ secrets.github_token }} From 7a36330f3c6489d8c9a6dd1fc4edb6f519ca3f71 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 13:55:05 +0100 Subject: [PATCH 08/20] Possible fix for Mypy config. --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 1f02bf7..211f424 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -12,7 +12,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade setuptools ".[test,typing]" mypy; + pip3 install ".[test,typing]" mypy; - uses: tsuyoshicho/action-mypy@v4 with: From 92e137bc5ec26f9db6b2d193f8d2fd3525239e59 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 13:59:43 +0100 Subject: [PATCH 09/20] Try other approach. 
--- .github/workflows/mypy.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 211f424..beba751 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -9,6 +9,11 @@ jobs: name: Mypy steps: - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" - name: Install dependencies run: | From fd7469d08c42f63fd9ceb9badf35c081dea428a6 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:11:02 +0100 Subject: [PATCH 10/20] Specify path in Mypy action --- .github/workflows/mypy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index beba751..98c4e5b 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -23,5 +23,6 @@ jobs: with: github_token: ${{ secrets.github_token }} reporter: github-pr-review + execute_command: "mypy ." # The action will output fail if there are mypy errors level: error \ No newline at end of file From 328666fe301c98d1b2d2202804fc2dcb6843ea67 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:16:26 +0100 Subject: [PATCH 11/20] Test. 
--- .github/workflows/mypy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 98c4e5b..aa6fcf3 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,6 +18,7 @@ jobs: - name: Install dependencies run: | pip3 install ".[test,typing]" mypy; + pip3 install mypy .; - uses: tsuyoshicho/action-mypy@v4 with: From c412f660c8dc6aba0d1d5b97d94587aa8d5406dc Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:16:41 +0100 Subject: [PATCH 12/20] Test2 --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index aa6fcf3..09cebd8 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies run: | pip3 install ".[test,typing]" mypy; - pip3 install mypy .; + mypy .; - uses: tsuyoshicho/action-mypy@v4 with: From 0ce1ff3bd040d24abd09b97049597e923c72cacd Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:19:39 +0100 Subject: [PATCH 13/20] Test3 --- .github/workflows/mypy.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 09cebd8..c47c43e 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,6 +18,9 @@ jobs: - name: Install dependencies run: | pip3 install ".[test,typing]" mypy; + + - name: Run MyPy + run: | mypy .; - uses: tsuyoshicho/action-mypy@v4 From 76c9b6242d32013a2263cbe5459da5f5852d5fbc Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:22:43 +0100 Subject: [PATCH 14/20] Remove build folder. 
--- .github/workflows/mypy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index c47c43e..e10ff01 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,6 +18,7 @@ jobs: - name: Install dependencies run: | pip3 install ".[test,typing]" mypy; + rm -rf build; - name: Run MyPy run: | From db8dd42ba0b198c19cce2e258711cb820dc6786d Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:26:32 +0100 Subject: [PATCH 15/20] Try again. --- .github/workflows/mypy.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e10ff01..d43cf8f 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -19,10 +19,6 @@ jobs: run: | pip3 install ".[test,typing]" mypy; rm -rf build; - - - name: Run MyPy - run: | - mypy .; - uses: tsuyoshicho/action-mypy@v4 with: From 00e6a3b95342e011657daae5e778739ce05b9dcf Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:30:39 +0100 Subject: [PATCH 16/20] Fix Matplotlib version for typing. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 24095b0..83773da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ docs = [ "sphinx-gallery", ] typing = [ + "matplotlib>=3.8", "mypy", "pandas-stubs", ] From 7fc62b8cac40beb05515844e1408846b1a74d242 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 23 Jan 2024 17:34:48 +0100 Subject: [PATCH 17/20] Hopefully fix Mypy --- .github/workflows/mypy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index d43cf8f..b7069be 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -24,6 +24,7 @@ jobs: with: github_token: ${{ secrets.github_token }} reporter: github-pr-review + install_types: false execute_command: "mypy ." 
# The action will output fail if there are mypy errors level: error \ No newline at end of file From 5688ca0109aa913c9db1c5893dd45ec14fb02ca2 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 24 Jan 2024 09:48:30 +0100 Subject: [PATCH 18/20] Try to fix again. --- .github/workflows/mypy.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index b7069be..16b7306 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,13 +18,12 @@ jobs: - name: Install dependencies run: | pip3 install ".[test,typing]" mypy; - rm -rf build; + rm -rf build; - uses: tsuyoshicho/action-mypy@v4 with: github_token: ${{ secrets.github_token }} reporter: github-pr-review install_types: false - execute_command: "mypy ." # The action will output fail if there are mypy errors level: error \ No newline at end of file From 0955ec289ec005954e5a402d4a0327d4e9406842 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 24 Jan 2024 10:19:28 +0100 Subject: [PATCH 19/20] Try without filter. --- .github/workflows/mypy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 16b7306..8e8ea76 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -26,4 +26,5 @@ jobs: reporter: github-pr-review install_types: false # The action will output fail if there are mypy errors - level: error \ No newline at end of file + level: error + filter_mode: nofilter \ No newline at end of file From 40005b9f44feece04c355368df4bec1ee98117c3 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 24 Jan 2024 10:22:19 +0100 Subject: [PATCH 20/20] Fix Mypy error. 
--- rdata/conversion/_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index b225441..93992e2 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -311,7 +311,7 @@ def convert_char( def convert_symbol( r_symbol: parser.RObject, conversion_function: ConversionFunction, -) -> int: +) -> str | bytes: """ Decode a R symbol to a Python string or bytes.