diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index ed12d4e..8e8ea76 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -9,11 +9,22 @@ jobs: name: Mypy steps: - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip3 install ".[test,typing]" mypy; + rm -rf build; + - uses: tsuyoshicho/action-mypy@v4 with: github_token: ${{ secrets.github_token }} reporter: github-pr-review + install_types: false # The action will output fail if there are mypy errors level: error - setup_command: pip install ".[test,typing]" mypy - mypy_flags: '' + filter_mode: nofilter \ No newline at end of file diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index e8133f2..cb80de5 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -1,8 +1,10 @@ name: Ruff -on: [push, pull_request] +on: [push] jobs: ruff: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 \ No newline at end of file + - uses: chartboost/ruff-action@v1 + with: + args: check --output-format github \ No newline at end of file diff --git a/docs/apilist.rst b/docs/apilist.rst index 2ebd3bf..0c020b6 100644 --- a/docs/apilist.rst +++ b/docs/apilist.rst @@ -8,7 +8,7 @@ A complete list of all functions and structures provided by rdata. Parse :code:`.rda` format ^^^^^^^^^^^^^^^^^^^^^^^^^ Functions for parsing data in the :code:`.rda` format. These functions return a structure representing -the contents of the file, without transforming it to more appropiate Python objects. Thus, if a different +the contents of the file, without transforming it to more appropriate Python objects. Thus, if a different way of converting R objects to Python objects is needed, it can be done from this structure. .. 
autosummary:: @@ -19,7 +19,7 @@ way of converting R objects to Python objects is needed, it can be done from thi Conversion of the R objects ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -These objects and functions convert the parsed R objects to appropiate Python objects. The Python object +These objects and functions convert the parsed R objects to appropriate Python objects. The Python object corresponding to a R object is chosen to preserve most original properties, but it could change in the future, if a more fitting Python object is found. @@ -30,3 +30,21 @@ future, if a more fitting Python object is found. rdata.conversion.SimpleConverter rdata.conversion.convert +Auxiliary structures +^^^^^^^^^^^^^^^^^^^^ +These classes are used to represent R objects which have no clear analog in Python, so that the information +therein can be retrieved. + +.. autosummary:: + :toctree: modules + + rdata.conversion.RBuiltin + rdata.conversion.RBytecode + rdata.conversion.RFunction + rdata.conversion.REnvironment + rdata.conversion.RExpression + rdata.conversion.RExternalPointer + rdata.conversion.RLanguage + rdata.conversion.SrcFile + rdata.conversion.SrcFileCopy + rdata.conversion.SrcRef diff --git a/docs/conf.py b/docs/conf.py index e358ad9..d2c5d7c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -206,13 +206,14 @@ # -- Options for "sphinx.ext.intersphinx" -- intersphinx_mapping = { + "igraph": ("https://python.igraph.org/en/stable/api", None), "matplotlib": ("https://matplotlib.org/stable", None), "numpy": ("https://numpy.org/doc/stable", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "python": (f"https://docs.python.org/{sys.version_info.major}", None), "scipy": ("https://docs.scipy.org/doc/scipy", None), "sklearn": ("https://scikit-learn.org/stable", None), - "igraph": ("https://python.igraph.org/en/stable/api", None), + "xarray": ("http://xarray.pydata.org/en/stable/", None), } # -- Options for "sphinx.ext.todo" -- diff --git a/docs/conversions.rst 
b/docs/conversions.rst new file mode 100644 index 0000000..b513ef0 --- /dev/null +++ b/docs/conversions.rst @@ -0,0 +1,85 @@ +Default conversions +=================== + +This page lists the default conversions applied to R objects to convert them to +Python objects. + +Basic types +----------- + +The conversion of basic types is performed directly by the +:class:`~rdata.conversion.Converter` used. +Thus, changing the conversion for basic types currently requires creating a +custom :class:`~rdata.conversion.Converter` class. +The default :class:`~rdata.conversion.SimpleConverter` realizes the following +conversions: + +================== ================================================================================================ +R object type Python conversion +================== ================================================================================================ +builtin function :class:`rdata.conversion.RBuiltin`. +bytecode :class:`rdata.conversion.RBytecode`. +char (internal) :class:`str` or :class:`bytes` (depending on the encoding flags). +closure :class:`rdata.conversion.RFunction`. +complex :class:`numpy.ndarray` with 128-bit complex dtype. + + :class:`numpy.ma.MaskedArray` with 128-bit complex dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +environment :class:`rdata.conversion.REnvironment`. + There are three special cases: the empty, base and global environments, which are + all empty by default. The base and global environments may be supplied to the + converter. +expression :class:`rdata.conversion.RExpression`. +external pointer :class:`rdata.conversion.RExternalPointer`. +integer :class:`numpy.ndarray` with 32-bit integer dtype. + + :class:`numpy.ma.MaskedArray` with 32-bit integer dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +language :class:`rdata.conversion.RLanguage`. +list :class:`list` (if untagged). 
+ + :class:`dict` (if tagged). Empty lists are considered tagged. +logical (boolean) :class:`numpy.ndarray` with boolean dtype. + + :class:`numpy.ma.MaskedArray` with boolean dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +missing argument :data:`NotImplemented`. +NULL :data:`None`. +real :class:`numpy.ndarray` with 64-bit floating point dtype. + + :class:`numpy.ma.MaskedArray` with 64-bit floating point dtype if it contains NA values. + + :class:`xarray.DataArray` if it contains labeled dimensions. +reference The referenced value, that is, an object already converted. +S4 object :class:`types.SimpleNamespace`. +special function :class:`rdata.conversion.RBuiltin`. +string :class:`numpy.ndarray` with suitable fixed-length string dtype. +symbol :class:`str`. +vector :class:`list` (if untagged). + + :class:`dict` (if tagged). Empty lists are considered tagged. +================== ================================================================================================ + +Custom classes +-------------- + +In addition, objects containing a `"class"` attribute are passed to a "constructor function", if one is available. +A dictionary of constructor functions can be supplied to the converter, where the key of each element corresponds +to the class name. +When the `"class"` attribute contains several class names, these are tried in order. +The default constructor dictionary allows converting the following R classes: + +================== ================================================================================================ +R class Python conversion +================== ================================================================================================ +data.frame :class:`pandas.DataFrame`. +factor :class:`pandas.Categorical`. +ordered :class:`pandas.Categorical` (with ordered categories). +srcfile :class:`rdata.conversion.SrcFile`. +srcfilecopy :class:`rdata.conversion.SrcFileCopy`. 
+srcref :class:`rdata.conversion.SrcRef`. +ts :class:`pandas.Series`. +================== ================================================================================================ diff --git a/docs/index.rst b/docs/index.rst index 158ec87..e265821 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ Its main advantages are: apilist auto_examples/index Try online! + conversions contributors The package rdata is developed `on Github `_. diff --git a/pyproject.toml b/pyproject.toml index 24095b0..83773da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ docs = [ "sphinx-gallery", ] typing = [ + "matplotlib>=3.8", "mypy", "pandas-stubs", ] diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py index d781fdf..064723c 100644 --- a/rdata/conversion/__init__.py +++ b/rdata/conversion/__init__.py @@ -4,10 +4,15 @@ Converter as Converter, RBuiltin as RBuiltin, RBytecode as RBytecode, + REnvironment as REnvironment, RExpression as RExpression, + RExternalPointer as RExternalPointer, RFunction as RFunction, RLanguage as RLanguage, SimpleConverter as SimpleConverter, + SrcFile as SrcFile, + SrcFileCopy as SrcFileCopy, + SrcRef as SrcRef, convert as convert, convert_array as convert_array, convert_attrs as convert_attrs, diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 303d4f3..93992e2 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -332,7 +332,7 @@ def convert_symbol( """ if r_symbol.info.type is parser.RObjectType.SYM: symbol = conversion_function(r_symbol.value) - assert isinstance(symbol, (str, bytes)) + assert isinstance(symbol, str) return symbol msg = "Must receive a SYM object" @@ -500,6 +500,7 @@ def ts_constructor( @dataclass class SrcRef: + """Reference to a source file location.""" first_line: int first_byte: int last_line: int @@ -520,6 +521,7 @@ def srcref_constructor( @dataclass class SrcFile: + """Source file.""" filename: str 
file_encoding: str | None string_encoding: str | None @@ -545,13 +547,14 @@ def srcfile_constructor( @dataclass class SrcFileCopy(SrcFile): + """Source file with a copy of its lines.""" lines: Sequence[str] def srcfilecopy_constructor( obj: REnvironment, attrs: Mapping[str, Any], # noqa: ARG001 -) -> SrcFile: +) -> SrcFileCopy: frame = obj.frame assert frame is not None