From 7f79f22e4544389baa4e6e22a7f77521cf840784 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 17:27:24 +0100 Subject: [PATCH 1/2] Add `__dataframe_namespace__` Taken over from the array API standard approach --- .../dataframe_api/dataframe_object.py | 26 +++++ spec/purpose_and_scope.md | 110 +++++++++++++++++- 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 29dcf5fa..5ce96d56 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -36,6 +36,32 @@ class DataFrame: **Methods and Attributes** """ + def __dataframe_namespace__( + self: DataFrame, /, *, api_version: Optional[str] = None + ) -> Any: + """ + Returns an object that has all the dataframe API functions on it. + + Parameters + ---------- + api_version: Optional[str] + String representing the version of the dataframe API specification + to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. + If it is ``None``, it should return the namespace corresponding to + the latest version of the dataframe API specification. If the given + version is invalid or not implemented for the given module, an + error should be raised. Default: ``None``. + + Returns + ------- + namespace: Any + An object representing the dataframe API namespace. It should have + every top-level function defined in the specification as an + attribute. It may contain other public names as well, but it is + recommended to only include those names that are part of the + specification. + + """ @classmethod def from_dict(cls, data: Mapping[str, Column]) -> DataFrame: diff --git a/spec/purpose_and_scope.md b/spec/purpose_and_scope.md index 78ef93b9..a08025a6 100644 --- a/spec/purpose_and_scope.md +++ b/spec/purpose_and_scope.md @@ -239,17 +239,123 @@ sugar required for fast analysis of data. 
## How to read this document +The API specification itself can be found under {ref}`api-specification`. +For guidance on how to read and understand the type annotations included in +this specification, consult the Python +[documentation](https://docs.python.org/3/library/typing.html). +(how-to-adopt-this-api)= ## How to adopt this API +Most (all) existing dataframe libraries will find something in this API standard +that is incompatible with a current implementation, and that they cannot +change due to backwards compatibility concerns. Therefore we expect that each +of those libraries will want to offer a standard-compliant API in a _new +namespace_. The question then becomes: how does a user access this namespace? +The simplest method is: document the import to use to directly access the +namespace (e.g. `import package_name.dataframe_api`). This has two issues +though: +1. Dataframe-consuming libraries that want to support multiple dataframe + libraries then have to explicitly import each library. +2. It is difficult to _version_ the dataframe API standard implementation (see + {ref}`api-versioning`). -## Definitions +To address both issues, a uniform way must be provided by a conforming +implementation to access the API namespace, namely a [method on the dataframe +object](DataFrame.__dataframe_namespace__): +``` +xp = x.__dataframe_namespace__() +``` +The method must take one keyword, `api_version=None`, to make it possible to +request a specific API version: +``` +xp = x.__dataframe_namespace__(api_version='2023.04') +``` + +The `xp` namespace must contain all functionality specified in +{ref}`api-specification`. The namespace may contain other functionality; however, +including additional functionality is not recommended as doing so may hinder +portability and inter-operation of dataframe libraries within user code. 
+ +### Checking a dataframe object for compliance + +Dataframe-consuming libraries are likely to want a mechanism for determining +whether a provided dataframe is specification compliant. The recommended +approach to check for compliance is by checking whether a dataframe object has +a `__dataframe_namespace__` attribute, as this is the one distinguishing +feature of a dataframe-compliant object. + +Checking for a `__dataframe_namespace__` attribute can be implemented as a +small utility function similar to the following. + +```python +def is_dataframe_api_obj(x): + return hasattr(x, '__dataframe_namespace__') +``` + + +### Discoverability of conforming implementations + +It may be useful to have a way to discover all packages in a Python +environment which provide a conforming dataframe API implementation, and the +namespace that that implementation resides in. +To assist dataframe-consuming libraries which need to create dataframes originating +from multiple conforming dataframe implementations, or developers who want to perform +for example cross-library testing, libraries may provide an +{pypa}`entry point <specifications/entry-points/>` in order to make a dataframe API +namespace discoverable. + +:::{admonition} Optional feature +Given that entry points typically require build system & package installer +specific implementation, this standard chooses to recommend rather than +mandate providing an entry point. +::: + +The following code is an example of how one can discover installed +conforming libraries: + +```python +from importlib.metadata import entry_points + +try: + eps = entry_points()['dataframe_api'] + ep = next(ep for ep in eps if ep.name == 'package_name') +except TypeError: + # The dict interface for entry_points() is deprecated in py3.10, + # supplanted by a new select interface. + ep = entry_points(group='dataframe_api', name='package_name') + +xp = ep.load() +``` + +An entry point must have the following properties: + +- **group**: equal to `dataframe_api`. 
+- **name**: equal to the package name. +- **object reference**: equal to the dataframe API namespace import path. + + +* * * + +## Conformance + +A conforming implementation of the dataframe API standard must provide and +support all the functions, arguments, data types, syntax, and semantics +described in this specification. + +A conforming implementation of the dataframe API standard may provide +additional values, objects, properties, data types, and functions beyond those +described in this specification. + +Libraries which aim to provide a conforming implementation but haven't yet +completed such an implementation may, and are encouraged to, provide details on +the level of (non-)conformance. For details on how to do this, see +[Verification - measuring conformance](verification_test_suite.md). -## References From 5aab34823e649370563e4a92563e8a915a29720a Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 17:34:36 +0100 Subject: [PATCH 2/2] Add content for the "future API evolution" page Also largely taken over from the array API standard, with changes for the process and the smaller scope. --- spec/API_specification/index.rst | 2 ++ spec/future_API_evolution.md | 52 +++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index f41e913d..1a0fbc34 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -1,3 +1,5 @@ +.. 
_api-specification: + API specification ================= diff --git a/spec/future_API_evolution.md b/spec/future_API_evolution.md index 0c538d8a..70754fe4 100644 --- a/spec/future_API_evolution.md +++ b/spec/future_API_evolution.md @@ -1,13 +1,63 @@ +(future-API-evolution)= + # Future API standard evolution ## Scope extensions +Proposals for scope extensions in a future version of the API standard will +be submitted as an issue on the +[data-apis/dataframe-api](https://github.com/data-apis/dataframe-api/issues) +repository, and discussed in public and decided upon. + +```{note} +In the future, once the API standard matures, this may be changed and follow +the more formal process documented at +[data-apis/governance::process_document.md](https://github.com/data-apis/governance/blob/main/process_document.md). +``` ## Backwards compatibility +Functions, objects, keywords and specified behavior are added to this API +standard only if there is a clear need, and functionality is either very +minimally scoped or is already present in multiple existing dataframe +libraries. Therefore it is highly unlikely that future versions of this +standard will make backwards-incompatible changes. +The aim is for future versions to be 100% backwards compatible with older +versions. Any exceptions must have strong rationales and be clearly documented +in the updated API specification and Changelog for a release. -## Versioning +(api-versioning)= + +## Versioning +This API standard uses the following versioning scheme: + +- The version is date-based, in the form `yyyy.mm` (e.g., `2020.12`). +- The version shall not include a standard way to do `alpha`/`beta`/`rc` or + `.post`/`.dev` type versions. + _Rationale: that's for Python packages, not for a standard._ +- The version must be made available at runtime via an attribute + `__dataframe_api_version__` by a compliant implementation, in `'yyyy.mm'` format + as a string, in the namespace that implements the API standard. 
+ _Rationale: dunder version strings are the standard way of doing this._ + +No utilities for dealing with version comparisons need to be provided; given +the format, simple string comparisons with Python operators (`==`, `<`, `>=`, +etc.) will be enough. + +```{note} +Rationale for the `yyyy.mm` versioning scheme choice: +the API will be provided as part of a library, which already has a versioning +scheme (typically PEP 440 compliant and in the form `major.minor.bugfix`), +and a way to access it via `module.__version__`. The API standard version is +completely independent from the package version. Given the standardization +process, it resembles a C/C++ versioning scheme (e.g. `C99`, `C++14`) more +than Python package versioning. +``` + +The frequency of releasing a new version of an API standard will likely be at +regular intervals and on the order of one year; however, no assumptions about +the frequency of new versions appearing should be made.