Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(typing): Fully annotate api.py #3508

Merged
merged 23 commits into from
Aug 2, 2024
Merged

Conversation

dangotbanned
Copy link
Member

@dangotbanned dangotbanned commented Jul 29, 2024

This could later be extended to other modules, but for now api is complete.

Follows work in #3480


Main motivation for this was trying to understand all of the private functions.

There seems to be an assumption that ChartType's can be combined with dict's early on (_check_if_valid_subspec, _check_if_can_be_layered), but later code assumes all derive SchemaBase.

Minimal Repro

Test code block
import altair as alt
import pytest
import pandas as pd

@pytest.mark.parametrize("concat_func", [alt.concat, alt.hconcat, alt.vconcat])
def test_compound_dict(concat_func):
    data = pd.DataFrame({"x": range(3), "y": list("abc")})
    base = alt.Chart(data).mark_point().encode(x="x", y="y")
    mapping = base.to_dict()

    without_top_level = {
        k: v for k, v in mapping.items() if k not in alt.TOPLEVEL_ONLY_KEYS
    }

    compound_1 = concat_func(base, base)
    with pytest.raises(
        ValueError, match=r"Objects with .+ attribute cannot be used within.+Concat"
    ):
        compound_2 = concat_func(base, mapping)

    compound_3 = concat_func(base, without_top_level)
Test Failures
============================= test session starts =============================
platform win32 -- Python 3.12.3, pytest-8.3.2, pluggy-1.5.0
configfile: pyproject.toml
plugins: anyio-4.4.0, cov-5.0.0, xdist-3.6.1
created: 3/3 workers
3 workers [3 items]

FFF                                                                      [100%]
================================== FAILURES ===================================
_________________________ test_compound_dict[hconcat] _________________________

concat_func = <function hconcat at 0x00000114F93D96C0>

    @pytest.mark.parametrize("concat_func", [alt.concat, alt.hconcat, alt.vconcat])
    def test_compound_dict(concat_func):
        data = pd.DataFrame({"x": range(3), "y": list("abc")})
        base = alt.Chart(data).mark_point().encode(x="x", y="y")
        mapping = base.to_dict()
    
        without_top_level = {
            k: v for k, v in mapping.items() if k not in alt.TOPLEVEL_ONLY_KEYS
        }
    
        compound_1 = concat_func(base, base)
        with pytest.raises(
            ValueError, match=r"Objects with .+ attribute cannot be used within.+Concat"
        ):
            compound_2 = concat_func(base, mapping)
    
>       compound_3 = concat_func(base, without_top_level)

tests\vegalite\v5\test_api.py:1197: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
altair\vegalite\v5\api.py:4159: in hconcat
    return HConcatChart(hconcat=charts, **kwargs)  # pyright: ignore
altair\vegalite\v5\api.py:4074: in __init__
    self.data, self.hconcat = _combine_subchart_data(self.data, self.hconcat)
altair\vegalite\v5\api.py:4500: in _combine_subchart_data
    if subdata is not Undefined and all(c.data is subdata for c in subcharts):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

.0 = <list_iterator object at 0x00000114FBA5E770>

>   if subdata is not Undefined and all(c.data is subdata for c in subcharts):
E   AttributeError: 'dict' object has no attribute 'data'

altair\vegalite\v5\api.py:4500: AttributeError
_________________________ test_compound_dict[concat] __________________________

concat_func = <function concat at 0x000001ADE78BD080>

    @pytest.mark.parametrize("concat_func", [alt.concat, alt.hconcat, alt.vconcat])
    def test_compound_dict(concat_func):
        data = pd.DataFrame({"x": range(3), "y": list("abc")})
        base = alt.Chart(data).mark_point().encode(x="x", y="y")
        mapping = base.to_dict()
    
        without_top_level = {
            k: v for k, v in mapping.items() if k not in alt.TOPLEVEL_ONLY_KEYS
        }
    
        compound_1 = concat_func(base, base)
        with pytest.raises(
            ValueError, match=r"Objects with .+ attribute cannot be used within.+Concat"
        ):
            compound_2 = concat_func(base, mapping)
    
>       compound_3 = concat_func(base, without_top_level)

tests\vegalite\v5\test_api.py:1197: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
altair\vegalite\v5\api.py:4062: in concat
    return ConcatChart(concat=charts, **kwargs)  # pyright: ignore
altair\vegalite\v5\api.py:3977: in __init__
    self.data, self.concat = _combine_subchart_data(self.data, self.concat)
altair\vegalite\v5\api.py:4500: in _combine_subchart_data
    if subdata is not Undefined and all(c.data is subdata for c in subcharts):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

.0 = <list_iterator object at 0x000001ADE9EAA590>

>   if subdata is not Undefined and all(c.data is subdata for c in subcharts):
E   AttributeError: 'dict' object has no attribute 'data'

altair\vegalite\v5\api.py:4500: AttributeError
_________________________ test_compound_dict[vconcat] _________________________

concat_func = <function vconcat at 0x000002B4F53CDD00>

    @pytest.mark.parametrize("concat_func", [alt.concat, alt.hconcat, alt.vconcat])
    def test_compound_dict(concat_func):
        data = pd.DataFrame({"x": range(3), "y": list("abc")})
        base = alt.Chart(data).mark_point().encode(x="x", y="y")
        mapping = base.to_dict()
    
        without_top_level = {
            k: v for k, v in mapping.items() if k not in alt.TOPLEVEL_ONLY_KEYS
        }
    
        compound_1 = concat_func(base, base)
        with pytest.raises(
            ValueError, match=r"Objects with .+ attribute cannot be used within.+Concat"
        ):
            compound_2 = concat_func(base, mapping)
    
>       compound_3 = concat_func(base, without_top_level)

tests\vegalite\v5\test_api.py:1197: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
altair\vegalite\v5\api.py:4258: in vconcat
    return VConcatChart(vconcat=charts, **kwargs)  # pyright: ignore
altair\vegalite\v5\api.py:4171: in __init__
    self.data, self.vconcat = _combine_subchart_data(self.data, self.vconcat)
altair\vegalite\v5\api.py:4500: in _combine_subchart_data
    if subdata is not Undefined and all(c.data is subdata for c in subcharts):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

.0 = <list_iterator object at 0x000002B4F79EA710>

>   if subdata is not Undefined and all(c.data is subdata for c in subcharts):
E   AttributeError: 'dict' object has no attribute 'data'

altair\vegalite\v5\api.py:4500: AttributeError
=========================== short test summary info ===========================
FAILED tests/vegalite/v5/test_api.py::test_compound_dict[hconcat] - AttributeError: 'dict' object has no attribute 'data'
FAILED tests/vegalite/v5/test_api.py::test_compound_dict[concat] - AttributeError: 'dict' object has no attribute 'data'
FAILED tests/vegalite/v5/test_api.py::test_compound_dict[vconcat] - AttributeError: 'dict' object has no attribute 'data'
============================== 3 failed in 3.54s ==============================
Finished running tests!

Summary

Without typing information, finding this the conflict between _combine_subchart_data and the earlier _check_if_valid_subspec, _check_if_can_be_layered is much less clear.

Suggestion

Either all of these functions should expect SchemaBase | dict or all SchemaBase.

Unless I'm missing something, there does not seem to be a way to use a dict to build a compound chart this way.

To highlight all the missing annotations to fix and autofix `None` return
Excluding `*args` on `ChartType` wrappers. They need to be defined in alignment in multiple places, which is more complex
…`dict`

Among these, many locations already assume `str` keys in the implementation - without checking
```
altair\vegalite\v5\api.py:4368: error: Argument 1 of "__iadd__" is incompatible with "__add__" of supertype "TopLevelMixin"; supertype defines the argument type as "Chart | RepeatChart | ConcatChart | HConcatChart | VConcatChart | FacetChart | LayerChart"  [override]         def __iadd__(self, other: LayerChart | Chart) -> Self:                            ^~~~~~~~~~~~~~~~~~~~~~~~~ altair\vegalite\v5\api.py:4368: note: This violates the Liskov substitution principle altair\vegalite\v5\api.py:4368: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#incompatible-overrides altair\vegalite\v5\api.py:4376: error: Argument 1 of "__add__" is incompatible with supertype "TopLevelMixin"; supertype defines the argument type as "Chart | RepeatChart | ConcatChart | HConcatChart | VConcatChart | FacetChart | LayerChart"  [override]         def __add__(self, other: LayerChart | Chart) -> Self:                           ^~~~~~~~~~~~~~~~~~~~~~~~~ altair\vegalite\v5\api.py:4376: note: This violates the Liskov substitution principle altair\vegalite\v5\api.py:4376: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#incompatible-overrides
```
This could later be extended to other modules, but for now `api` is complete.
@dangotbanned dangotbanned marked this pull request as ready for review July 29, 2024 16:42
Copy link
Contributor

@binste binste left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very satisfying to see api.py finally being fully typed :) Didn't have it in me when doing #3143 to go all the way. Thanks!

Either all of these functions should expect SchemaBase | dict or all SchemaBase.

Unless I'm missing something, there does not seem to be a way to use a dict to build a compound chart this way.

Thanks for uncovering it! Agree with your suggestion, dicts don't make sense at this stage.

@binste binste merged commit a847833 into vega:main Aug 2, 2024
13 checks passed
@dangotbanned
Copy link
Member Author

It's very satisfying to see api.py finally being fully typed :) Didn't have it in me when doing #3143 to go all the way. Thanks!

Honestly the amount you did do is no small feat, I'm super appreciative of all that work.

Thanks for that and the review @binste

@binste
Copy link
Contributor

binste commented Aug 2, 2024

Thanks! ❤️

@dangotbanned dangotbanned deleted the api-more-ann branch August 2, 2024 07:13
@dangotbanned
Copy link
Member Author

Either all of these functions should expect SchemaBase | dict or all SchemaBase.
Unless I'm missing something, there does not seem to be a way to use a dict to build a compound chart this way.

Thanks for uncovering it! Agree with your suggestion, dicts don't make sense at this stage.

@binste would it be worth opening an issue for the Minimal Repro section?

I'd still need to double-check the implications of this, but it should mean these two could be simplified significantly:

def _check_if_valid_subspec(
spec: ConcatType | LayerType | dict[str, Any],
classname: Literal[
"ConcatChart",
"FacetChart",
"HConcatChart",
"LayerChart",
"RepeatChart",
"VConcatChart",
],
) -> None:
"""
Check if the spec is a valid sub-spec.
If it is not, then raise a ValueError
"""
err = (
'Objects with "{0}" attribute cannot be used within {1}. '
"Consider defining the {0} attribute in the {1} object instead."
)
if not isinstance(spec, (core.SchemaBase, dict)):
msg = f"Only chart objects can be used in {classname}."
raise ValueError(msg)
for attr in TOPLEVEL_ONLY_KEYS:
if isinstance(spec, core.SchemaBase):
val = getattr(spec, attr, Undefined)
else:
val = spec.get(attr, Undefined)
if val is not Undefined:
raise ValueError(err.format(attr, classname))

def _check_if_can_be_layered(spec: LayerType | dict[str, Any]) -> None:
"""Check if the spec can be layered."""
def _get(spec: LayerType | dict[str, Any], attr: str) -> Any:
if isinstance(spec, core.SchemaBase):
return spec._get(attr)
else:
return spec.get(attr, Undefined)
def _get_any(spec: LayerType | dict[str, Any], *attrs: str) -> bool:
return any(_get(spec, attr) is not Undefined for attr in attrs)
base_msg = "charts cannot be layered. Instead, layer the charts before"
encoding = _get(spec, "encoding")
if encoding is not Undefined:
for channel in ["row", "column", "facet"]:
if _get(encoding, channel) is not Undefined:
msg = f"Faceted {base_msg} faceting."
raise ValueError(msg)
if isinstance(spec, (Chart, LayerChart)):
return
if not isinstance(spec, (core.SchemaBase, dict)):
msg = "Only chart objects can be layered."
raise ValueError(msg)
if isinstance(spec, FacetChart) or _get(spec, "facet") is not Undefined:
msg = f"Faceted {base_msg} faceting."
raise ValueError(msg)
if isinstance(spec, RepeatChart) or _get(spec, "repeat") is not Undefined:
msg = f"Repeat {base_msg} repeating."
raise ValueError(msg)
_concat = ConcatChart, HConcatChart, VConcatChart
if isinstance(spec, _concat) or _get_any(spec, "concat", "hconcat", "vconcat"):
msg = f"Concatenated {base_msg} concatenating."
raise ValueError(msg)

Since dict[str, Any] will be rejected later, even if these pass

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants