diff --git a/src/pyosmeta/models/base.py b/src/pyosmeta/models/base.py index e3fc63d..6450d09 100644 --- a/src/pyosmeta/models/base.py +++ b/src/pyosmeta/models/base.py @@ -5,6 +5,7 @@ import re from datetime import datetime +from enum import Enum from typing import Any, Optional, Set, Union import requests @@ -20,6 +21,11 @@ from pyosmeta.utils_clean import clean_date, clean_markdown +class Partnerships(str, Enum): + astropy = "astropy" + pangeo = "pangeo" + + class UrlValidatorMixin: """A mixin to validate classes that are of the same type across several models. @@ -227,6 +233,7 @@ class ReviewModel(BaseModel): populate_by_name=True, str_strip_whitespace=True, validate_assignment=True, + use_enum_values=True, ) package_name: str | None = "" @@ -252,7 +259,7 @@ class ReviewModel(BaseModel): closed_at: Optional[datetime] = None issue_link: str = None joss: Optional[str] = None - partners: Optional[list[str]] = None + partners: Optional[list[Partnerships]] = None gh_meta: Optional[GhMeta] = None @field_validator( diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py index 65cef1c..46de704 100644 --- a/src/pyosmeta/parse_issues.py +++ b/src/pyosmeta/parse_issues.py @@ -13,6 +13,17 @@ from .utils_clean import clean_date_accepted_key from .utils_parse import parse_user_names +KEYED_STRING = re.compile(r"\s*(?P<key>\S*?)\s*:\s*(?P<value>.*)\s*") +""" +Parse a key-value string into keys and values. 
+ +Examples: + + >>> text = 'Astropy: Link coming soon to standards' + >>> KEYED_STRING.search(text).groupdict() + {'key': 'Astropy', 'value': 'Link coming soon to standards'} +""" + @dataclass class ProcessIssues: @@ -190,7 +201,7 @@ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict: # this could be made more flexible if it just runs until it runs # out of categories to parse meta["partners"] = self.get_categories( - body, "## Community Partnerships", 3 + body, "## Community Partnerships", 3, keyed=True ) return meta @@ -422,7 +433,11 @@ def process_repo_meta(self, url: str) -> dict[str, Any]: # This works - i could just make it more generic and remove fmt since it's # not used and replace it with a number of values and a test string def get_categories( - self, issue_list: list[str], section_str: str, num_vals: int + self, + issue_list: list[str], + section_str: str, + num_vals: int, + keyed: bool = False, ) -> list[str] | None: """Parse through a pyOS review issue and grab categories associated with a package @@ -440,6 +455,12 @@ def get_categories( num_vals : int Number of categories expected in the list. for instance 3 partner options. + + keyed : bool + If True, treat the category value as a key-value pair separated by a colon + (and just extract the key). + + eg. 
``- [x] Astropy: some other text`` would be parsed as ``'astropy'`` """ # Find the starting index of the category section index = [ @@ -473,4 +494,12 @@ def get_categories( categories = [ re.sub(r"(\w+) (\w+)", r"\1-\2", item) for item in cleaned ] - return [item.lower().replace("[^1]", "") for item in categories] + categories = [item.lower().replace("[^1]", "") for item in categories] + if keyed: + categories = [ + KEYED_STRING.search(c).groupdict().get("key") + for c in categories + if KEYED_STRING.search(c) is not None + ] + + return categories diff --git a/tests/data/reviews/partnership_astropy.txt b/tests/data/reviews/partnership_astropy.txt new file mode 100644 index 0000000..3c9a475 --- /dev/null +++ b/tests/data/reviews/partnership_astropy.txt @@ -0,0 +1,57 @@ +Submitting Author: Author Name (@username) +All current maintainers: (@username, @username2) +Package Name: PackageName +One-Line Description of Package: Package description +Repository Link: https://example.com/username/repository +Version submitted: v.0.8.5 +Editor: @editoruser +Reviewer 1: @reviewer1 +Reviewer 2: @reviewer2 +Archive: [![DOI](https://zenodo.org/badge/DOI/fakedoi/doi.svg)](https://doi.org/fakedoi/doi.svg) +JOSS DOI: [![DOI](https://joss.theoj.org/papers/fakedoi.svg)](https://joss.theoj.org/papers/fakedoi) +Version accepted: v.0.9.2 +Date accepted (month/day/year): 04/21/2024 + +--- + +## Code of Conduct & Commitment to Maintain Package + +- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted. +- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment]. + +## Description + +Description of package + +That spans multiple lines + +## Scope + +- Please indicate which category or categories. +Check out our [package scope page][PackageCategories] to learn more about our +scope. 
(If you are unsure of which category you fit, we suggest you make a pre-submission inquiry): + + - [ ] Data retrieval + - [ ] Data extraction + - [ ] Data processing/munging + - [ ] Data deposition + - [ ] Data validation and testing + - [ ] Data visualization[^1] + - [ ] Workflow automation + - [ ] Citation management and bibliometrics + - [x] Scientific software wrappers + - [ ] Database interoperability + +ZodiPy was already [proposed and reviewed as an Astropy Affiliated package](https://github.com/astropy/astropy.github.com/pull/495) before the recent partnership between Astropy and pyOpenSci in [APE22](https://github.com/astropy/astropy-APEs/blob/main/APE22.rst#in-a-nutshell), so I am resubmitting the proposal as is here. + +## Domain Specific + +- [ ] Geospatial +- [ ] Education + +## Community Partnerships +If your package is associated with an +existing community please check below: + +- [x] Astropy: Link coming soon to standards +- [ ] Pangeo: My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration] diff --git a/tests/unit/test_parse_categories.py b/tests/unit/test_parse_categories.py index 747e041..a560d62 100644 --- a/tests/unit/test_parse_categories.py +++ b/tests/unit/test_parse_categories.py @@ -96,3 +96,16 @@ def test_clean_categories( review = ReviewModel.clean_categories(categories=input_categories) assert review == expected_return + + +@pytest.mark.parametrize( + "partners,input_file", [(["astropy"], "reviews/partnership_astropy.txt")] +) +def test_parse_partnerships(partners, input_file, data_file, process_issues): + """ + The community partnership checkboxes should be correctly parsed into + a value in the :class:`.Partnerships` enum + """ + review = data_file(input_file, True) + review = process_issues.parse_issue(review) + assert review.partners == partners diff --git a/tests/unit/test_parse_issue_header_methods.py b/tests/unit/test_parse_issue_header_methods.py index 79b4d30..e2492c3 
100644 --- a/tests/unit/test_parse_issue_header_methods.py +++ b/tests/unit/test_parse_issue_header_methods.py @@ -3,6 +3,9 @@ parses out (and cleans) pyOpenSci review metadata. """ +import pytest +from pyosmeta.parse_issues import KEYED_STRING + def test_issue_as_dict(process_issues, issue_list): """A method within the parse issue header that turns the @@ -14,3 +17,66 @@ def test_issue_as_dict(process_issues, issue_list): meta = process_issues._header_as_dict(header) assert meta["package_name"] == "sunpy" assert len(meta) == 13 + + +@pytest.mark.parametrize( + "text,expected", + [ + pytest.param( + "apple: banana", {"key": "apple", "value": "banana"}, id="base" + ), + pytest.param( + "Apple : Banana", + {"key": "Apple", "value": "Banana"}, + id="whitespace", + ), + pytest.param( + " Apple : Banana ", + {"key": "Apple", "value": "Banana "}, + id="whitespace-leading", + ), + pytest.param( + "Apple: Multiple words", + {"key": "Apple", "value": "Multiple words"}, + id="whitespace-value", + ), + pytest.param( + "Apple:banana:cherry", + {"key": "Apple", "value": "banana:cherry"}, + id="non-greedy-key", + ), + pytest.param( + "a line\nApple: banana cherry\nwatermelon", + {"key": "Apple", "value": "banana cherry"}, + id="multiline", + ), + pytest.param( + "multiword key: banana", + {"key": "key", "value": "banana"}, + id="multiword-key", + ), + pytest.param( + "multiword-key: banana", + {"key": "multiword-key", "value": "banana"}, + id="multiword-key-hyphenated", + ), + pytest.param( + "* bulleted: key", + {"key": "bulleted", "value": "key"}, + id="bulleted-key", + ), + ], +) +def test_keyed_string(text, expected): + """ + KEYED_STRING can parse a key: value pair from a string as regex results dict. + + This is super general - we want to get any key/value-ish pair whether it's right or wrong, + we don't want to try and squeeze all normalization and cleaning into a single re, so it + eg. 
doesn't strip trailing whitespace and detects mid-line keys: like that + """ + matched = KEYED_STRING.search(text).groupdict() + if expected: + assert matched == expected + else: + assert matched is None