pyOpenSci · lwasser · Jul 12, 2024 · Jul 11, 2024 · Jul 12, 2024 · Jul 12, 2024
diff --git a/src/pyosmeta/models/base.py b/src/pyosmeta/models/base.py
@@ -5,6 +5,7 @@
 
 import re
 from datetime import datetime
+from enum import Enum
 from typing import Any, Optional, Set, Union
 
 import requests
@@ -20,6 +21,11 @@
 from pyosmeta.utils_clean import clean_date, clean_markdown
 
 
+class Partnerships(str, Enum):  # allowed ReviewModel.partners values
+    astropy = "astropy"
+    pangeo = "pangeo"
+
+
 class UrlValidatorMixin:
     """A mixin to validate classes that are of the same type across
     several models.
@@ -227,6 +233,7 @@ class ReviewModel(BaseModel):
         populate_by_name=True,
         str_strip_whitespace=True,
         validate_assignment=True,
+        use_enum_values=True,
     )
 
     package_name: str | None = ""
@@ -252,7 +259,7 @@ class ReviewModel(BaseModel):
     closed_at: Optional[datetime] = None
     issue_link: str = None
     joss: Optional[str] = None
-    partners: Optional[list[str]] = None
+    partners: Optional[list[Partnerships]] = None
     gh_meta: Optional[GhMeta] = None
 
     @field_validator(

diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py
@@ -13,6 +13,17 @@
 from .utils_clean import clean_date_accepted_key
 from .utils_parse import parse_user_names
 
+KEYED_STRING = re.compile(r"\s*(?P<key>\S*?)\s*:\s*(?P<value>.*)\s*")
+"""
+Match the first ``key: value`` pair in a string as named groups.
+
+The key is one word; the value is the rest of the line (trailing spaces kept).
+
+    >>> text = 'Astropy: Link coming soon to standards'
+    >>> KEYED_STRING.search(text).groupdict()
+    {'key': 'Astropy', 'value': 'Link coming soon to standards'}
+"""
+
 
 @dataclass
 class ProcessIssues:
@@ -190,7 +201,7 @@ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict:
         # this could be made more flexible if it just runs until it runs
         # out of categories to parse
         meta["partners"] = self.get_categories(
-            body, "## Community Partnerships", 3
+            body, "## Community Partnerships", 3, keyed=True
         )
 
         return meta
@@ -422,7 +433,11 @@ def process_repo_meta(self, url: str) -> dict[str, Any]:
     # This works - i could just make it more generic and remove fmt since it's
     # not used and replace it with a number of values and a test string
     def get_categories(
-        self, issue_list: list[str], section_str: str, num_vals: int
+        self,
+        issue_list: list[str],
+        section_str: str,
+        num_vals: int,
+        keyed: bool = False,
     ) -> list[str] | None:
         """Parse through a pyOS review issue and grab categories associated
         with a package
@@ -440,6 +455,12 @@ def get_categories(
         num_vals : int
             Number of categories expected in the list. for instance
             3 partner options.
+
+        keyed : bool
+            If True, treat the category value as a key-value pair separated by a colon
+            (and just extract the key).
+
+            eg. ``- [x] Astropy: some other text`` would be parsed as ``'astropy'``
         """
         # Find the starting index of the category section
         index = [
@@ -473,4 +494,12 @@ def get_categories(
         categories = [
             re.sub(r"(\w+) (\w+)", r"\1-\2", item) for item in cleaned
         ]
-        return [item.lower().replace("[^1]", "") for item in categories]
+        categories = [item.lower().replace("[^1]", "") for item in categories]
+        if keyed:
+            categories = [
+                KEYED_STRING.search(c).groupdict().get("key")
+                for c in categories
+                if KEYED_STRING.search(c) is not None
+            ]
+
+        return categories
diff --git a/tests/data/reviews/partnership_astropy.txt b/tests/data/reviews/partnership_astropy.txt
@@ -0,0 +1,57 @@
+Submitting Author: Author Name (@username)
+All current maintainers: (@username, @username2)
+Package Name: PackageName
+One-Line Description of Package: Package description
+Repository Link:  https://example.com/username/repository
+Version submitted:  v.0.8.5
+Editor: @editoruser
+Reviewer 1: @reviewer1
+Reviewer 2: @reviewer2
+Archive: [![DOI](https://zenodo.org/badge/DOI/fakedoi/doi.svg)](https://doi.org/fakedoi/doi.svg)
+JOSS DOI: [![DOI](https://joss.theoj.org/papers/fakedoi.svg)](https://joss.theoj.org/papers/fakedoi)
+Version accepted: v.0.9.2
+Date accepted (month/day/year): 04/21/2024
+
+---
+
+## Code of Conduct & Commitment to Maintain Package
+
+- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
+- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].
+
+## Description
+
+Description of package
+
+That spans multiple lines
+
+## Scope
+
+- Please indicate which category or categories.
+Check out our [package scope page][PackageCategories] to learn more about our
+scope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):
+
+	- [ ] Data retrieval
+	- [ ] Data extraction
+	- [ ] Data processing/munging
+	- [ ] Data deposition
+	- [ ] Data validation and testing
+	- [ ] Data visualization[^1]
+	- [ ] Workflow automation
+	- [ ] Citation management and bibliometrics
+	- [x] Scientific software wrappers
+	- [ ] Database interoperability
+
+ZodiPy was already [proposed and reviewed as an Astropy Affiliated package](https://github.com/astropy/astropy.github.com/pull/495) before the recent partnership between Astropy and pyOpenSci in [APE22](https://github.com/astropy/astropy-APEs/blob/main/APE22.rst#in-a-nutshell), so I am resubmitting the proposal as is here.
+
+## Domain Specific
+
+- [ ] Geospatial
+- [ ] Education
+
+## Community Partnerships
+If your package is associated with an
+existing community please check below:
+
+- [x] Astropy: Link coming soon to standards
+- [ ] Pangeo: My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]
diff --git a/tests/unit/test_parse_categories.py b/tests/unit/test_parse_categories.py
@@ -96,3 +96,16 @@ def test_clean_categories(
 
     review = ReviewModel.clean_categories(categories=input_categories)
     assert review == expected_return
+
+
+@pytest.mark.parametrize(
+    "partners,input_file", [(["astropy"], "reviews/partnership_astropy.txt")]
+)
+def test_parse_partnerships(partners, input_file, data_file, process_issues):
+    """
+    Checked community partnership boxes in a review issue are parsed into
+    the matching :class:`.Partnerships` values on the review's ``partners``.
+    """
+    issue = data_file(input_file, True)
+    parsed = process_issues.parse_issue(issue)
+    assert parsed.partners == partners
diff --git a/tests/unit/test_parse_issue_header_methods.py b/tests/unit/test_parse_issue_header_methods.py
@@ -3,6 +3,9 @@
 parses out (and cleans) pyOpenSci review metadata.
 """
 
+import pytest
+from pyosmeta.parse_issues import KEYED_STRING
+
 
 def test_issue_as_dict(process_issues, issue_list):
     """A method within the parse issue header that turns the
@@ -14,3 +17,66 @@ def test_issue_as_dict(process_issues, issue_list):
     meta = process_issues._header_as_dict(header)
     assert meta["package_name"] == "sunpy"
     assert len(meta) == 13
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        pytest.param(
+            "apple: banana", {"key": "apple", "value": "banana"}, id="base"
+        ),
+        pytest.param(
+            "Apple :  Banana",
+            {"key": "Apple", "value": "Banana"},
+            id="whitespace",
+        ),
+        pytest.param(
+            " Apple : Banana ",
+            {"key": "Apple", "value": "Banana "},
+            id="whitespace-leading",
+        ),
+        pytest.param(
+            "Apple: Multiple words",
+            {"key": "Apple", "value": "Multiple words"},
+            id="whitespace-value",
+        ),
+        pytest.param(
+            "Apple:banana:cherry",
+            {"key": "Apple", "value": "banana:cherry"},
+            id="non-greedy-key",
+        ),
+        pytest.param(
+            "a line\nApple: banana cherry\nwatermelon",
+            {"key": "Apple", "value": "banana cherry"},
+            id="multiline",
+        ),
+        pytest.param(
+            "multiword key: banana",
+            {"key": "key", "value": "banana"},
+            id="multiword-key",
+        ),
+        pytest.param(
+            "multiword-key: banana",
+            {"key": "multiword-key", "value": "banana"},
+            id="multiword-key-hyphenated",
+        ),
+        pytest.param(
+            "* bulleted: key",
+            {"key": "bulleted", "value": "key"},
+            id="bulleted-key",
+        ),
+        pytest.param("no key here", None, id="no-match"),
+    ],
+)
+def test_keyed_string(text, expected):
+    """
+    KEYED_STRING extracts a ``key: value`` pair from a string, or no match.
+
+    The pattern is deliberately general: cleaning happens elsewhere, so it
+    keeps trailing whitespace in the value and also matches mid-line keys.
+    """
+    match = KEYED_STRING.search(text)
+    if expected is None:
+        assert match is None
+    else:
+        assert match is not None and match.groupdict() == expected