Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix parsing of partnerships #187

Merged
merged 5 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/pyosmeta/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import re
from datetime import datetime
from enum import Enum
from typing import Any, Optional, Set, Union

import requests
Expand All @@ -20,6 +21,11 @@
from pyosmeta.utils_clean import clean_date, clean_markdown


class Partnerships(str, Enum):
    """Community partners that a reviewed package can be associated with.

    The ``str`` mix-in makes each member an instance of ``str`` that compares
    equal to its plain string value (``Partnerships.astropy == "astropy"``).
    """

    # Each value is the lowercase partner name.
    astropy = "astropy"
    pangeo = "pangeo"


class UrlValidatorMixin:
"""A mixin to validate classes that are of the same type across
several models.
Expand Down Expand Up @@ -227,6 +233,7 @@ class ReviewModel(BaseModel):
populate_by_name=True,
str_strip_whitespace=True,
validate_assignment=True,
use_enum_values=True,
)

package_name: str | None = ""
Expand All @@ -252,7 +259,7 @@ class ReviewModel(BaseModel):
closed_at: Optional[datetime] = None
issue_link: str = None
joss: Optional[str] = None
partners: Optional[list[str]] = None
partners: Optional[list[Partnerships]] = None
gh_meta: Optional[GhMeta] = None

@field_validator(
Expand Down
35 changes: 32 additions & 3 deletions src/pyosmeta/parse_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
from .utils_clean import clean_date_accepted_key
from .utils_parse import parse_user_names

KEYED_STRING = re.compile(
    # key: shortest run of non-whitespace characters in front of the colon
    r"\s*(?P<key>\S*?)\s*"
    # the separator itself
    r":"
    # value: everything after the colon up to the end of that line
    r"\s*(?P<value>.*)\s*"
)
"""
Regular expression that splits a ``key: value`` string into the named
groups ``key`` and ``value``. Use it with ``.search()``.

Matching is deliberately loose; callers are expected to clean the result:

* ``key`` cannot contain whitespace, so for a multi-word prefix only the
  word directly before the colon is captured. It may also be empty.
* ``value`` starts after any whitespace following the colon and runs to the
  end of that line; trailing spaces on the line are kept because ``.*`` is
  greedy.

Examples:

>>> text = 'Astropy: Link coming soon to standards'
>>> KEYED_STRING.search(text).groupdict()
{'key': 'Astropy', 'value': 'Link coming soon to standards'}
"""


@dataclass
class ProcessIssues:
Expand Down Expand Up @@ -190,7 +201,7 @@ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict:
# this could be made more flexible if it just runs until it runs
# out of categories to parse
meta["partners"] = self.get_categories(
body, "## Community Partnerships", 3
body, "## Community Partnerships", 3, keyed=True
)

return meta
Expand Down Expand Up @@ -422,7 +433,11 @@ def process_repo_meta(self, url: str) -> dict[str, Any]:
# This works - I could just make it more generic and remove fmt since it's
# not used and replace it with a number of values and a test string
def get_categories(
self, issue_list: list[str], section_str: str, num_vals: int
self,
issue_list: list[str],
section_str: str,
num_vals: int,
keyed: bool = False,
) -> list[str] | None:
"""Parse through a pyOS review issue and grab categories associated
with a package
Expand All @@ -440,6 +455,12 @@ def get_categories(
num_vals : int
Number of categories expected in the list. for instance
3 partner options.

keyed : bool
If True, treat the category value as a key-value pair separated by a colon
(and just extract the key).

eg. ``- [x] Astropy: some other text`` would be parsed as ``'astropy'``
"""
# Find the starting index of the category section
index = [
Expand Down Expand Up @@ -473,4 +494,12 @@ def get_categories(
categories = [
re.sub(r"(\w+) (\w+)", r"\1-\2", item) for item in cleaned
]
return [item.lower().replace("[^1]", "") for item in categories]
categories = [item.lower().replace("[^1]", "") for item in categories]
if keyed:
categories = [
KEYED_STRING.search(c).groupdict().get("key")
for c in categories
if KEYED_STRING.search(c) is not None
]

return categories
57 changes: 57 additions & 0 deletions tests/data/reviews/partnership_astropy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Submitting Author: Author Name (@username)
All current maintainers: (@username, @username2)
Package Name: PackageName
One-Line Description of Package: Package description
Repository Link: https://example.com/username/repository
Version submitted: v.0.8.5
Editor: @editoruser
Reviewer 1: @reviewer1
Reviewer 2: @reviewer2
Archive: [![DOI](https://zenodo.org/badge/DOI/fakedoi/doi.svg)](https://doi.org/fakedoi/doi.svg)
JOSS DOI: [![DOI](https://joss.theoj.org/papers/fakedoi.svg)](https://joss.theoj.org/papers/fakedoi)
Version accepted: v.0.9.2
Date accepted (month/day/year): 04/21/2024

---

## Code of Conduct & Commitment to Maintain Package

- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].

## Description

Description of package

That spans multiple lines

## Scope

- Please indicate which category or categories.
Check out our [package scope page][PackageCategories] to learn more about our
scope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):

- [ ] Data retrieval
- [ ] Data extraction
- [ ] Data processing/munging
- [ ] Data deposition
- [ ] Data validation and testing
- [ ] Data visualization[^1]
- [ ] Workflow automation
- [ ] Citation management and bibliometrics
- [x] Scientific software wrappers
- [ ] Database interoperability

ZodiPy was already [proposed and reviewed as an Astropy Affiliated package](https://github.com/astropy/astropy.github.com/pull/495) before the recent partnership between Astropy and pyOpenSci in [APE22](https://github.com/astropy/astropy-APEs/blob/main/APE22.rst#in-a-nutshell), so I am resubmitting the proposal as is here.

## Domain Specific

- [ ] Geospatial
- [ ] Education

## Community Partnerships
If your package is associated with an
existing community please check below:

- [x] Astropy: Link coming soon to standards
- [ ] Pangeo: My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]
13 changes: 13 additions & 0 deletions tests/unit/test_parse_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,16 @@ def test_clean_categories(

review = ReviewModel.clean_categories(categories=input_categories)
assert review == expected_return


@pytest.mark.parametrize(
    "partners,input_file",
    [(["astropy"], "reviews/partnership_astropy.txt")],
)
def test_parse_partnerships(partners, input_file, data_file, process_issues):
    """
    The community partnership checkboxes should be correctly parsed into
    a value in the :class:`.Partnerships` enum
    """
    # ``data_file`` and ``process_issues`` are fixtures defined outside this
    # file. NOTE(review): the meaning of the ``True`` flag is not visible
    # here - confirm against the fixture definition.
    issue = data_file(input_file, True)
    # Keep the parsed model under its own name instead of rebinding the
    # fixture result.
    parsed_review = process_issues.parse_issue(issue)
    assert parsed_review.partners == partners
66 changes: 66 additions & 0 deletions tests/unit/test_parse_issue_header_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
parses out (and cleans) pyOpenSci review metadata.
"""

import pytest
from pyosmeta.parse_issues import KEYED_STRING


def test_issue_as_dict(process_issues, issue_list):
"""A method within the parse issue header that turns the
Expand All @@ -14,3 +17,66 @@ def test_issue_as_dict(process_issues, issue_list):
meta = process_issues._header_as_dict(header)
assert meta["package_name"] == "sunpy"
assert len(meta) == 13


@pytest.mark.parametrize(
    "text,expected",
    [
        pytest.param(
            "apple: banana", {"key": "apple", "value": "banana"}, id="base"
        ),
        pytest.param(
            "Apple : Banana",
            {"key": "Apple", "value": "Banana"},
            id="whitespace",
        ),
        pytest.param(
            " Apple : Banana ",
            {"key": "Apple", "value": "Banana "},
            id="whitespace-leading",
        ),
        pytest.param(
            "Apple: Multiple words",
            {"key": "Apple", "value": "Multiple words"},
            id="whitespace-value",
        ),
        pytest.param(
            "Apple:banana:cherry",
            {"key": "Apple", "value": "banana:cherry"},
            id="non-greedy-key",
        ),
        pytest.param(
            "a line\nApple: banana cherry\nwatermelon",
            {"key": "Apple", "value": "banana cherry"},
            id="multiline",
        ),
        pytest.param(
            "multiword key: banana",
            {"key": "key", "value": "banana"},
            id="multiword-key",
        ),
        pytest.param(
            "multiword-key: banana",
            {"key": "multiword-key", "value": "banana"},
            id="multiword-key-hyphenated",
        ),
        pytest.param(
            "* bulleted: key",
            {"key": "bulleted", "value": "key"},
            id="bulleted-key",
        ),
        # No colon at all: the pattern must not match. Use ``None`` as the
        # expected value for any input that should produce no match.
        pytest.param("no separator here", None, id="no-match"),
    ],
)
def test_keyed_string(text, expected):
    """
    KEYED_STRING can parse a ``key: value`` pair from a string into a dict
    of its named regex groups.

    The pattern is intentionally very general: we want to pick up anything
    key/value-ish, whether it is right or wrong, rather than squeeze all
    normalization and cleaning into a single regular expression. For
    example, it does not strip trailing whitespace from the value, and it
    detects keys that appear mid-line.
    """
    # Hold on to the match object: ``search`` returns ``None`` when nothing
    # matches, so ``.groupdict()`` may only be called after that is ruled out.
    match = KEYED_STRING.search(text)
    if expected is None:
        assert match is None
        return
    assert match is not None
    assert match.groupdict() == expected
Loading