Skip to content

Commit

Permalink
Fix: make parse comment more readable
Browse files Browse the repository at this point in the history
  • Loading branch information
lwasser committed Feb 29, 2024
1 parent ab0df02 commit 9fc98f7
Showing 1 changed file with 124 additions and 57 deletions.
181 changes: 124 additions & 57 deletions src/pyosmeta/parse_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def parse_user_names(username: str) -> dict:
Returns
-------
: dict
dict
``{name: str, github_username: str}``
Notes
Expand Down Expand Up @@ -404,68 +404,76 @@ def return_response(self) -> list[dict[str, object]]:
response = self._get_response()
return response.json()

def _contains_keyword(self, string: str) -> bool:
def _is_review_role(self, string: str) -> bool:
"""
Returns true if starts with any of the 3 items below.
"""
return string.startswith(
("Submitting", "Editor", "Reviewer", "All current maintainers")
)

def _clean_name(self, a_str: str) -> str:
"""Helper to strip unwanted chars from text"""
def _remove_extra_chars(self, a_str: str) -> str:
"""Helper to strip unwanted characters from text"""

unwanted = ["(", ")", "@"]
for char in unwanted:
a_str = a_str.replace(char, "")

return a_str.strip()

def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
"""
Parameters
----------
line_item : list
A single list item representing a single line in the issue
containing metadata for the review.
This comment is metadata for the review that the author fills out.
Returns
-------
Dict
Containing the metadata for a submitting author, reviewer or
maintainer(s)
"""

meta = {}
a_key = line_item[0].lower().replace(" ", "_")
if self._contains_keyword(line_item[0]):
if line_item[0].startswith("All current maintainers"):
names = line_item[1].split(",")
# There are at least 2 maintainers if there is a comma
# if len(names) > 1:
meta[a_key] = []
for aname in names:
# Add each maintainer to the dict
a_maint = parse_user_names(username=aname)
# filtered_list = list(filter(None, my_list))
meta[a_key].append(a_maint)
else:
names = parse_user_names(line_item[1])
meta[a_key] = names
elif len(line_item) > 1:
meta[a_key] = line_item[1].strip()
else:
meta[a_key] = self._clean_name(line_item[0])
return meta
# def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
# """Parse through a single line of a review and return cleaned metadata.

# This helper method processes each line of a review and cleans the data.
# If the line represents a review role (editor, maintainer, reviewer),
# then it's processed differently to return the github username and
# (optionally) the gh user's name. If the line represents another part
# of the review such as the package description it is more easily parsed.

# Parameters
# ----------
# line_item : list
# A single list item representing a single line in the issue
# containing metadata for the review.
# This comment is metadata for the review that the author fills out.

# Returns
# -------
# Dict
# Containing the metadata for a submitting author, reviewer or
# maintainer(s)
# """
# # TODO: would it be easier to read if this code was in the loop and
# # broken out into helpers there?
# meta = {}
# a_key = line_item[0].lower().replace(" ", "_")
# # If the line is for a review role - editor, maintainer, reviewer
# if self._is_review_role(line_item[0]):
# # Parse comma separated names for maintainer list
# if line_item[0].startswith("All current maintainers"):
# names = line_item[1].split(",")
# meta[a_key] = []
# for name in names:
# # Add each maintainer to the dict
# a_maint = parse_user_names(username=name)
# meta[a_key].append(a_maint)
# # Parse other review roles; these have one name per line
# else:
# names = parse_user_names(line_item[1])
# meta[a_key] = names
# elif len(line_item) > 1:
# meta[a_key] = line_item[1].strip()
# else:
# meta[a_key] = self._remove_extra_chars(line_item[0])
# return meta

def parse_issue_header(
self, issues: list[str], total_lines: int = 20
) -> dict[str, str]:
"""Parses through all headers comments of selected reviews and returns
metadata
"""Parses through each header comment for selected reviews and returns
review metadata.
This will go through all reviews and return:
Returns:
GitHub Issue meta: "created_at", "updated_at", "closed_at"
Parameters
Expand All @@ -490,8 +498,8 @@ def parse_issue_header(
review = {}
review_final = {}
for issue in issues:
# Return issue comment as a cleaned list + package name
pkg_name, body_data = self.parse_comment(issue)
# Return issue comment as cleaned list + package name
pkg_name, body_data = self.comment_to_list(issue)

Check warning on line 502 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L502

Added line #L502 was not covered by tests
if not pkg_name:
continue

Expand Down Expand Up @@ -556,15 +564,47 @@ def parse_issue_header(

return review_final

def get_contributor_data(self, line: list[str]) -> dict[str, object]:
    """Parse names for a review role from one line of issue metadata.

    Parameters
    ----------
    line : list of str
        A single parsed line from the review issue: ``line[0]`` is the
        role label (e.g. "Editor") and ``line[1]`` holds the name(s).

    Returns
    -------
    dict
        A single-key dict. The key is the label lower-cased with spaces
        replaced by underscores. For "All current maintainers" the value
        is a list with one ``parse_user_names`` result per non-blank,
        comma-separated entry; for every other role it is a single
        ``parse_user_names`` result.
    """
    a_key = line[0].lower().replace(" ", "_")

    if line[0].startswith("All current maintainers"):
        # Maintainers are comma separated; skip blank entries such as the
        # one left behind by a trailing comma.
        return {
            a_key: [
                parse_user_names(username=name)
                for name in line[1].split(",")
                if name.strip()
            ]
        }

    # Every other review role has a single name on the line.
    return {a_key: parse_user_names(line[1])}

Check warning on line 597 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L597

Added line #L597 was not covered by tests

def get_issue_meta(
self,
body_data: list[str],
end_range: int,
) -> dict[str, str]:
"""
"""Process a single review returning metadata for that review.
Parse through a list of strings, each of which represents a line in the
first comment of a review.
grab the metadata for the review.
first comment of a review. Return the cleaned review metadata.
Parameters
----------
Expand All @@ -580,8 +620,36 @@ def get_issue_meta(
dict
"""
issue_meta = {}
for item in body_data[0:end_range]:
issue_meta.update(self._get_line_meta(item))
# TODO: change to for line in review_comment
for single_line in body_data[0:end_range]:
# TODO: I think this will be easier to read if the code to parse
# each line lives here rather than behind another redirect.
# Fix: self._get_line_meta is the method I'm removing
# issue_meta.update(self._get_line_meta(item))

meta = {}
a_key = single_line[0].lower().replace(" ", "_")

Check warning on line 631 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L630-L631

Added lines #L630 - L631 were not covered by tests
# If the line is for a review role - editor, maintainer, reviewer
if self._is_review_role(single_line[0]):
meta = self.get_contributor_data(single_line)

Check warning on line 634 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L634

Added line #L634 was not covered by tests
# # Parse comma separated names for maintainer list
# if single_line[0].startswith("All current maintainers"):
# names = single_line[1].split(",")
# meta[a_key] = []
# for name in names:
# # Add each maintainer to the dict
# a_maint = parse_user_names(username=name)
# meta[a_key].append(a_maint)
# # Parse other review roles; these have one name per line
# else:
# names = parse_user_names(single_line[1])
# meta[a_key] = names
elif len(single_line) > 1:
meta[a_key] = single_line[1].strip()

Check warning on line 648 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L648

Added line #L648 was not covered by tests
else:
meta[a_key] = self._remove_extra_chars(single_line[0])

Check warning on line 650 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L650

Added line #L650 was not covered by tests

issue_meta.update(meta)

Check warning on line 652 in src/pyosmeta/parse_issues.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/parse_issues.py#L652

Added line #L652 was not covered by tests

return issue_meta

Expand Down Expand Up @@ -616,10 +684,9 @@ def get_repo_endpoints(
)
return all_repos

def parse_comment(self, issue: dict[str, str]) -> tuple[str, list[str]]:
"""
Parses the first comment in an issue comment for pyOpenSci review.
This is where the review metadata are stored.
def comment_to_list(self, issue: dict[str, str]) -> tuple[str, list[str]]:
"""Parses the first comment in a pyOpenSci review issue.
Returns the package name
and the body of the comment parsed into a list of elements.
Expand Down Expand Up @@ -775,7 +842,7 @@ def get_categories(
----------
issue_list : list[list[str]]
The first comment from the issue split into lines and then the
lines split as by self.parse_comment()
lines split as done by self.comment_to_list()
section_str : str
The section string to find where the categories live in the review
Expand All @@ -802,7 +869,7 @@ def get_categories(
cat_index = i
break
except StopIteration:
print(section_str, " not found in the list.")
print(section_str, "not found in the list.")
return None

# Get checked categories for package
Expand Down

0 comments on commit 9fc98f7

Please sign in to comment.