From 1231ed3b735789ae1a4f41ad4b5cba2e3003bdb4 Mon Sep 17 00:00:00 2001 From: Sean Kim <33474168+seankim658@users.noreply.github.com> Date: Mon, 4 Nov 2024 13:20:58 -0500 Subject: [PATCH 1/3] Refactor to parse title date --- kenpompy/FanMatch.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kenpompy/FanMatch.py b/kenpompy/FanMatch.py index 799fdbb..16572ef 100644 --- a/kenpompy/FanMatch.py +++ b/kenpompy/FanMatch.py @@ -5,6 +5,7 @@ import pandas as pd from io import StringIO import re +from datetime import datetime from cloudscraper import CloudScraper from bs4 import BeautifulSoup from typing import Optional @@ -57,13 +58,15 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): fm = BeautifulSoup(get_html(browser, self.url), "html.parser") if "Sorry, no games today." in fm.text: return - time_header = fm.find('th', string=re.compile(r"Time \(ET\)")) - if time_header and time_header.find("a"): - href = time_header.find("a")["href"] - date_match = re.search(r'd=(\d{4}-\d{2}-\d{2})', href) + if date is not None: + date_text = fm.find("div", class_="lh12").get_text() + date_match = re.search(r"for \w+, (\w+ \d{1,2}[a-z]{2})", date_text) if date_match: - found_date = date_match.group(1) - if found_date != date: + extracted_date_str = re.sub(r"(st|nd|rd|th)", "", date_match.group(1)) + extracted_date = datetime.strptime(extracted_date_str, "%B %d") + extracted_mmdd = extracted_date.strftime("%m-%d") + user_mmdd = datetime.strptime(date, "%Y-%m-%d").strftime("%m-%d") + if extracted_mmdd != user_mmdd: return table = fm.find_all("table")[0] fm_df = pd.read_html(StringIO(str(table))) From a2e1751f06fa9cc1c20d18aa30b644a5ee135543 Mon Sep 17 00:00:00 2001 From: Sean Kim <33474168+seankim658@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:03:33 -0500 Subject: [PATCH 2/3] Handle predicted possessions --- kenpompy/FanMatch.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git 
a/kenpompy/FanMatch.py b/kenpompy/FanMatch.py index 16572ef..553edba 100644 --- a/kenpompy/FanMatch.py +++ b/kenpompy/FanMatch.py @@ -130,13 +130,11 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): pos = fm_df.Game.str.split(r" \[").str[1] fm_df["Game"], fm_df["Possessions"] = fm_df.Game.str.split(r" \[").str[0], pos.astype("str") fm_df.Possessions = fm_df.Possessions.str.rstrip(r"\] ") - predict_info = fm_df.Prediction.str.split() - pred_winner = fm_df.Prediction.astype("str").str.split().str[0:-2].tolist() - pred_winner = [" ".join(i) if not any(pd.isnull(i)) else float("nan") for i in pred_winner] - fm_df["PredictedWinner"] = pred_winner - fm_df["PredictedScore"] = fm_df.Prediction.str.split().str[-2] - fm_df["WinProbability"] = fm_df.Prediction.str.split().str[-1] - fm_df.WinProbability = fm_df.WinProbability.str.strip("()") + fm_df["PredictedWinner"] = fm_df["Prediction"].str.extract(r"^(.+?) \d+-\d+")[0] + fm_df["PredictedScore"] = fm_df["Prediction"].str.extract(r" (\d+-\d+)")[0] + fm_df["WinProbability"] = fm_df["Prediction"].str.extract(r"\((\d+%)\)")[0] + fm_df["PredictedPossessions"] = fm_df["Prediction"].str.extract(r"\[(\d+)\]")[0].astype(float) + fm_df["Possessions"] = fm_df["Possessions"].where(fm_df["Possessions"] != "", fm_df["PredictedPossessions"]) fm_df["PredictedMOV"] = [(int(x[0]) - int(x[1])) if len(x) > 1 else float("nan") for x in fm_df.PredictedScore.astype("str").str.split("-")] @@ -146,11 +144,12 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): # Parse predicted loser. teams = fm_df.Game.str.split(", ").tolist() teams_np = fm_df.Game.str.split(" at ").tolist() + pred_winner = fm_df["PredictedWinner"].tolist() i = 0 pred_loser = [] - for x in teams: - if not len(x) == 2: + for i, x in enumerate(teams): + if len(x) != 2: x = teams_np[i] # Account for neutral games. 
From dba64a01f864865bb66ec538625403648e50333b Mon Sep 17 00:00:00 2001 From: Sean Kim <33474168+seankim658@users.noreply.github.com> Date: Mon, 4 Nov 2024 18:27:02 -0500 Subject: [PATCH 3/3] Update test for predicted possessions field --- tests/test_fanmatch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_fanmatch.py b/tests/test_fanmatch.py index fa1a885..9c693d5 100644 --- a/tests/test_fanmatch.py +++ b/tests/test_fanmatch.py @@ -17,6 +17,7 @@ def test_fanmatch(browser): "Marquette", "73-72", "51%", + "nan", "1", "Xavier", "2OT",