Skip to content

Commit

Permalink
Merge pull request #101 from seankim658/v0.4.0
Browse files Browse the repository at this point in the history
Update for predicted possessions and new date matching
  • Loading branch information
j-andrews7 authored Nov 4, 2024
2 parents 922dec4 + dba64a0 commit b7d7eab
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
32 changes: 17 additions & 15 deletions kenpompy/FanMatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
from io import StringIO
import re
from datetime import datetime
from cloudscraper import CloudScraper
from bs4 import BeautifulSoup
from typing import Optional
Expand Down Expand Up @@ -57,13 +58,15 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None):
fm = BeautifulSoup(get_html(browser, self.url), "html.parser")
if "Sorry, no games today." in fm.text:
return
time_header = fm.find('th', string=re.compile(r"Time \(ET\)"))
if time_header and time_header.find("a"):
href = time_header.find("a")["href"]
date_match = re.search(r'd=(\d{4}-\d{2}-\d{2})', href)
if date is not None:
date_text = fm.find("div", class_="lh12").get_text()
date_match = re.search(r"for \w+, (\w+ \d{1,2}[a-z]{2})", date_text)
if date_match:
found_date = date_match.group(1)
if found_date != date:
extracted_date_str = re.sub(r"(st|nd|rd|th)", "", date_match.group(1))
extracted_date = datetime.strptime(extracted_date_str, "%B %d")
extracted_mmdd = extracted_date.strftime("%m-%d")
user_mmdd = datetime.strptime(date, "%Y-%m-%d").strftime("%m-%d")
if extracted_mmdd != user_mmdd:
return
table = fm.find_all("table")[0]
fm_df = pd.read_html(StringIO(str(table)))
Expand Down Expand Up @@ -127,13 +130,11 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None):
pos = fm_df.Game.str.split(r" \[").str[1]
fm_df["Game"], fm_df["Possessions"] = fm_df.Game.str.split(r" \[").str[0], pos.astype("str")
fm_df.Possessions = fm_df.Possessions.str.rstrip(r"\] ")
predict_info = fm_df.Prediction.str.split()
pred_winner = fm_df.Prediction.astype("str").str.split().str[0:-2].tolist()
pred_winner = [" ".join(i) if not any(pd.isnull(i)) else float("nan") for i in pred_winner]
fm_df["PredictedWinner"] = pred_winner
fm_df["PredictedScore"] = fm_df.Prediction.str.split().str[-2]
fm_df["WinProbability"] = fm_df.Prediction.str.split().str[-1]
fm_df.WinProbability = fm_df.WinProbability.str.strip("()")
fm_df["PredictedWinner"] = fm_df["Prediction"].str.extract(r"^(.+?) \d+-\d+")[0]
fm_df["PredictedScore"] = fm_df["Prediction"].str.extract(r" (\d+-\d+)")[0]
fm_df["WinProbability"] = fm_df["Prediction"].str.extract(r"\((\d+%)\)")[0]
fm_df["PredictedPossessions"] = fm_df["Prediction"].str.extract(r"\[(\d+)\]")[0].astype(float)
fm_df["Possessions"] = fm_df["Possessions"].where(fm_df["Possessions"] != "", fm_df["PredictedPossessions"])

fm_df["PredictedMOV"] = [(int(x[0]) - int(x[1])) if len(x) > 1 else float("nan") for x
in fm_df.PredictedScore.astype("str").str.split("-")]
Expand All @@ -143,11 +144,12 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None):
# Parse predicted loser.
teams = fm_df.Game.str.split(", ").tolist()
teams_np = fm_df.Game.str.split(" at ").tolist()
pred_winner = fm_df["PredictedWinner"].tolist()

i = 0
pred_loser = []
for x in teams:
if not len(x) == 2:
for i, x in enumerate(teams):
if len(x) != 2:
x = teams_np[i]

# Account for neutral games.
Expand Down
1 change: 1 addition & 0 deletions tests/test_fanmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def test_fanmatch(browser):
"Marquette",
"73-72",
"51%",
"nan",
"1",
"Xavier",
"2OT",
Expand Down

0 comments on commit b7d7eab

Please sign in to comment.