diff --git a/kenpompy/FanMatch.py b/kenpompy/FanMatch.py index 799fdbb..553edba 100644 --- a/kenpompy/FanMatch.py +++ b/kenpompy/FanMatch.py @@ -5,6 +5,7 @@ import pandas as pd from io import StringIO import re +from datetime import datetime from cloudscraper import CloudScraper from bs4 import BeautifulSoup from typing import Optional @@ -57,13 +58,15 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): fm = BeautifulSoup(get_html(browser, self.url), "html.parser") if "Sorry, no games today." in fm.text: return - time_header = fm.find('th', string=re.compile(r"Time \(ET\)")) - if time_header and time_header.find("a"): - href = time_header.find("a")["href"] - date_match = re.search(r'd=(\d{4}-\d{2}-\d{2})', href) + if date is not None: + date_text = fm.find("div", class_="lh12").get_text() + date_match = re.search(r"for \w+, (\w+ \d{1,2}[a-z]{2})", date_text) if date_match: - found_date = date_match.group(1) - if found_date != date: + extracted_date_str = re.sub(r"(st|nd|rd|th)", "", date_match.group(1)) + extracted_date = datetime.strptime(extracted_date_str, "%B %d") + extracted_mmdd = extracted_date.strftime("%m-%d") + user_mmdd = datetime.strptime(date, "%Y-%m-%d").strftime("%m-%d") + if extracted_mmdd != user_mmdd: return table = fm.find_all("table")[0] fm_df = pd.read_html(StringIO(str(table))) @@ -127,13 +130,11 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): pos = fm_df.Game.str.split(r" \[").str[1] fm_df["Game"], fm_df["Possessions"] = fm_df.Game.str.split(r" \[").str[0], pos.astype("str") fm_df.Possessions = fm_df.Possessions.str.rstrip(r"\] ") - predict_info = fm_df.Prediction.str.split() - pred_winner = fm_df.Prediction.astype("str").str.split().str[0:-2].tolist() - pred_winner = [" ".join(i) if not any(pd.isnull(i)) else float("nan") for i in pred_winner] - fm_df["PredictedWinner"] = pred_winner - fm_df["PredictedScore"] = fm_df.Prediction.str.split().str[-2] - fm_df["WinProbability"] = fm_df.Prediction.str.split().str[-1] - fm_df.WinProbability = fm_df.WinProbability.str.strip("()") + fm_df["PredictedWinner"] = fm_df["Prediction"].str.extract(r"^(.+?) \d+-\d+")[0] + fm_df["PredictedScore"] = fm_df["Prediction"].str.extract(r" (\d+-\d+)")[0] + fm_df["WinProbability"] = fm_df["Prediction"].str.extract(r"\((\d+%)\)")[0] + fm_df["PredictedPossessions"] = fm_df["Prediction"].str.extract(r"\[(\d+)\]")[0].astype(float) + fm_df["Possessions"] = fm_df["Possessions"].where(fm_df["Possessions"] != "", fm_df["PredictedPossessions"]) fm_df["PredictedMOV"] = [(int(x[0]) - int(x[1])) if len(x) > 1 else float("nan") for x in fm_df.PredictedScore.astype("str").str.split("-")] @@ -143,11 +144,12 @@ def __init__(self, browser: CloudScraper, date: Optional[str]=None): # Parse predicted loser. teams = fm_df.Game.str.split(", ").tolist() teams_np = fm_df.Game.str.split(" at ").tolist() + pred_winner = fm_df["PredictedWinner"].tolist() i = 0 pred_loser = [] - for x in teams: - if not len(x) == 2: + for i, x in enumerate(teams): + if len(x) != 2: x = teams_np[i] # Account for neutral games. diff --git a/tests/test_fanmatch.py b/tests/test_fanmatch.py index fa1a885..9c693d5 100644 --- a/tests/test_fanmatch.py +++ b/tests/test_fanmatch.py @@ -17,6 +17,7 @@ def test_fanmatch(browser): "Marquette", "73-72", "51%", + "nan", "1", "Xavier", "2OT",